| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "eval_steps": 20000, |
| "global_step": 309400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 63.80897521972656, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 5.0757, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 51.32004165649414, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.3807, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 9.607514381408691, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 4.1432, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 7.153324127197266, |
| "learning_rate": 3.99e-05, |
| "loss": 4.0461, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 3.7312490940093994, |
| "learning_rate": 4.99e-05, |
| "loss": 3.9927, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 2.4463391304016113, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.9632, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 3.8260347843170166, |
| "learning_rate": 6.99e-05, |
| "loss": 3.9379, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 5.175188064575195, |
| "learning_rate": 7.99e-05, |
| "loss": 3.9001, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 13.827736854553223, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 3.8392, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 2.378403425216675, |
| "learning_rate": 9.99e-05, |
| "loss": 3.8368, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 4.115757942199707, |
| "learning_rate": 0.0001099, |
| "loss": 3.8114, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 2.430039882659912, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 3.8009, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 3.2554678916931152, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 3.7745, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 1.9953078031539917, |
| "learning_rate": 0.0001399, |
| "loss": 3.7577, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 2.4289982318878174, |
| "learning_rate": 0.0001499, |
| "loss": 3.72, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 2.354421377182007, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 3.7104, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 2.225425958633423, |
| "learning_rate": 0.0001699, |
| "loss": 3.7334, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 7.562653541564941, |
| "learning_rate": 0.0001799, |
| "loss": 3.6986, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 2.0109822750091553, |
| "learning_rate": 0.0001899, |
| "loss": 3.7064, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 2.309058904647827, |
| "learning_rate": 0.0001999, |
| "loss": 3.6754, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 2.2192893028259277, |
| "learning_rate": 0.0002099, |
| "loss": 3.668, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 2.3891031742095947, |
| "learning_rate": 0.0002199, |
| "loss": 3.6532, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 2.036247491836548, |
| "learning_rate": 0.0002299, |
| "loss": 3.6497, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 2.8763175010681152, |
| "learning_rate": 0.0002399, |
| "loss": 3.6293, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 2.384962558746338, |
| "learning_rate": 0.0002499, |
| "loss": 3.6139, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.3743388652801514, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 3.6356, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 2.2165491580963135, |
| "learning_rate": 0.0002699, |
| "loss": 3.5882, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 2.483201503753662, |
| "learning_rate": 0.0002799, |
| "loss": 3.5897, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 2.3332512378692627, |
| "learning_rate": 0.0002899, |
| "loss": 3.5775, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 2.368666887283325, |
| "learning_rate": 0.0002999, |
| "loss": 3.5657, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 1.8092663288116455, |
| "learning_rate": 0.0003099, |
| "loss": 3.578, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 1.7232047319412231, |
| "learning_rate": 0.0003199, |
| "loss": 3.512, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 1.9123388528823853, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 3.5244, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 1.1431645154953003, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 3.5241, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 1.532568335533142, |
| "learning_rate": 0.0003499, |
| "loss": 3.4954, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 1.2241241931915283, |
| "learning_rate": 0.0003599, |
| "loss": 3.5262, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 1.7371819019317627, |
| "learning_rate": 0.0003699, |
| "loss": 3.5145, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.4901371002197266, |
| "learning_rate": 0.0003799, |
| "loss": 3.5058, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.6141966581344604, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 3.5136, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 1.7414507865905762, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 3.4896, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 2.876239538192749, |
| "learning_rate": 0.0004099, |
| "loss": 3.479, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 1.535861849784851, |
| "learning_rate": 0.0004199, |
| "loss": 3.4806, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.4972548484802246, |
| "learning_rate": 0.0004299, |
| "loss": 3.4871, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 2.6041390895843506, |
| "learning_rate": 0.0004399, |
| "loss": 3.4717, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.3674488067626953, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 3.4634, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 1.598819375038147, |
| "learning_rate": 0.0004599, |
| "loss": 3.4396, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 1.1317894458770752, |
| "learning_rate": 0.0004699, |
| "loss": 3.4456, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.3716052770614624, |
| "learning_rate": 0.0004799, |
| "loss": 3.455, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 1.099937915802002, |
| "learning_rate": 0.0004899, |
| "loss": 3.4444, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 1.3480145931243896, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 3.4497, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 1.1580358743667603, |
| "learning_rate": 0.0005099, |
| "loss": 3.4217, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.1731674671173096, |
| "learning_rate": 0.0005199, |
| "loss": 3.4201, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 1.1458629369735718, |
| "learning_rate": 0.0005299, |
| "loss": 3.4209, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.2101383209228516, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 3.4244, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 1.1774795055389404, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 3.4151, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 1.2488919496536255, |
| "learning_rate": 0.0005599, |
| "loss": 3.4146, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.0765422582626343, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 3.4276, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 1.0370737314224243, |
| "learning_rate": 0.0005799, |
| "loss": 3.3888, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.0570347309112549, |
| "learning_rate": 0.0005899, |
| "loss": 3.3894, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 1.089784860610962, |
| "learning_rate": 0.0005999, |
| "loss": 3.3823, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 1.1070977449417114, |
| "learning_rate": 0.0006099, |
| "loss": 3.3782, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 0.9683001041412354, |
| "learning_rate": 0.0006199, |
| "loss": 3.3913, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 0.8402898907661438, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 3.3484, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 0.8574873208999634, |
| "learning_rate": 0.0006399, |
| "loss": 3.3462, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 0.733184278011322, |
| "learning_rate": 0.0006499, |
| "loss": 3.3413, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.1950469017028809, |
| "learning_rate": 0.0006599, |
| "loss": 3.348, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 0.8092224597930908, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 3.349, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 1.2157089710235596, |
| "learning_rate": 0.0006799, |
| "loss": 3.334, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 0.9567965865135193, |
| "learning_rate": 0.0006899, |
| "loss": 3.3301, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 0.7585746049880981, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 3.3377, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 1.1528912782669067, |
| "learning_rate": 0.0007099, |
| "loss": 3.3225, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 1.0700244903564453, |
| "learning_rate": 0.0007199, |
| "loss": 3.3434, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 1.1488701105117798, |
| "learning_rate": 0.0007299, |
| "loss": 3.3283, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 1.4043810367584229, |
| "learning_rate": 0.0007399, |
| "loss": 3.3344, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.1909414529800415, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 3.3465, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 0.8690209984779358, |
| "learning_rate": 0.0007599, |
| "loss": 3.3478, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 0.767558217048645, |
| "learning_rate": 0.0007699, |
| "loss": 3.3331, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.9267312288284302, |
| "learning_rate": 0.0007799, |
| "loss": 3.3127, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 1.0323102474212646, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 3.3026, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 0.7380838990211487, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 3.3376, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 0.9032792448997498, |
| "learning_rate": 0.0008099, |
| "loss": 3.3435, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 0.9353654980659485, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 3.3104, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 0.8816394209861755, |
| "learning_rate": 0.0008299, |
| "loss": 3.335, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 0.8619208931922913, |
| "learning_rate": 0.0008399, |
| "loss": 3.3452, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 0.9694051146507263, |
| "learning_rate": 0.0008499, |
| "loss": 3.3382, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 1.0988004207611084, |
| "learning_rate": 0.0008599, |
| "loss": 3.329, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 1.3006855249404907, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 3.3152, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 0.9456707239151001, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 3.3295, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 1.0091285705566406, |
| "learning_rate": 0.0008899, |
| "loss": 3.3332, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 1.110554814338684, |
| "learning_rate": 0.0008999, |
| "loss": 3.3078, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.009962558746338, |
| "learning_rate": 0.0009099, |
| "loss": 3.3246, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 0.8776062726974487, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 3.3259, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 1.2100518941879272, |
| "learning_rate": 0.0009299, |
| "loss": 3.3019, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 1.0079325437545776, |
| "learning_rate": 0.0009399, |
| "loss": 3.1978, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 1.0548369884490967, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 3.2165, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 0.9353946447372437, |
| "learning_rate": 0.0009599, |
| "loss": 3.2126, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 1.093759536743164, |
| "learning_rate": 0.0009699, |
| "loss": 3.2195, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 1.34163236618042, |
| "learning_rate": 0.0009799, |
| "loss": 3.2211, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 0.8957286477088928, |
| "learning_rate": 0.0009899, |
| "loss": 3.1988, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 0.7971066236495972, |
| "learning_rate": 0.0009999, |
| "loss": 3.2276, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 0.6855546832084656, |
| "learning_rate": 0.001, |
| "loss": 3.2354, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 1.0885473489761353, |
| "learning_rate": 0.001, |
| "loss": 3.217, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.4477795362472534, |
| "learning_rate": 0.001, |
| "loss": 3.2184, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 0.9638878703117371, |
| "learning_rate": 0.001, |
| "loss": 3.2117, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 0.9306259751319885, |
| "learning_rate": 0.001, |
| "loss": 3.2184, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 0.8912835717201233, |
| "learning_rate": 0.001, |
| "loss": 3.2402, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 1.0636497735977173, |
| "learning_rate": 0.001, |
| "loss": 3.2319, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 0.9546043276786804, |
| "learning_rate": 0.001, |
| "loss": 3.2295, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 0.7783652544021606, |
| "learning_rate": 0.001, |
| "loss": 3.2105, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 0.9355916976928711, |
| "learning_rate": 0.001, |
| "loss": 3.2095, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 0.7190990447998047, |
| "learning_rate": 0.001, |
| "loss": 3.2194, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 1.257772445678711, |
| "learning_rate": 0.001, |
| "loss": 3.227, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 0.7877817749977112, |
| "learning_rate": 0.001, |
| "loss": 3.24, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 1.0141783952713013, |
| "learning_rate": 0.001, |
| "loss": 3.2138, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 0.9734593033790588, |
| "learning_rate": 0.001, |
| "loss": 3.1886, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 0.7290641665458679, |
| "learning_rate": 0.001, |
| "loss": 3.2082, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 0.8883140683174133, |
| "learning_rate": 0.001, |
| "loss": 3.2206, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 0.9745876789093018, |
| "learning_rate": 0.001, |
| "loss": 3.2189, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.8779596090316772, |
| "learning_rate": 0.001, |
| "loss": 3.1992, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 0.8627077341079712, |
| "learning_rate": 0.001, |
| "loss": 3.2177, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 0.7932115793228149, |
| "learning_rate": 0.001, |
| "loss": 3.2022, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 0.9975898861885071, |
| "learning_rate": 0.001, |
| "loss": 3.1953, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.192795753479004, |
| "learning_rate": 0.001, |
| "loss": 3.2168, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 1.080036997795105, |
| "learning_rate": 0.001, |
| "loss": 3.1953, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 0.8876031041145325, |
| "learning_rate": 0.001, |
| "loss": 3.0628, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 0.9834277629852295, |
| "learning_rate": 0.001, |
| "loss": 3.0713, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 0.9485188126564026, |
| "learning_rate": 0.001, |
| "loss": 3.0813, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 1.029014229774475, |
| "learning_rate": 0.001, |
| "loss": 3.0846, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 0.7999043464660645, |
| "learning_rate": 0.001, |
| "loss": 3.0871, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 1.1836215257644653, |
| "learning_rate": 0.001, |
| "loss": 3.0808, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 0.7550233602523804, |
| "learning_rate": 0.001, |
| "loss": 3.0935, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 0.9442141056060791, |
| "learning_rate": 0.001, |
| "loss": 3.0744, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 0.864423930644989, |
| "learning_rate": 0.001, |
| "loss": 3.0986, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 0.780209481716156, |
| "learning_rate": 0.001, |
| "loss": 3.0885, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 0.9403650164604187, |
| "learning_rate": 0.001, |
| "loss": 3.1163, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 0.7610721588134766, |
| "learning_rate": 0.001, |
| "loss": 3.0917, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 1.266564130783081, |
| "learning_rate": 0.001, |
| "loss": 3.108, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 1.096002221107483, |
| "learning_rate": 0.001, |
| "loss": 3.1087, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 0.7693293690681458, |
| "learning_rate": 0.001, |
| "loss": 3.1152, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 0.8701288104057312, |
| "learning_rate": 0.001, |
| "loss": 3.1059, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 0.995335578918457, |
| "learning_rate": 0.001, |
| "loss": 3.0896, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 1.0865423679351807, |
| "learning_rate": 0.001, |
| "loss": 3.1025, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 1.1124467849731445, |
| "learning_rate": 0.001, |
| "loss": 3.1061, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 1.4063979387283325, |
| "learning_rate": 0.001, |
| "loss": 3.097, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 0.9458435773849487, |
| "learning_rate": 0.001, |
| "loss": 3.1045, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 0.6742769479751587, |
| "learning_rate": 0.001, |
| "loss": 3.1088, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 0.8667743802070618, |
| "learning_rate": 0.001, |
| "loss": 3.1002, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 0.9243529438972473, |
| "learning_rate": 0.001, |
| "loss": 3.1099, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 1.021361231803894, |
| "learning_rate": 0.001, |
| "loss": 3.1036, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 0.7967251539230347, |
| "learning_rate": 0.001, |
| "loss": 3.0663, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.8347923159599304, |
| "learning_rate": 0.001, |
| "loss": 3.0948, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 0.8334933519363403, |
| "learning_rate": 0.001, |
| "loss": 3.0957, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 0.9289120435714722, |
| "learning_rate": 0.001, |
| "loss": 3.1009, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 1.081323504447937, |
| "learning_rate": 0.001, |
| "loss": 3.0972, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 0.886936366558075, |
| "learning_rate": 0.001, |
| "loss": 3.0692, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 0.8752295970916748, |
| "learning_rate": 0.001, |
| "loss": 2.9501, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 0.7193301916122437, |
| "learning_rate": 0.001, |
| "loss": 2.9623, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 1.147542953491211, |
| "learning_rate": 0.001, |
| "loss": 2.9765, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 0.8573718070983887, |
| "learning_rate": 0.001, |
| "loss": 2.9609, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 0.9090355634689331, |
| "learning_rate": 0.001, |
| "loss": 2.9924, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 0.8234449625015259, |
| "learning_rate": 0.001, |
| "loss": 2.9723, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 0.712264358997345, |
| "learning_rate": 0.001, |
| "loss": 2.9773, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 1.019116997718811, |
| "learning_rate": 0.001, |
| "loss": 2.9837, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 1.2517943382263184, |
| "learning_rate": 0.001, |
| "loss": 2.9781, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 0.9867748618125916, |
| "learning_rate": 0.001, |
| "loss": 3.0102, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 0.8714147806167603, |
| "learning_rate": 0.001, |
| "loss": 3.0023, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 0.852895975112915, |
| "learning_rate": 0.001, |
| "loss": 2.9872, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 1.0148080587387085, |
| "learning_rate": 0.001, |
| "loss": 3.0073, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 1.0061284303665161, |
| "learning_rate": 0.001, |
| "loss": 3.0088, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 1.031447410583496, |
| "learning_rate": 0.001, |
| "loss": 2.9962, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 0.8130742311477661, |
| "learning_rate": 0.001, |
| "loss": 3.0175, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 0.78955078125, |
| "learning_rate": 0.001, |
| "loss": 3.0058, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 0.7605058550834656, |
| "learning_rate": 0.001, |
| "loss": 2.9887, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 0.8350357413291931, |
| "learning_rate": 0.001, |
| "loss": 3.0166, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 0.7349731922149658, |
| "learning_rate": 0.001, |
| "loss": 2.9868, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 0.8610934615135193, |
| "learning_rate": 0.001, |
| "loss": 3.0184, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 0.9035360813140869, |
| "learning_rate": 0.001, |
| "loss": 3.0072, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 0.8169543147087097, |
| "learning_rate": 0.001, |
| "loss": 3.0184, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 0.932416558265686, |
| "learning_rate": 0.001, |
| "loss": 3.0175, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 0.83762526512146, |
| "learning_rate": 0.001, |
| "loss": 2.9901, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 0.7610704898834229, |
| "learning_rate": 0.001, |
| "loss": 3.0045, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.7906739711761475, |
| "learning_rate": 0.001, |
| "loss": 3.0014, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 1.1893870830535889, |
| "learning_rate": 0.001, |
| "loss": 3.0193, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 1.0120941400527954, |
| "learning_rate": 0.001, |
| "loss": 3.0242, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 0.8534390926361084, |
| "learning_rate": 0.001, |
| "loss": 3.008, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 0.9205257296562195, |
| "learning_rate": 0.001, |
| "loss": 2.9515, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 1.1282649040222168, |
| "learning_rate": 0.001, |
| "loss": 2.8832, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 1.0840119123458862, |
| "learning_rate": 0.001, |
| "loss": 2.865, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 1.0464580059051514, |
| "learning_rate": 0.001, |
| "loss": 2.8871, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 0.8120374083518982, |
| "learning_rate": 0.001, |
| "loss": 2.8726, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 0.8664466738700867, |
| "learning_rate": 0.001, |
| "loss": 2.899, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 0.9399696588516235, |
| "learning_rate": 0.001, |
| "loss": 2.8829, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 1.3675711154937744, |
| "learning_rate": 0.001, |
| "loss": 2.9043, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 1.2555503845214844, |
| "learning_rate": 0.001, |
| "loss": 2.8953, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 0.8409241437911987, |
| "learning_rate": 0.001, |
| "loss": 2.9037, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 0.7829875946044922, |
| "learning_rate": 0.001, |
| "loss": 2.9159, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 0.9084135293960571, |
| "learning_rate": 0.001, |
| "loss": 2.911, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 1.1866672039031982, |
| "learning_rate": 0.001, |
| "loss": 2.8979, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 0.7364369034767151, |
| "learning_rate": 0.001, |
| "loss": 2.9264, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.8288730382919312, |
| "learning_rate": 0.001, |
| "loss": 2.9046, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 1.1130245923995972, |
| "learning_rate": 0.001, |
| "loss": 2.9216, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 1.074400782585144, |
| "learning_rate": 0.001, |
| "loss": 2.9239, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 0.8148232102394104, |
| "learning_rate": 0.001, |
| "loss": 2.8964, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 1.0962389707565308, |
| "learning_rate": 0.001, |
| "loss": 2.9386, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 0.788106381893158, |
| "learning_rate": 0.001, |
| "loss": 2.9134, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 0.8781216144561768, |
| "learning_rate": 0.001, |
| "loss": 2.9286, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 0.8809503316879272, |
| "learning_rate": 0.001, |
| "loss": 2.9277, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.8170621991157532, |
| "learning_rate": 0.001, |
| "loss": 2.9222, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 1.054281234741211, |
| "learning_rate": 0.001, |
| "loss": 2.9309, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.8404674530029297, |
| "learning_rate": 0.001, |
| "loss": 2.9194, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 0.8002380132675171, |
| "learning_rate": 0.001, |
| "loss": 2.9142, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 1.09268057346344, |
| "learning_rate": 0.001, |
| "loss": 2.9427, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 1.0005022287368774, |
| "learning_rate": 0.001, |
| "loss": 2.9272, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 0.8553439974784851, |
| "learning_rate": 0.001, |
| "loss": 2.9235, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 0.8628080487251282, |
| "learning_rate": 0.001, |
| "loss": 2.9177, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 1.0014255046844482, |
| "learning_rate": 0.001, |
| "loss": 2.9363, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 1.1169780492782593, |
| "learning_rate": 0.001, |
| "loss": 2.8747, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 1.1986509561538696, |
| "learning_rate": 0.001, |
| "loss": 2.7947, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 0.9472404718399048, |
| "learning_rate": 0.001, |
| "loss": 2.781, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 0.8761770725250244, |
| "learning_rate": 0.001, |
| "loss": 2.8102, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.10127854347229, |
| "learning_rate": 0.001, |
| "loss": 2.7989, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 1.1972665786743164, |
| "learning_rate": 0.001, |
| "loss": 2.8007, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 0.9232390522956848, |
| "learning_rate": 0.001, |
| "loss": 2.8016, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 1.0866069793701172, |
| "learning_rate": 0.001, |
| "loss": 2.8312, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 1.0556726455688477, |
| "learning_rate": 0.001, |
| "loss": 2.8322, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 1.025449514389038, |
| "learning_rate": 0.001, |
| "loss": 2.8113, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 0.8753816485404968, |
| "learning_rate": 0.001, |
| "loss": 2.8309, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 1.1139119863510132, |
| "learning_rate": 0.001, |
| "loss": 2.8213, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 1.1850379705429077, |
| "learning_rate": 0.001, |
| "loss": 2.8138, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 0.8396257162094116, |
| "learning_rate": 0.001, |
| "loss": 2.8561, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 1.0254182815551758, |
| "learning_rate": 0.001, |
| "loss": 2.845, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 1.0984256267547607, |
| "learning_rate": 0.001, |
| "loss": 2.8638, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 1.1533817052841187, |
| "learning_rate": 0.001, |
| "loss": 2.8465, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 0.9635702967643738, |
| "learning_rate": 0.001, |
| "loss": 2.8595, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 0.8350183367729187, |
| "learning_rate": 0.001, |
| "loss": 2.8639, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 1.0109552145004272, |
| "learning_rate": 0.001, |
| "loss": 2.8395, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 1.049338698387146, |
| "learning_rate": 0.001, |
| "loss": 2.8587, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.985700786113739, |
| "learning_rate": 0.001, |
| "loss": 2.8637, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 0.9394757151603699, |
| "learning_rate": 0.001, |
| "loss": 2.8453, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 1.203756332397461, |
| "learning_rate": 0.001, |
| "loss": 2.8625, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 0.89085853099823, |
| "learning_rate": 0.001, |
| "loss": 2.8558, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 1.2652662992477417, |
| "learning_rate": 0.001, |
| "loss": 2.8569, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 1.0638023614883423, |
| "learning_rate": 0.001, |
| "loss": 2.8699, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 1.0257869958877563, |
| "learning_rate": 0.001, |
| "loss": 2.8554, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 1.2246901988983154, |
| "learning_rate": 0.001, |
| "loss": 2.8755, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 0.8839750289916992, |
| "learning_rate": 0.001, |
| "loss": 2.845, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 1.1866658926010132, |
| "learning_rate": 0.001, |
| "loss": 2.858, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 1.3818105459213257, |
| "learning_rate": 0.001, |
| "loss": 2.7831, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 1.0419820547103882, |
| "learning_rate": 0.001, |
| "loss": 2.7238, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 1.126366376876831, |
| "learning_rate": 0.001, |
| "loss": 2.7257, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 1.0105080604553223, |
| "learning_rate": 0.001, |
| "loss": 2.7445, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 1.1063216924667358, |
| "learning_rate": 0.001, |
| "loss": 2.7421, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 1.4662680625915527, |
| "learning_rate": 0.001, |
| "loss": 2.7432, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 1.0692142248153687, |
| "learning_rate": 0.001, |
| "loss": 2.7346, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 1.5519376993179321, |
| "learning_rate": 0.001, |
| "loss": 2.7417, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 1.1988738775253296, |
| "learning_rate": 0.001, |
| "loss": 2.7734, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 1.592111587524414, |
| "learning_rate": 0.001, |
| "loss": 2.7705, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 1.6889156103134155, |
| "learning_rate": 0.001, |
| "loss": 2.7662, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 1.0734881162643433, |
| "learning_rate": 0.001, |
| "loss": 2.7336, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.3303179740905762, |
| "learning_rate": 0.001, |
| "loss": 2.7566, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 1.2512389421463013, |
| "learning_rate": 0.001, |
| "loss": 2.7635, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 1.4159555435180664, |
| "learning_rate": 0.001, |
| "loss": 2.7741, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 1.1148388385772705, |
| "learning_rate": 0.001, |
| "loss": 2.7955, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 0.9710886478424072, |
| "learning_rate": 0.001, |
| "loss": 2.7931, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 1.1238210201263428, |
| "learning_rate": 0.001, |
| "loss": 2.7663, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 1.0266234874725342, |
| "learning_rate": 0.001, |
| "loss": 2.7859, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 1.1067254543304443, |
| "learning_rate": 0.001, |
| "loss": 2.7709, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 1.1781024932861328, |
| "learning_rate": 0.001, |
| "loss": 2.7905, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 1.460496187210083, |
| "learning_rate": 0.001, |
| "loss": 2.7954, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 1.214035987854004, |
| "learning_rate": 0.001, |
| "loss": 2.798, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 1.1744524240493774, |
| "learning_rate": 0.001, |
| "loss": 2.773, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 1.1752805709838867, |
| "learning_rate": 0.001, |
| "loss": 2.7875, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 1.3713490962982178, |
| "learning_rate": 0.001, |
| "loss": 2.7948, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 1.4796494245529175, |
| "learning_rate": 0.001, |
| "loss": 2.7963, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 1.0539920330047607, |
| "learning_rate": 0.001, |
| "loss": 2.8062, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 1.4270648956298828, |
| "learning_rate": 0.001, |
| "loss": 2.8028, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 1.0808746814727783, |
| "learning_rate": 0.001, |
| "loss": 2.8078, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 1.1713131666183472, |
| "learning_rate": 0.001, |
| "loss": 2.8011, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.2255527973175049, |
| "learning_rate": 0.001, |
| "loss": 2.7072, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.5293316841125488, |
| "learning_rate": 0.001, |
| "loss": 2.6307, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 1.493971586227417, |
| "learning_rate": 0.001, |
| "loss": 2.6732, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.0354042053222656, |
| "learning_rate": 0.001, |
| "loss": 2.6383, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.18374502658844, |
| "learning_rate": 0.001, |
| "loss": 2.6923, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 0.874068021774292, |
| "learning_rate": 0.001, |
| "loss": 2.6817, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.5086418390274048, |
| "learning_rate": 0.001, |
| "loss": 2.6822, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.9380152225494385, |
| "learning_rate": 0.001, |
| "loss": 2.6832, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 0.9644330739974976, |
| "learning_rate": 0.001, |
| "loss": 2.7148, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 1.3587816953659058, |
| "learning_rate": 0.001, |
| "loss": 2.6998, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.1259444952011108, |
| "learning_rate": 0.001, |
| "loss": 2.6989, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.3353569507598877, |
| "learning_rate": 0.001, |
| "loss": 2.7075, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.4237273931503296, |
| "learning_rate": 0.001, |
| "loss": 2.6998, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.4491281509399414, |
| "learning_rate": 0.001, |
| "loss": 2.7173, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 1.1129282712936401, |
| "learning_rate": 0.001, |
| "loss": 2.7142, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.2722301483154297, |
| "learning_rate": 0.001, |
| "loss": 2.6983, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 1.1919955015182495, |
| "learning_rate": 0.001, |
| "loss": 2.7242, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 0.8851849436759949, |
| "learning_rate": 0.001, |
| "loss": 2.7172, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.1396182775497437, |
| "learning_rate": 0.001, |
| "loss": 2.7365, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 1.0789813995361328, |
| "learning_rate": 0.001, |
| "loss": 2.7288, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 1.1112487316131592, |
| "learning_rate": 0.001, |
| "loss": 2.7228, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 1.112724781036377, |
| "learning_rate": 0.001, |
| "loss": 2.7157, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.1656497716903687, |
| "learning_rate": 0.001, |
| "loss": 2.724, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.4802846908569336, |
| "learning_rate": 0.001, |
| "loss": 2.727, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 0.9352574348449707, |
| "learning_rate": 0.001, |
| "loss": 2.7296, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.2167083024978638, |
| "learning_rate": 0.001, |
| "loss": 2.7345, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.1363461017608643, |
| "learning_rate": 0.001, |
| "loss": 2.7302, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 1.0233099460601807, |
| "learning_rate": 0.001, |
| "loss": 2.7356, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.6240216493606567, |
| "learning_rate": 0.001, |
| "loss": 2.7486, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 1.293664813041687, |
| "learning_rate": 0.001, |
| "loss": 2.7391, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.1840318441390991, |
| "learning_rate": 0.001, |
| "loss": 2.746, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 1.403177261352539, |
| "learning_rate": 0.001, |
| "loss": 2.653, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 0.9433863759040833, |
| "learning_rate": 0.001, |
| "loss": 2.598, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.0219208002090454, |
| "learning_rate": 0.001, |
| "loss": 2.6092, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 0.9918056130409241, |
| "learning_rate": 0.001, |
| "loss": 2.6183, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 1.1675612926483154, |
| "learning_rate": 0.001, |
| "loss": 2.6016, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 0.7957185506820679, |
| "learning_rate": 0.001, |
| "loss": 2.6274, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 1.0217318534851074, |
| "learning_rate": 0.001, |
| "loss": 2.6152, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 1.0155982971191406, |
| "learning_rate": 0.001, |
| "loss": 2.6283, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 0.8074186444282532, |
| "learning_rate": 0.001, |
| "loss": 2.634, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 0.8662866353988647, |
| "learning_rate": 0.001, |
| "loss": 2.6434, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 1.2984246015548706, |
| "learning_rate": 0.001, |
| "loss": 2.6334, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 1.0286608934402466, |
| "learning_rate": 0.001, |
| "loss": 2.6406, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 1.104810118675232, |
| "learning_rate": 0.001, |
| "loss": 2.6734, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 0.9450679421424866, |
| "learning_rate": 0.001, |
| "loss": 2.6495, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.0712677240371704, |
| "learning_rate": 0.001, |
| "loss": 2.6659, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 0.8774548768997192, |
| "learning_rate": 0.001, |
| "loss": 2.6579, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 0.9978793859481812, |
| "learning_rate": 0.001, |
| "loss": 2.6579, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.344751000404358, |
| "learning_rate": 0.001, |
| "loss": 2.662, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.010345697402954, |
| "learning_rate": 0.001, |
| "loss": 2.6454, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 0.9622514247894287, |
| "learning_rate": 0.001, |
| "loss": 2.671, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 0.9483785033226013, |
| "learning_rate": 0.001, |
| "loss": 2.6475, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 1.1246763467788696, |
| "learning_rate": 0.001, |
| "loss": 2.6651, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.0108826160430908, |
| "learning_rate": 0.001, |
| "loss": 2.6842, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 0.9328263401985168, |
| "learning_rate": 0.001, |
| "loss": 2.6932, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 0.9086120128631592, |
| "learning_rate": 0.001, |
| "loss": 2.6769, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 0.953272819519043, |
| "learning_rate": 0.001, |
| "loss": 2.6935, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 1.2190536260604858, |
| "learning_rate": 0.001, |
| "loss": 2.6817, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 0.8844929337501526, |
| "learning_rate": 0.001, |
| "loss": 2.6891, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 0.9220634698867798, |
| "learning_rate": 0.001, |
| "loss": 2.6707, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 0.8071881532669067, |
| "learning_rate": 0.001, |
| "loss": 2.6822, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 1.1711838245391846, |
| "learning_rate": 0.001, |
| "loss": 2.6829, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.0321942567825317, |
| "learning_rate": 0.001, |
| "loss": 2.6256, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 1.0227347612380981, |
| "learning_rate": 0.001, |
| "loss": 2.5575, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.0060416460037231, |
| "learning_rate": 0.001, |
| "loss": 2.5526, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 1.0068382024765015, |
| "learning_rate": 0.001, |
| "loss": 2.5789, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.3233875036239624, |
| "learning_rate": 0.001, |
| "loss": 2.563, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 0.952245831489563, |
| "learning_rate": 0.001, |
| "loss": 2.5817, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 1.0358257293701172, |
| "learning_rate": 0.001, |
| "loss": 2.5875, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 1.11635160446167, |
| "learning_rate": 0.001, |
| "loss": 2.5677, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 1.115350604057312, |
| "learning_rate": 0.001, |
| "loss": 2.5832, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 1.3675668239593506, |
| "learning_rate": 0.001, |
| "loss": 2.5801, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 0.9039455652236938, |
| "learning_rate": 0.001, |
| "loss": 2.581, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 0.9431731700897217, |
| "learning_rate": 0.001, |
| "loss": 2.5871, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 1.5077314376831055, |
| "learning_rate": 0.001, |
| "loss": 2.5942, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 0.9280950427055359, |
| "learning_rate": 0.001, |
| "loss": 2.5854, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 0.8959194421768188, |
| "learning_rate": 0.001, |
| "loss": 2.6077, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 0.8173645734786987, |
| "learning_rate": 0.001, |
| "loss": 2.6389, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.9029143452644348, |
| "learning_rate": 0.001, |
| "loss": 2.6093, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 1.0597341060638428, |
| "learning_rate": 0.001, |
| "loss": 2.6155, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 0.7653180360794067, |
| "learning_rate": 0.001, |
| "loss": 2.5938, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 0.9608849287033081, |
| "learning_rate": 0.001, |
| "loss": 2.6059, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 0.7896739840507507, |
| "learning_rate": 0.001, |
| "loss": 2.6306, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 1.1343886852264404, |
| "learning_rate": 0.001, |
| "loss": 2.6083, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 1.0658496618270874, |
| "learning_rate": 0.001, |
| "loss": 2.6192, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 0.9849185943603516, |
| "learning_rate": 0.001, |
| "loss": 2.6148, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 1.1015666723251343, |
| "learning_rate": 0.001, |
| "loss": 2.6081, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 1.2068266868591309, |
| "learning_rate": 0.001, |
| "loss": 2.6453, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 1.140753984451294, |
| "learning_rate": 0.001, |
| "loss": 2.6327, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 1.2157039642333984, |
| "learning_rate": 0.001, |
| "loss": 2.6198, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 1.1962515115737915, |
| "learning_rate": 0.001, |
| "loss": 2.6495, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 1.0149519443511963, |
| "learning_rate": 0.001, |
| "loss": 2.6221, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 1.1916568279266357, |
| "learning_rate": 0.001, |
| "loss": 2.6462, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 1.198426365852356, |
| "learning_rate": 0.001, |
| "loss": 2.5254, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 1.0675870180130005, |
| "learning_rate": 0.001, |
| "loss": 2.4878, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 0.934251070022583, |
| "learning_rate": 0.001, |
| "loss": 2.4979, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 1.2349053621292114, |
| "learning_rate": 0.001, |
| "loss": 2.4913, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.235815405845642, |
| "learning_rate": 0.001, |
| "loss": 2.5046, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.257660150527954, |
| "learning_rate": 0.001, |
| "loss": 2.5259, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 1.246017336845398, |
| "learning_rate": 0.001, |
| "loss": 2.5421, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 1.1210070848464966, |
| "learning_rate": 0.001, |
| "loss": 2.5366, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 1.0047426223754883, |
| "learning_rate": 0.001, |
| "loss": 2.5445, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 0.8410885334014893, |
| "learning_rate": 0.001, |
| "loss": 2.5369, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 1.1904544830322266, |
| "learning_rate": 0.001, |
| "loss": 2.5295, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 1.5273857116699219, |
| "learning_rate": 0.001, |
| "loss": 2.5166, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 0.9626736640930176, |
| "learning_rate": 0.001, |
| "loss": 2.5535, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 0.8150332570075989, |
| "learning_rate": 0.001, |
| "loss": 2.5595, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 1.0832295417785645, |
| "learning_rate": 0.001, |
| "loss": 2.5457, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 1.0489040613174438, |
| "learning_rate": 0.001, |
| "loss": 2.5534, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 0.9710071682929993, |
| "learning_rate": 0.001, |
| "loss": 2.5536, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 1.0966761112213135, |
| "learning_rate": 0.001, |
| "loss": 2.5589, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.2801822423934937, |
| "learning_rate": 0.001, |
| "loss": 2.5783, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 0.9837306141853333, |
| "learning_rate": 0.001, |
| "loss": 2.578, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 0.9963669776916504, |
| "learning_rate": 0.001, |
| "loss": 2.5728, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 1.0618131160736084, |
| "learning_rate": 0.001, |
| "loss": 2.5602, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 1.1699451208114624, |
| "learning_rate": 0.001, |
| "loss": 2.6021, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 1.159548282623291, |
| "learning_rate": 0.001, |
| "loss": 2.5887, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 1.0369675159454346, |
| "learning_rate": 0.001, |
| "loss": 2.5738, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.0057525634765625, |
| "learning_rate": 0.001, |
| "loss": 2.593, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 1.0129728317260742, |
| "learning_rate": 0.001, |
| "loss": 2.5799, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.3273283243179321, |
| "learning_rate": 0.001, |
| "loss": 2.5779, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 0.9598479866981506, |
| "learning_rate": 0.001, |
| "loss": 2.5914, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 1.2933992147445679, |
| "learning_rate": 0.001, |
| "loss": 2.5951, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 1.0155681371688843, |
| "learning_rate": 0.001, |
| "loss": 2.5899, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 1.4213255643844604, |
| "learning_rate": 0.001, |
| "loss": 2.4736, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 1.0931323766708374, |
| "learning_rate": 0.001, |
| "loss": 2.458, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 1.0388720035552979, |
| "learning_rate": 0.001, |
| "loss": 2.4759, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 1.0456798076629639, |
| "learning_rate": 0.001, |
| "loss": 2.4536, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 0.8500736355781555, |
| "learning_rate": 0.001, |
| "loss": 2.465, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 1.1477937698364258, |
| "learning_rate": 0.001, |
| "loss": 2.4896, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 0.9674694538116455, |
| "learning_rate": 0.001, |
| "loss": 2.4841, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 0.861808717250824, |
| "learning_rate": 0.001, |
| "loss": 2.4665, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 0.9526507258415222, |
| "learning_rate": 0.001, |
| "loss": 2.4698, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 1.0938525199890137, |
| "learning_rate": 0.001, |
| "loss": 2.4909, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 0.9371994137763977, |
| "learning_rate": 0.001, |
| "loss": 2.4832, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 1.1339445114135742, |
| "learning_rate": 0.001, |
| "loss": 2.4994, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 0.8687131404876709, |
| "learning_rate": 0.001, |
| "loss": 2.5068, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 0.8617784380912781, |
| "learning_rate": 0.001, |
| "loss": 2.5108, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 1.063913345336914, |
| "learning_rate": 0.001, |
| "loss": 2.5043, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 1.0324054956436157, |
| "learning_rate": 0.001, |
| "loss": 2.5288, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 0.934737503528595, |
| "learning_rate": 0.001, |
| "loss": 2.5024, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 0.7680659294128418, |
| "learning_rate": 0.001, |
| "loss": 2.5058, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 1.341858148574829, |
| "learning_rate": 0.001, |
| "loss": 2.5329, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 0.8703786730766296, |
| "learning_rate": 0.001, |
| "loss": 2.524, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 1.0334969758987427, |
| "learning_rate": 0.001, |
| "loss": 2.5282, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 0.9755718111991882, |
| "learning_rate": 0.001, |
| "loss": 2.5121, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 0.9405517578125, |
| "learning_rate": 0.001, |
| "loss": 2.5202, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 1.0205954313278198, |
| "learning_rate": 0.001, |
| "loss": 2.5402, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 0.7872236371040344, |
| "learning_rate": 0.001, |
| "loss": 2.5279, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 1.1544864177703857, |
| "learning_rate": 0.001, |
| "loss": 2.548, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 0.7300485372543335, |
| "learning_rate": 0.001, |
| "loss": 2.5433, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 1.3647654056549072, |
| "learning_rate": 0.001, |
| "loss": 2.5652, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 1.4036544561386108, |
| "learning_rate": 0.001, |
| "loss": 2.5485, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 1.0153886079788208, |
| "learning_rate": 0.001, |
| "loss": 2.5488, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 1.1712045669555664, |
| "learning_rate": 0.001, |
| "loss": 2.5506, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 0.9621208310127258, |
| "learning_rate": 0.001, |
| "loss": 2.4262, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 0.9850360751152039, |
| "learning_rate": 0.001, |
| "loss": 2.405, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.406996726989746, |
| "learning_rate": 0.001, |
| "loss": 2.4143, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.0328227281570435, |
| "learning_rate": 0.001, |
| "loss": 2.4296, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 1.3105231523513794, |
| "learning_rate": 0.001, |
| "loss": 2.4179, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 1.309824824333191, |
| "learning_rate": 0.001, |
| "loss": 2.4447, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 1.170006275177002, |
| "learning_rate": 0.001, |
| "loss": 2.443, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 0.92409348487854, |
| "learning_rate": 0.001, |
| "loss": 2.4174, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 1.0731486082077026, |
| "learning_rate": 0.001, |
| "loss": 2.4297, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 0.9552510380744934, |
| "learning_rate": 0.001, |
| "loss": 2.4626, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 1.3744381666183472, |
| "learning_rate": 0.001, |
| "loss": 2.4487, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 0.9880512356758118, |
| "learning_rate": 0.001, |
| "loss": 2.4556, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 1.1186575889587402, |
| "learning_rate": 0.001, |
| "loss": 2.4665, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 1.2342720031738281, |
| "learning_rate": 0.001, |
| "loss": 2.4458, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.1874266862869263, |
| "learning_rate": 0.001, |
| "loss": 2.462, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 0.9831111431121826, |
| "learning_rate": 0.001, |
| "loss": 2.4848, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.2371684312820435, |
| "learning_rate": 0.001, |
| "loss": 2.4589, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 0.7964707016944885, |
| "learning_rate": 0.001, |
| "loss": 2.4648, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 1.0098804235458374, |
| "learning_rate": 0.001, |
| "loss": 2.4696, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.0485209226608276, |
| "learning_rate": 0.001, |
| "loss": 2.482, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 1.0087767839431763, |
| "learning_rate": 0.001, |
| "loss": 2.4725, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 1.2038854360580444, |
| "learning_rate": 0.001, |
| "loss": 2.479, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 0.9472567439079285, |
| "learning_rate": 0.001, |
| "loss": 2.4873, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.1870334148406982, |
| "learning_rate": 0.001, |
| "loss": 2.5014, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 0.9796122908592224, |
| "learning_rate": 0.001, |
| "loss": 2.4833, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 1.093863606452942, |
| "learning_rate": 0.001, |
| "loss": 2.5106, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 0.8866999745368958, |
| "learning_rate": 0.001, |
| "loss": 2.5075, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 1.0653350353240967, |
| "learning_rate": 0.001, |
| "loss": 2.4932, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.0493881702423096, |
| "learning_rate": 0.001, |
| "loss": 2.5202, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 0.9885337948799133, |
| "learning_rate": 0.001, |
| "loss": 2.4977, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 1.1102535724639893, |
| "learning_rate": 0.001, |
| "loss": 2.5095, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.1449183225631714, |
| "learning_rate": 0.001, |
| "loss": 2.3813, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 1.0444215536117554, |
| "learning_rate": 0.001, |
| "loss": 2.3625, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.1632866859436035, |
| "learning_rate": 0.001, |
| "loss": 2.3935, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.1026229858398438, |
| "learning_rate": 0.001, |
| "loss": 2.357, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 1.2819719314575195, |
| "learning_rate": 0.001, |
| "loss": 2.3705, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 1.118762493133545, |
| "learning_rate": 0.001, |
| "loss": 2.3738, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 0.9931496977806091, |
| "learning_rate": 0.001, |
| "loss": 2.3871, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.0876235961914062, |
| "learning_rate": 0.001, |
| "loss": 2.4186, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.0590667724609375, |
| "learning_rate": 0.001, |
| "loss": 2.3973, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.3229364156723022, |
| "learning_rate": 0.001, |
| "loss": 2.4038, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.4679980278015137, |
| "learning_rate": 0.001, |
| "loss": 2.4207, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 1.6495345830917358, |
| "learning_rate": 0.001, |
| "loss": 2.398, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.0480834245681763, |
| "learning_rate": 0.001, |
| "loss": 2.4145, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.340477705001831, |
| "learning_rate": 0.001, |
| "loss": 2.4233, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.1183497905731201, |
| "learning_rate": 0.001, |
| "loss": 2.4189, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 0.9559330344200134, |
| "learning_rate": 0.001, |
| "loss": 2.4346, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.090479850769043, |
| "learning_rate": 0.001, |
| "loss": 2.4432, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 1.1318349838256836, |
| "learning_rate": 0.001, |
| "loss": 2.4252, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.2161760330200195, |
| "learning_rate": 0.001, |
| "loss": 2.4305, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 1.0710586309432983, |
| "learning_rate": 0.001, |
| "loss": 2.4512, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 1.1996996402740479, |
| "learning_rate": 0.001, |
| "loss": 2.4546, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.1999651193618774, |
| "learning_rate": 0.001, |
| "loss": 2.4391, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.0343999862670898, |
| "learning_rate": 0.001, |
| "loss": 2.4512, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 1.038922667503357, |
| "learning_rate": 0.001, |
| "loss": 2.4524, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 1.0192667245864868, |
| "learning_rate": 0.001, |
| "loss": 2.4479, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 1.0764349699020386, |
| "learning_rate": 0.001, |
| "loss": 2.4616, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 1.4133694171905518, |
| "learning_rate": 0.001, |
| "loss": 2.4489, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.2370002269744873, |
| "learning_rate": 0.001, |
| "loss": 2.4603, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 0.9169979691505432, |
| "learning_rate": 0.001, |
| "loss": 2.4641, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.161568522453308, |
| "learning_rate": 0.001, |
| "loss": 2.471, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 1.4521310329437256, |
| "learning_rate": 0.001, |
| "loss": 2.456, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.418137550354004, |
| "learning_rate": 0.001, |
| "loss": 2.3271, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.6356244087219238, |
| "learning_rate": 0.001, |
| "loss": 2.3257, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 1.5366765260696411, |
| "learning_rate": 0.001, |
| "loss": 2.3292, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 1.386238694190979, |
| "learning_rate": 0.001, |
| "loss": 2.3504, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 1.2431461811065674, |
| "learning_rate": 0.001, |
| "loss": 2.3428, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.676797866821289, |
| "learning_rate": 0.001, |
| "loss": 2.3447, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.3970446586608887, |
| "learning_rate": 0.001, |
| "loss": 2.36, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 1.542039394378662, |
| "learning_rate": 0.001, |
| "loss": 2.3574, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 1.7507634162902832, |
| "learning_rate": 0.001, |
| "loss": 2.3575, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.0830522775650024, |
| "learning_rate": 0.001, |
| "loss": 2.3557, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.1822794675827026, |
| "learning_rate": 0.001, |
| "loss": 2.3762, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 1.2072137594223022, |
| "learning_rate": 0.001, |
| "loss": 2.3763, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.2439135313034058, |
| "learning_rate": 0.001, |
| "loss": 2.3854, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.6300753355026245, |
| "learning_rate": 0.001, |
| "loss": 2.3863, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.2830435037612915, |
| "learning_rate": 0.001, |
| "loss": 2.3932, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 1.2407660484313965, |
| "learning_rate": 0.001, |
| "loss": 2.3846, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.5125946998596191, |
| "learning_rate": 0.001, |
| "loss": 2.38, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 1.2218705415725708, |
| "learning_rate": 0.001, |
| "loss": 2.4112, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.1899499893188477, |
| "learning_rate": 0.001, |
| "loss": 2.3926, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.378842830657959, |
| "learning_rate": 0.001, |
| "loss": 2.3945, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.0433300733566284, |
| "learning_rate": 0.001, |
| "loss": 2.4003, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 1.6604347229003906, |
| "learning_rate": 0.001, |
| "loss": 2.4054, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.0785212516784668, |
| "learning_rate": 0.001, |
| "loss": 2.4039, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 1.3013510704040527, |
| "learning_rate": 0.001, |
| "loss": 2.4119, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 1.3156054019927979, |
| "learning_rate": 0.001, |
| "loss": 2.4216, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 1.7108491659164429, |
| "learning_rate": 0.001, |
| "loss": 2.4079, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 1.443699836730957, |
| "learning_rate": 0.001, |
| "loss": 2.4053, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.277143955230713, |
| "learning_rate": 0.001, |
| "loss": 2.421, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 1.3698267936706543, |
| "learning_rate": 0.001, |
| "loss": 2.4179, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.3238378763198853, |
| "learning_rate": 0.001, |
| "loss": 2.4443, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.0544229745864868, |
| "learning_rate": 0.001, |
| "loss": 2.4145, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.4758795499801636, |
| "learning_rate": 0.001, |
| "loss": 2.2783, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.10501229763031, |
| "learning_rate": 0.001, |
| "loss": 2.2856, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.2699426412582397, |
| "learning_rate": 0.001, |
| "loss": 2.273, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.629236102104187, |
| "learning_rate": 0.001, |
| "loss": 2.294, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 2.1197991371154785, |
| "learning_rate": 0.001, |
| "loss": 2.3205, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 1.8824236392974854, |
| "learning_rate": 0.001, |
| "loss": 2.3119, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.3362033367156982, |
| "learning_rate": 0.001, |
| "loss": 2.3331, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.6379491090774536, |
| "learning_rate": 0.001, |
| "loss": 2.3171, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 2.098562240600586, |
| "learning_rate": 0.001, |
| "loss": 2.3238, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 2.088083505630493, |
| "learning_rate": 0.001, |
| "loss": 2.3207, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 1.4209333658218384, |
| "learning_rate": 0.001, |
| "loss": 2.3402, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 1.4178082942962646, |
| "learning_rate": 0.001, |
| "loss": 2.3379, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.5361058712005615, |
| "learning_rate": 0.001, |
| "loss": 2.3434, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 1.3857008218765259, |
| "learning_rate": 0.001, |
| "loss": 2.3418, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.328792691230774, |
| "learning_rate": 0.001, |
| "loss": 2.351, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.2982306480407715, |
| "learning_rate": 0.001, |
| "loss": 2.3433, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.5809420347213745, |
| "learning_rate": 0.001, |
| "loss": 2.3577, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.4151922464370728, |
| "learning_rate": 0.001, |
| "loss": 2.3852, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.4341883659362793, |
| "learning_rate": 0.001, |
| "loss": 2.3577, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 1.6244245767593384, |
| "learning_rate": 0.001, |
| "loss": 2.3682, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 1.7657774686813354, |
| "learning_rate": 0.001, |
| "loss": 2.3503, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 1.8639613389968872, |
| "learning_rate": 0.001, |
| "loss": 2.3719, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.389522910118103, |
| "learning_rate": 0.001, |
| "loss": 2.3989, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.4958171844482422, |
| "learning_rate": 0.001, |
| "loss": 2.3753, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 1.6785283088684082, |
| "learning_rate": 0.001, |
| "loss": 2.3798, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.7483515739440918, |
| "learning_rate": 0.001, |
| "loss": 2.3783, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.5856812000274658, |
| "learning_rate": 0.001, |
| "loss": 2.3854, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.6331095695495605, |
| "learning_rate": 0.001, |
| "loss": 2.369, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.7543635368347168, |
| "learning_rate": 0.001, |
| "loss": 2.3685, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 1.5347305536270142, |
| "learning_rate": 0.001, |
| "loss": 2.3902, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.2079260349273682, |
| "learning_rate": 0.001, |
| "loss": 2.4057, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.109870195388794, |
| "learning_rate": 0.001, |
| "loss": 2.2369, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 0.941036581993103, |
| "learning_rate": 0.001, |
| "loss": 2.2422, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 1.3007092475891113, |
| "learning_rate": 0.001, |
| "loss": 2.2524, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 1.3963851928710938, |
| "learning_rate": 0.001, |
| "loss": 2.2683, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.2644718885421753, |
| "learning_rate": 0.001, |
| "loss": 2.293, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 1.173424243927002, |
| "learning_rate": 0.001, |
| "loss": 2.279, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.003485918045044, |
| "learning_rate": 0.001, |
| "loss": 2.2928, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 0.9148826599121094, |
| "learning_rate": 0.001, |
| "loss": 2.2939, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.1833020448684692, |
| "learning_rate": 0.001, |
| "loss": 2.2878, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 0.9342957735061646, |
| "learning_rate": 0.001, |
| "loss": 2.3092, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 1.3121254444122314, |
| "learning_rate": 0.001, |
| "loss": 2.269, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.312016248703003, |
| "learning_rate": 0.001, |
| "loss": 2.3006, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 1.1071215867996216, |
| "learning_rate": 0.001, |
| "loss": 2.3017, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 1.495505690574646, |
| "learning_rate": 0.001, |
| "loss": 2.3203, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.095226526260376, |
| "learning_rate": 0.001, |
| "loss": 2.2802, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 1.1401058435440063, |
| "learning_rate": 0.001, |
| "loss": 2.3278, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 1.0433752536773682, |
| "learning_rate": 0.001, |
| "loss": 2.341, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.3284567594528198, |
| "learning_rate": 0.001, |
| "loss": 2.3327, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 1.3075090646743774, |
| "learning_rate": 0.001, |
| "loss": 2.3224, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.1280314922332764, |
| "learning_rate": 0.001, |
| "loss": 2.3369, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.4304474592208862, |
| "learning_rate": 0.001, |
| "loss": 2.3305, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.4093077182769775, |
| "learning_rate": 0.001, |
| "loss": 2.3169, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 0.8837575912475586, |
| "learning_rate": 0.001, |
| "loss": 2.344, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.1318713426589966, |
| "learning_rate": 0.001, |
| "loss": 2.3622, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.010046124458313, |
| "learning_rate": 0.001, |
| "loss": 2.3521, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.1944527626037598, |
| "learning_rate": 0.001, |
| "loss": 2.3417, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.2252675294876099, |
| "learning_rate": 0.001, |
| "loss": 2.3597, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 1.2115516662597656, |
| "learning_rate": 0.001, |
| "loss": 2.3281, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.3430302143096924, |
| "learning_rate": 0.001, |
| "loss": 2.3678, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 1.059554934501648, |
| "learning_rate": 0.001, |
| "loss": 2.3558, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.26351797580719, |
| "learning_rate": 0.001, |
| "loss": 2.3541, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 1.0600953102111816, |
| "learning_rate": 0.001, |
| "loss": 2.219, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 1.5005885362625122, |
| "learning_rate": 0.001, |
| "loss": 2.2073, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.4235773086547852, |
| "learning_rate": 0.001, |
| "loss": 2.2287, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.0466982126235962, |
| "learning_rate": 0.001, |
| "loss": 2.2534, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 1.4175443649291992, |
| "learning_rate": 0.001, |
| "loss": 2.2323, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 1.1985677480697632, |
| "learning_rate": 0.001, |
| "loss": 2.2415, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 1.0888758897781372, |
| "learning_rate": 0.001, |
| "loss": 2.2418, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 0.9708693027496338, |
| "learning_rate": 0.001, |
| "loss": 2.2625, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 1.3424566984176636, |
| "learning_rate": 0.001, |
| "loss": 2.2504, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.1441328525543213, |
| "learning_rate": 0.001, |
| "loss": 2.2535, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.1090998649597168, |
| "learning_rate": 0.001, |
| "loss": 2.2799, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 0.9804235696792603, |
| "learning_rate": 0.001, |
| "loss": 2.2683, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.1989473104476929, |
| "learning_rate": 0.001, |
| "loss": 2.2779, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.6978230476379395, |
| "learning_rate": 0.001, |
| "loss": 2.2626, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.3387566804885864, |
| "learning_rate": 0.001, |
| "loss": 2.2908, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 1.1720887422561646, |
| "learning_rate": 0.001, |
| "loss": 2.2893, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.0618383884429932, |
| "learning_rate": 0.001, |
| "loss": 2.2833, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 0.9412760734558105, |
| "learning_rate": 0.001, |
| "loss": 2.283, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.4370046854019165, |
| "learning_rate": 0.001, |
| "loss": 2.2991, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.4847232103347778, |
| "learning_rate": 0.001, |
| "loss": 2.2973, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 0.9826359152793884, |
| "learning_rate": 0.001, |
| "loss": 2.2933, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 1.269861102104187, |
| "learning_rate": 0.001, |
| "loss": 2.3051, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.4577744007110596, |
| "learning_rate": 0.001, |
| "loss": 2.2907, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 1.3713974952697754, |
| "learning_rate": 0.001, |
| "loss": 2.2964, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 0.8531525731086731, |
| "learning_rate": 0.001, |
| "loss": 2.322, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 0.8501541614532471, |
| "learning_rate": 0.001, |
| "loss": 2.2948, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 1.437203288078308, |
| "learning_rate": 0.001, |
| "loss": 2.3299, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.27197265625, |
| "learning_rate": 0.001, |
| "loss": 2.3322, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 0.863344132900238, |
| "learning_rate": 0.001, |
| "loss": 2.3453, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 1.3954321146011353, |
| "learning_rate": 0.001, |
| "loss": 2.3386, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.2504416704177856, |
| "learning_rate": 0.001, |
| "loss": 2.3092, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 1.1983377933502197, |
| "learning_rate": 0.001, |
| "loss": 2.2074, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.1122028827667236, |
| "learning_rate": 0.001, |
| "loss": 2.1786, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.2796459197998047, |
| "learning_rate": 0.001, |
| "loss": 2.2029, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 1.1963897943496704, |
| "learning_rate": 0.001, |
| "loss": 2.1912, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.084876537322998, |
| "learning_rate": 0.001, |
| "loss": 2.2138, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.357012391090393, |
| "learning_rate": 0.001, |
| "loss": 2.2203, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.0108733177185059, |
| "learning_rate": 0.001, |
| "loss": 2.2175, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 1.3037816286087036, |
| "learning_rate": 0.001, |
| "loss": 2.2083, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.2153654098510742, |
| "learning_rate": 0.001, |
| "loss": 2.2167, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 1.1544158458709717, |
| "learning_rate": 0.001, |
| "loss": 2.2183, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.18621826171875, |
| "learning_rate": 0.001, |
| "loss": 2.217, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.1521902084350586, |
| "learning_rate": 0.001, |
| "loss": 2.2508, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.1992689371109009, |
| "learning_rate": 0.001, |
| "loss": 2.2515, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 1.1839091777801514, |
| "learning_rate": 0.001, |
| "loss": 2.2541, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 1.1625888347625732, |
| "learning_rate": 0.001, |
| "loss": 2.2538, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.188076376914978, |
| "learning_rate": 0.001, |
| "loss": 2.2535, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.0718660354614258, |
| "learning_rate": 0.001, |
| "loss": 2.2624, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 1.2299270629882812, |
| "learning_rate": 0.001, |
| "loss": 2.2516, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.1382802724838257, |
| "learning_rate": 0.001, |
| "loss": 2.2649, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.2164167165756226, |
| "learning_rate": 0.001, |
| "loss": 2.2592, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.0841768980026245, |
| "learning_rate": 0.001, |
| "loss": 2.2691, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 1.0041121244430542, |
| "learning_rate": 0.001, |
| "loss": 2.2783, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.1556775569915771, |
| "learning_rate": 0.001, |
| "loss": 2.2603, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 0.9205794930458069, |
| "learning_rate": 0.001, |
| "loss": 2.3004, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.3644750118255615, |
| "learning_rate": 0.001, |
| "loss": 2.2996, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 0.9331674575805664, |
| "learning_rate": 0.001, |
| "loss": 2.2768, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.0455474853515625, |
| "learning_rate": 0.001, |
| "loss": 2.2781, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 1.4594848155975342, |
| "learning_rate": 0.001, |
| "loss": 2.2765, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 1.0385990142822266, |
| "learning_rate": 0.001, |
| "loss": 2.2935, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 1.577081561088562, |
| "learning_rate": 0.001, |
| "loss": 2.2826, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.0930685997009277, |
| "learning_rate": 0.001, |
| "loss": 2.2722, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.1612266302108765, |
| "learning_rate": 0.001, |
| "loss": 2.1529, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.321443796157837, |
| "learning_rate": 0.001, |
| "loss": 2.1513, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 0.8242465257644653, |
| "learning_rate": 0.001, |
| "loss": 2.1593, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.2424418926239014, |
| "learning_rate": 0.001, |
| "loss": 2.1824, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.1042993068695068, |
| "learning_rate": 0.001, |
| "loss": 2.1891, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 0.9918628334999084, |
| "learning_rate": 0.001, |
| "loss": 2.1892, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.0536214113235474, |
| "learning_rate": 0.001, |
| "loss": 2.1884, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.1322977542877197, |
| "learning_rate": 0.001, |
| "loss": 2.1944, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 1.2656852006912231, |
| "learning_rate": 0.001, |
| "loss": 2.1963, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.2672237157821655, |
| "learning_rate": 0.001, |
| "loss": 2.2046, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 1.2874271869659424, |
| "learning_rate": 0.001, |
| "loss": 2.1938, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 0.881458044052124, |
| "learning_rate": 0.001, |
| "loss": 2.1936, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 1.2030190229415894, |
| "learning_rate": 0.001, |
| "loss": 2.2008, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 0.9939525127410889, |
| "learning_rate": 0.001, |
| "loss": 2.2307, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 0.927545428276062, |
| "learning_rate": 0.001, |
| "loss": 2.2114, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 1.0802602767944336, |
| "learning_rate": 0.001, |
| "loss": 2.2072, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.0599498748779297, |
| "learning_rate": 0.001, |
| "loss": 2.2306, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 1.3177958726882935, |
| "learning_rate": 0.001, |
| "loss": 2.2166, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.34926438331604, |
| "learning_rate": 0.001, |
| "loss": 2.2222, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.0225821733474731, |
| "learning_rate": 0.001, |
| "loss": 2.247, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 1.190183401107788, |
| "learning_rate": 0.001, |
| "loss": 2.2398, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 1.215757131576538, |
| "learning_rate": 0.001, |
| "loss": 2.2526, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 1.2403733730316162, |
| "learning_rate": 0.001, |
| "loss": 2.2506, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.2998617887496948, |
| "learning_rate": 0.001, |
| "loss": 2.2337, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 0.9788816571235657, |
| "learning_rate": 0.001, |
| "loss": 2.2501, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 1.0035640001296997, |
| "learning_rate": 0.001, |
| "loss": 2.2456, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 0.9567862749099731, |
| "learning_rate": 0.001, |
| "loss": 2.2563, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 1.2522963285446167, |
| "learning_rate": 0.001, |
| "loss": 2.2691, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.3323355913162231, |
| "learning_rate": 0.001, |
| "loss": 2.2682, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.022301197052002, |
| "learning_rate": 0.001, |
| "loss": 2.2829, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 0.9632961750030518, |
| "learning_rate": 0.001, |
| "loss": 2.2159, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 0.9349167346954346, |
| "learning_rate": 0.001, |
| "loss": 2.1377, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 1.2619801759719849, |
| "learning_rate": 0.001, |
| "loss": 2.1479, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.0707018375396729, |
| "learning_rate": 0.001, |
| "loss": 2.1212, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 1.2993378639221191, |
| "learning_rate": 0.001, |
| "loss": 2.1276, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.2283509969711304, |
| "learning_rate": 0.001, |
| "loss": 2.1508, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.0965924263000488, |
| "learning_rate": 0.001, |
| "loss": 2.1642, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.5436381101608276, |
| "learning_rate": 0.001, |
| "loss": 2.1672, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.2528314590454102, |
| "learning_rate": 0.001, |
| "loss": 2.1684, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.1026721000671387, |
| "learning_rate": 0.001, |
| "loss": 2.1799, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.2602219581604004, |
| "learning_rate": 0.001, |
| "loss": 2.1647, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 1.118823528289795, |
| "learning_rate": 0.001, |
| "loss": 2.1881, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.1456882953643799, |
| "learning_rate": 0.001, |
| "loss": 2.1736, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 0.9726011157035828, |
| "learning_rate": 0.001, |
| "loss": 2.1651, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 0.9580657482147217, |
| "learning_rate": 0.001, |
| "loss": 2.1747, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 1.0189781188964844, |
| "learning_rate": 0.001, |
| "loss": 2.1691, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 0.9164172410964966, |
| "learning_rate": 0.001, |
| "loss": 2.1956, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.3433533906936646, |
| "learning_rate": 0.001, |
| "loss": 2.1881, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 1.2436511516571045, |
| "learning_rate": 0.001, |
| "loss": 2.1895, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 1.0203531980514526, |
| "learning_rate": 0.001, |
| "loss": 2.2088, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 0.9450057744979858, |
| "learning_rate": 0.001, |
| "loss": 2.2119, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.4776325225830078, |
| "learning_rate": 0.001, |
| "loss": 2.2145, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 1.3227741718292236, |
| "learning_rate": 0.001, |
| "loss": 2.2034, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.2652865648269653, |
| "learning_rate": 0.001, |
| "loss": 2.2091, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.1568591594696045, |
| "learning_rate": 0.001, |
| "loss": 2.2176, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 1.1600966453552246, |
| "learning_rate": 0.001, |
| "loss": 2.2186, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.0852841138839722, |
| "learning_rate": 0.001, |
| "loss": 2.2318, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.266211748123169, |
| "learning_rate": 0.001, |
| "loss": 2.2282, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.2288943529129028, |
| "learning_rate": 0.001, |
| "loss": 2.228, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.3902039527893066, |
| "learning_rate": 0.001, |
| "loss": 2.2269, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 0.937089741230011, |
| "learning_rate": 0.001, |
| "loss": 2.2499, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.1552362442016602, |
| "learning_rate": 0.001, |
| "loss": 2.1682, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.3249483108520508, |
| "learning_rate": 0.001, |
| "loss": 2.0807, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.5028316974639893, |
| "learning_rate": 0.001, |
| "loss": 2.1071, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.7541735172271729, |
| "learning_rate": 0.001, |
| "loss": 2.1075, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.3183276653289795, |
| "learning_rate": 0.001, |
| "loss": 2.1169, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.4954915046691895, |
| "learning_rate": 0.001, |
| "loss": 2.1122, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.128176212310791, |
| "learning_rate": 0.001, |
| "loss": 2.1316, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 1.1584060192108154, |
| "learning_rate": 0.001, |
| "loss": 2.1386, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.363810658454895, |
| "learning_rate": 0.001, |
| "loss": 2.1218, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.2617250680923462, |
| "learning_rate": 0.001, |
| "loss": 2.1395, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.2358677387237549, |
| "learning_rate": 0.001, |
| "loss": 2.1483, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.4727267026901245, |
| "learning_rate": 0.001, |
| "loss": 2.1461, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.2978276014328003, |
| "learning_rate": 0.001, |
| "loss": 2.164, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.1162272691726685, |
| "learning_rate": 0.001, |
| "loss": 2.1393, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.5267865657806396, |
| "learning_rate": 0.001, |
| "loss": 2.1532, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.349844217300415, |
| "learning_rate": 0.001, |
| "loss": 2.1577, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.7891136407852173, |
| "learning_rate": 0.001, |
| "loss": 2.1496, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 1.613793134689331, |
| "learning_rate": 0.001, |
| "loss": 2.162, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.121187686920166, |
| "learning_rate": 0.001, |
| "loss": 2.159, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.2575411796569824, |
| "learning_rate": 0.001, |
| "loss": 2.1651, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.5651819705963135, |
| "learning_rate": 0.001, |
| "loss": 2.1736, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.4974477291107178, |
| "learning_rate": 0.001, |
| "loss": 2.1858, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.1631637811660767, |
| "learning_rate": 0.001, |
| "loss": 2.1891, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.5709234476089478, |
| "learning_rate": 0.001, |
| "loss": 2.1827, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.447441577911377, |
| "learning_rate": 0.001, |
| "loss": 2.2088, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 1.2315850257873535, |
| "learning_rate": 0.001, |
| "loss": 2.2009, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 1.4171655178070068, |
| "learning_rate": 0.001, |
| "loss": 2.1866, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.5399742126464844, |
| "learning_rate": 0.001, |
| "loss": 2.1967, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.135227918624878, |
| "learning_rate": 0.001, |
| "loss": 2.1932, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 1.1006965637207031, |
| "learning_rate": 0.001, |
| "loss": 2.2132, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.530867576599121, |
| "learning_rate": 0.001, |
| "loss": 2.2207, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 1.686764121055603, |
| "learning_rate": 0.001, |
| "loss": 2.1332, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.4816681146621704, |
| "learning_rate": 0.001, |
| "loss": 2.0556, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.2083070278167725, |
| "learning_rate": 0.001, |
| "loss": 2.0783, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.873100757598877, |
| "learning_rate": 0.001, |
| "loss": 2.0883, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 1.3434606790542603, |
| "learning_rate": 0.001, |
| "loss": 2.0975, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 1.4237688779830933, |
| "learning_rate": 0.001, |
| "loss": 2.0818, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.3739618062973022, |
| "learning_rate": 0.001, |
| "loss": 2.0801, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.2694149017333984, |
| "learning_rate": 0.001, |
| "loss": 2.1075, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 1.557441234588623, |
| "learning_rate": 0.001, |
| "loss": 2.0868, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.2402006387710571, |
| "learning_rate": 0.001, |
| "loss": 2.1188, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 1.2395163774490356, |
| "learning_rate": 0.001, |
| "loss": 2.1081, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 1.3019064664840698, |
| "learning_rate": 0.001, |
| "loss": 2.1262, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.385576605796814, |
| "learning_rate": 0.001, |
| "loss": 2.1431, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 1.610942006111145, |
| "learning_rate": 0.001, |
| "loss": 2.1083, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 1.2178122997283936, |
| "learning_rate": 0.001, |
| "loss": 2.1371, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 1.3105348348617554, |
| "learning_rate": 0.001, |
| "loss": 2.1256, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 1.3397716283798218, |
| "learning_rate": 0.001, |
| "loss": 2.1286, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.9577621221542358, |
| "learning_rate": 0.001, |
| "loss": 2.1523, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.2940340042114258, |
| "learning_rate": 0.001, |
| "loss": 2.1451, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.330095648765564, |
| "learning_rate": 0.001, |
| "loss": 2.1563, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.4415050745010376, |
| "learning_rate": 0.001, |
| "loss": 2.1577, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.3965532779693604, |
| "learning_rate": 0.001, |
| "loss": 2.1528, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.4386045932769775, |
| "learning_rate": 0.001, |
| "loss": 2.1542, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 1.6891236305236816, |
| "learning_rate": 0.001, |
| "loss": 2.1646, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.355452537536621, |
| "learning_rate": 0.001, |
| "loss": 2.1637, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 1.5033726692199707, |
| "learning_rate": 0.001, |
| "loss": 2.1679, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.8620679378509521, |
| "learning_rate": 0.001, |
| "loss": 2.152, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 1.3260308504104614, |
| "learning_rate": 0.001, |
| "loss": 2.1783, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.2607606649398804, |
| "learning_rate": 0.001, |
| "loss": 2.1856, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 1.466381311416626, |
| "learning_rate": 0.001, |
| "loss": 2.1857, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.292051911354065, |
| "learning_rate": 0.001, |
| "loss": 2.1806, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 1.7954028844833374, |
| "learning_rate": 0.001, |
| "loss": 2.0906, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 1.6722086668014526, |
| "learning_rate": 0.001, |
| "loss": 2.0501, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 2.243319272994995, |
| "learning_rate": 0.001, |
| "loss": 2.0422, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 2.0324037075042725, |
| "learning_rate": 0.001, |
| "loss": 2.064, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 1.8791396617889404, |
| "learning_rate": 0.001, |
| "loss": 2.0754, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 2.5221776962280273, |
| "learning_rate": 0.001, |
| "loss": 2.0617, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 2.2298007011413574, |
| "learning_rate": 0.001, |
| "loss": 2.059, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 2.844670057296753, |
| "learning_rate": 0.001, |
| "loss": 2.055, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 1.8920618295669556, |
| "learning_rate": 0.001, |
| "loss": 2.1047, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 2.7440826892852783, |
| "learning_rate": 0.001, |
| "loss": 2.0731, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 1.8846522569656372, |
| "learning_rate": 0.001, |
| "loss": 2.0671, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 2.325923204421997, |
| "learning_rate": 0.001, |
| "loss": 2.0712, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 1.8636107444763184, |
| "learning_rate": 0.001, |
| "loss": 2.0903, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 1.8899035453796387, |
| "learning_rate": 0.001, |
| "loss": 2.0982, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 2.493457317352295, |
| "learning_rate": 0.001, |
| "loss": 2.1104, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 1.93169367313385, |
| "learning_rate": 0.001, |
| "loss": 2.1031, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 2.1825897693634033, |
| "learning_rate": 0.001, |
| "loss": 2.1127, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 1.7733447551727295, |
| "learning_rate": 0.001, |
| "loss": 2.1179, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 1.6951693296432495, |
| "learning_rate": 0.001, |
| "loss": 2.1353, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 1.6487561464309692, |
| "learning_rate": 0.001, |
| "loss": 2.1199, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 2.3216216564178467, |
| "learning_rate": 0.001, |
| "loss": 2.1412, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 2.1835920810699463, |
| "learning_rate": 0.001, |
| "loss": 2.1387, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 2.5037333965301514, |
| "learning_rate": 0.001, |
| "loss": 2.1345, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 1.924839735031128, |
| "learning_rate": 0.001, |
| "loss": 2.1384, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 2.1649904251098633, |
| "learning_rate": 0.001, |
| "loss": 2.1526, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 1.911528468132019, |
| "learning_rate": 0.001, |
| "loss": 2.1241, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 1.9926867485046387, |
| "learning_rate": 0.001, |
| "loss": 2.1524, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 1.946124792098999, |
| "learning_rate": 0.001, |
| "loss": 2.1363, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 1.9511570930480957, |
| "learning_rate": 0.001, |
| "loss": 2.1686, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 2.4123759269714355, |
| "learning_rate": 0.001, |
| "loss": 2.1474, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 1.9220401048660278, |
| "learning_rate": 0.001, |
| "loss": 2.1668, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 1.3430564403533936, |
| "learning_rate": 0.001, |
| "loss": 2.0692, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 1.3940428495407104, |
| "learning_rate": 0.001, |
| "loss": 1.9975, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.4681583642959595, |
| "learning_rate": 0.001, |
| "loss": 2.0075, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 0.9912952780723572, |
| "learning_rate": 0.001, |
| "loss": 2.0133, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.3190398216247559, |
| "learning_rate": 0.001, |
| "loss": 2.0483, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.2165215015411377, |
| "learning_rate": 0.001, |
| "loss": 2.0469, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.2185070514678955, |
| "learning_rate": 0.001, |
| "loss": 2.0529, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.1104484796524048, |
| "learning_rate": 0.001, |
| "loss": 2.0501, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.1159557104110718, |
| "learning_rate": 0.001, |
| "loss": 2.056, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 1.1061629056930542, |
| "learning_rate": 0.001, |
| "loss": 2.072, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.1867953538894653, |
| "learning_rate": 0.001, |
| "loss": 2.075, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.3672994375228882, |
| "learning_rate": 0.001, |
| "loss": 2.0571, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.466471791267395, |
| "learning_rate": 0.001, |
| "loss": 2.0804, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.4500887393951416, |
| "learning_rate": 0.001, |
| "loss": 2.0849, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.4271094799041748, |
| "learning_rate": 0.001, |
| "loss": 2.074, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.4136507511138916, |
| "learning_rate": 0.001, |
| "loss": 2.0754, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.851994276046753, |
| "learning_rate": 0.001, |
| "loss": 2.1002, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.2131636142730713, |
| "learning_rate": 0.001, |
| "loss": 2.0822, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.9536834955215454, |
| "learning_rate": 0.001, |
| "loss": 2.0967, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.3947795629501343, |
| "learning_rate": 0.001, |
| "loss": 2.0994, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.3950212001800537, |
| "learning_rate": 0.001, |
| "loss": 2.1009, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.3273499011993408, |
| "learning_rate": 0.001, |
| "loss": 2.1082, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.3609721660614014, |
| "learning_rate": 0.001, |
| "loss": 2.1004, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.6250184774398804, |
| "learning_rate": 0.001, |
| "loss": 2.1293, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 1.179201602935791, |
| "learning_rate": 0.001, |
| "loss": 2.1137, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.2400749921798706, |
| "learning_rate": 0.001, |
| "loss": 2.1199, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.4110934734344482, |
| "learning_rate": 0.001, |
| "loss": 2.1298, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.213017463684082, |
| "learning_rate": 0.001, |
| "loss": 2.1216, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.3616845607757568, |
| "learning_rate": 0.001, |
| "loss": 2.1267, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.1601148843765259, |
| "learning_rate": 0.001, |
| "loss": 2.1222, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.6087828874588013, |
| "learning_rate": 0.001, |
| "loss": 2.121, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.7387524843215942, |
| "learning_rate": 0.001, |
| "loss": 2.0578, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.2267652750015259, |
| "learning_rate": 0.001, |
| "loss": 1.9945, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.6405795812606812, |
| "learning_rate": 0.001, |
| "loss": 2.0002, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 1.1666873693466187, |
| "learning_rate": 0.001, |
| "loss": 2.015, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.1755584478378296, |
| "learning_rate": 0.001, |
| "loss": 2.0102, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.324985384941101, |
| "learning_rate": 0.001, |
| "loss": 2.0359, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.079712152481079, |
| "learning_rate": 0.001, |
| "loss": 1.9905, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 1.401971697807312, |
| "learning_rate": 0.001, |
| "loss": 2.0207, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 1.2873399257659912, |
| "learning_rate": 0.001, |
| "loss": 2.0211, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.115574598312378, |
| "learning_rate": 0.001, |
| "loss": 2.0332, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.2931740283966064, |
| "learning_rate": 0.001, |
| "loss": 2.0606, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.0610228776931763, |
| "learning_rate": 0.001, |
| "loss": 2.0494, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.0473644733428955, |
| "learning_rate": 0.001, |
| "loss": 2.0388, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.187235713005066, |
| "learning_rate": 0.001, |
| "loss": 2.0494, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.115997076034546, |
| "learning_rate": 0.001, |
| "loss": 2.0444, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.2527693510055542, |
| "learning_rate": 0.001, |
| "loss": 2.0718, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.5422104597091675, |
| "learning_rate": 0.001, |
| "loss": 2.0718, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.2862259149551392, |
| "learning_rate": 0.001, |
| "loss": 2.0728, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.295546293258667, |
| "learning_rate": 0.001, |
| "loss": 2.0683, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.2638643980026245, |
| "learning_rate": 0.001, |
| "loss": 2.0751, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.4461342096328735, |
| "learning_rate": 0.001, |
| "loss": 2.071, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.4849462509155273, |
| "learning_rate": 0.001, |
| "loss": 2.0922, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.451640009880066, |
| "learning_rate": 0.001, |
| "loss": 2.0789, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.152021884918213, |
| "learning_rate": 0.001, |
| "loss": 2.0748, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.290688395500183, |
| "learning_rate": 0.001, |
| "loss": 2.0829, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 1.3290468454360962, |
| "learning_rate": 0.001, |
| "loss": 2.0807, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 1.049330472946167, |
| "learning_rate": 0.001, |
| "loss": 2.0989, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.2603390216827393, |
| "learning_rate": 0.001, |
| "loss": 2.0839, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 1.6097103357315063, |
| "learning_rate": 0.001, |
| "loss": 2.0925, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.294921875, |
| "learning_rate": 0.001, |
| "loss": 2.1088, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.0576322078704834, |
| "learning_rate": 0.001, |
| "loss": 2.1014, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.2913296222686768, |
| "learning_rate": 0.001, |
| "loss": 2.0329, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.1250892877578735, |
| "learning_rate": 0.001, |
| "loss": 1.9698, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.4452911615371704, |
| "learning_rate": 0.001, |
| "loss": 1.9609, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.107289433479309, |
| "learning_rate": 0.001, |
| "loss": 1.9791, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.0976715087890625, |
| "learning_rate": 0.001, |
| "loss": 1.9739, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.0388271808624268, |
| "learning_rate": 0.001, |
| "loss": 1.9921, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 0.9731875061988831, |
| "learning_rate": 0.001, |
| "loss": 2.0035, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.587350845336914, |
| "learning_rate": 0.001, |
| "loss": 2.0067, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.0208741426467896, |
| "learning_rate": 0.001, |
| "loss": 2.0131, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.1413222551345825, |
| "learning_rate": 0.001, |
| "loss": 1.9982, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.7160639762878418, |
| "learning_rate": 0.001, |
| "loss": 1.9979, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 1.1650629043579102, |
| "learning_rate": 0.001, |
| "loss": 2.0399, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.4147566556930542, |
| "learning_rate": 0.001, |
| "loss": 2.0203, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 1.616251826286316, |
| "learning_rate": 0.001, |
| "loss": 2.0179, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 1.2545949220657349, |
| "learning_rate": 0.001, |
| "loss": 2.0283, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.191787838935852, |
| "learning_rate": 0.001, |
| "loss": 2.0339, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.0043002367019653, |
| "learning_rate": 0.001, |
| "loss": 2.0448, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.0854709148406982, |
| "learning_rate": 0.001, |
| "loss": 2.0488, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 1.4309149980545044, |
| "learning_rate": 0.001, |
| "loss": 2.0632, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 1.083609700202942, |
| "learning_rate": 0.001, |
| "loss": 2.0538, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.0403717756271362, |
| "learning_rate": 0.001, |
| "loss": 2.0595, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.0985143184661865, |
| "learning_rate": 0.001, |
| "loss": 2.0622, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.4709973335266113, |
| "learning_rate": 0.001, |
| "loss": 2.0665, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 1.333082675933838, |
| "learning_rate": 0.001, |
| "loss": 2.0751, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 1.4253976345062256, |
| "learning_rate": 0.001, |
| "loss": 2.0654, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 0.9215937852859497, |
| "learning_rate": 0.001, |
| "loss": 2.0716, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.0642420053482056, |
| "learning_rate": 0.001, |
| "loss": 2.0668, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.1206321716308594, |
| "learning_rate": 0.001, |
| "loss": 2.0576, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.4976261854171753, |
| "learning_rate": 0.001, |
| "loss": 2.0805, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.3443635702133179, |
| "learning_rate": 0.001, |
| "loss": 2.0714, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 0.9091135859489441, |
| "learning_rate": 0.001, |
| "loss": 2.0872, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.4851621389389038, |
| "learning_rate": 0.001, |
| "loss": 1.9905, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 2.0039730072021484, |
| "learning_rate": 0.001, |
| "loss": 1.9387, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.3306325674057007, |
| "learning_rate": 0.001, |
| "loss": 1.9439, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 1.3993875980377197, |
| "learning_rate": 0.001, |
| "loss": 1.9633, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.2553470134735107, |
| "learning_rate": 0.001, |
| "loss": 1.9568, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 1.2187873125076294, |
| "learning_rate": 0.001, |
| "loss": 1.9731, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 1.3381234407424927, |
| "learning_rate": 0.001, |
| "loss": 1.9704, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.2848703861236572, |
| "learning_rate": 0.001, |
| "loss": 1.9669, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 1.2281534671783447, |
| "learning_rate": 0.001, |
| "loss": 1.9694, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.4465069770812988, |
| "learning_rate": 0.001, |
| "loss": 1.9953, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 1.279419183731079, |
| "learning_rate": 0.001, |
| "loss": 1.9941, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 0.9530718326568604, |
| "learning_rate": 0.001, |
| "loss": 1.9907, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.0085426568984985, |
| "learning_rate": 0.001, |
| "loss": 2.0044, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 0.9455175995826721, |
| "learning_rate": 0.001, |
| "loss": 2.0158, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.0811785459518433, |
| "learning_rate": 0.001, |
| "loss": 2.0083, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.1927356719970703, |
| "learning_rate": 0.001, |
| "loss": 2.0165, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.1006433963775635, |
| "learning_rate": 0.001, |
| "loss": 2.0339, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.3249398469924927, |
| "learning_rate": 0.001, |
| "loss": 2.0158, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 0.9829372763633728, |
| "learning_rate": 0.001, |
| "loss": 2.0007, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.1615664958953857, |
| "learning_rate": 0.001, |
| "loss": 2.0224, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 0.927891194820404, |
| "learning_rate": 0.001, |
| "loss": 2.0409, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 1.3460584878921509, |
| "learning_rate": 0.001, |
| "loss": 2.0524, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 1.1224879026412964, |
| "learning_rate": 0.001, |
| "loss": 2.0366, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.0611379146575928, |
| "learning_rate": 0.001, |
| "loss": 2.0434, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 0.874144434928894, |
| "learning_rate": 0.001, |
| "loss": 2.0535, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.5935416221618652, |
| "learning_rate": 0.001, |
| "loss": 2.0518, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 1.1362969875335693, |
| "learning_rate": 0.001, |
| "loss": 2.0513, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.1391018629074097, |
| "learning_rate": 0.001, |
| "loss": 2.0559, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.5134310722351074, |
| "learning_rate": 0.001, |
| "loss": 2.0598, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.102142572402954, |
| "learning_rate": 0.001, |
| "loss": 2.0483, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.028840184211731, |
| "learning_rate": 0.001, |
| "loss": 2.068, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.2381364107131958, |
| "learning_rate": 0.001, |
| "loss": 1.9441, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 1.2475801706314087, |
| "learning_rate": 0.001, |
| "loss": 1.9169, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.440842866897583, |
| "learning_rate": 0.001, |
| "loss": 1.9157, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.1453300714492798, |
| "learning_rate": 0.001, |
| "loss": 1.9223, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.0690298080444336, |
| "learning_rate": 0.001, |
| "loss": 1.9323, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.7190358638763428, |
| "learning_rate": 0.001, |
| "loss": 1.9623, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 1.0190703868865967, |
| "learning_rate": 0.001, |
| "loss": 1.9552, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 1.0576410293579102, |
| "learning_rate": 0.001, |
| "loss": 1.9724, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.1603765487670898, |
| "learning_rate": 0.001, |
| "loss": 1.9563, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.3184884786605835, |
| "learning_rate": 0.001, |
| "loss": 1.9375, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 0.9913526177406311, |
| "learning_rate": 0.001, |
| "loss": 1.967, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.3647373914718628, |
| "learning_rate": 0.001, |
| "loss": 1.9812, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.1337854862213135, |
| "learning_rate": 0.001, |
| "loss": 1.9927, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.3728899955749512, |
| "learning_rate": 0.001, |
| "loss": 1.9755, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.633349061012268, |
| "learning_rate": 0.001, |
| "loss": 2.0141, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 1.2653772830963135, |
| "learning_rate": 0.001, |
| "loss": 2.0159, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 1.3831946849822998, |
| "learning_rate": 0.001, |
| "loss": 1.9999, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.022870659828186, |
| "learning_rate": 0.001, |
| "loss": 2.0143, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.227567195892334, |
| "learning_rate": 0.001, |
| "loss": 2.0142, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.2944329977035522, |
| "learning_rate": 0.001, |
| "loss": 1.9835, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.64191472530365, |
| "learning_rate": 0.001, |
| "loss": 1.9901, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.162553071975708, |
| "learning_rate": 0.001, |
| "loss": 2.0045, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 1.1812092065811157, |
| "learning_rate": 0.001, |
| "loss": 2.0317, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.1358566284179688, |
| "learning_rate": 0.001, |
| "loss": 2.0088, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.4215623140335083, |
| "learning_rate": 0.001, |
| "loss": 2.0188, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.0207571983337402, |
| "learning_rate": 0.001, |
| "loss": 2.0421, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.1766799688339233, |
| "learning_rate": 0.001, |
| "loss": 2.0261, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.167883276939392, |
| "learning_rate": 0.001, |
| "loss": 2.0145, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.4235504865646362, |
| "learning_rate": 0.001, |
| "loss": 2.0521, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 0.9883701205253601, |
| "learning_rate": 0.001, |
| "loss": 2.028, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 1.138373613357544, |
| "learning_rate": 0.001, |
| "loss": 2.0154, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 1.3630508184432983, |
| "learning_rate": 0.001, |
| "loss": 1.8929, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 1.895157814025879, |
| "learning_rate": 0.001, |
| "loss": 1.9025, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.3133440017700195, |
| "learning_rate": 0.001, |
| "loss": 1.9233, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.2981374263763428, |
| "learning_rate": 0.001, |
| "loss": 1.9224, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.547069787979126, |
| "learning_rate": 0.001, |
| "loss": 1.9235, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.1872363090515137, |
| "learning_rate": 0.001, |
| "loss": 1.9043, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.2956496477127075, |
| "learning_rate": 0.001, |
| "loss": 1.941, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.8166218996047974, |
| "learning_rate": 0.001, |
| "loss": 1.9277, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.355141520500183, |
| "learning_rate": 0.001, |
| "loss": 1.9502, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.3605191707611084, |
| "learning_rate": 0.001, |
| "loss": 1.9556, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.4461206197738647, |
| "learning_rate": 0.001, |
| "loss": 1.949, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.3594260215759277, |
| "learning_rate": 0.001, |
| "loss": 1.9486, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.4502143859863281, |
| "learning_rate": 0.001, |
| "loss": 1.969, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 1.5335756540298462, |
| "learning_rate": 0.001, |
| "loss": 1.9664, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.6835445165634155, |
| "learning_rate": 0.001, |
| "loss": 1.9612, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.0857112407684326, |
| "learning_rate": 0.001, |
| "loss": 1.9835, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.4577279090881348, |
| "learning_rate": 0.001, |
| "loss": 1.9784, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.52741539478302, |
| "learning_rate": 0.001, |
| "loss": 1.9788, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 1.308344841003418, |
| "learning_rate": 0.001, |
| "loss": 1.9791, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.5521067380905151, |
| "learning_rate": 0.001, |
| "loss": 1.9852, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 1.5745213031768799, |
| "learning_rate": 0.001, |
| "loss": 1.9962, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.4261987209320068, |
| "learning_rate": 0.001, |
| "loss": 2.0037, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.3460932970046997, |
| "learning_rate": 0.001, |
| "loss": 1.985, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.37859308719635, |
| "learning_rate": 0.001, |
| "loss": 2.0048, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 1.5813380479812622, |
| "learning_rate": 0.001, |
| "loss": 1.9964, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.2057660818099976, |
| "learning_rate": 0.001, |
| "loss": 2.0085, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.7127104997634888, |
| "learning_rate": 0.001, |
| "loss": 2.0064, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.3543590307235718, |
| "learning_rate": 0.001, |
| "loss": 2.0216, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.3376524448394775, |
| "learning_rate": 0.001, |
| "loss": 2.0191, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 1.2518678903579712, |
| "learning_rate": 0.001, |
| "loss": 2.0098, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.4121952056884766, |
| "learning_rate": 0.001, |
| "loss": 2.006, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.2436646223068237, |
| "learning_rate": 0.001, |
| "loss": 1.9104, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.7367206811904907, |
| "learning_rate": 0.001, |
| "loss": 1.8809, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.4106762409210205, |
| "learning_rate": 0.001, |
| "loss": 1.871, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.8812962770462036, |
| "learning_rate": 0.001, |
| "loss": 1.8948, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.5063000917434692, |
| "learning_rate": 0.001, |
| "loss": 1.9008, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.3886702060699463, |
| "learning_rate": 0.001, |
| "loss": 1.9098, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.7176951169967651, |
| "learning_rate": 0.001, |
| "loss": 1.911, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.4107688665390015, |
| "learning_rate": 0.001, |
| "loss": 1.9095, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 1.4663777351379395, |
| "learning_rate": 0.001, |
| "loss": 1.9209, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 1.4027447700500488, |
| "learning_rate": 0.001, |
| "loss": 1.9371, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.5440044403076172, |
| "learning_rate": 0.001, |
| "loss": 1.9344, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 1.1808867454528809, |
| "learning_rate": 0.001, |
| "loss": 1.9392, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.8131669759750366, |
| "learning_rate": 0.001, |
| "loss": 1.9282, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.7838051319122314, |
| "learning_rate": 0.001, |
| "loss": 1.939, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.6978235244750977, |
| "learning_rate": 0.001, |
| "loss": 1.9372, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.7364856004714966, |
| "learning_rate": 0.001, |
| "loss": 1.9371, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 1.515915870666504, |
| "learning_rate": 0.001, |
| "loss": 1.9544, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.5563019514083862, |
| "learning_rate": 0.001, |
| "loss": 1.9404, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 1.6745736598968506, |
| "learning_rate": 0.001, |
| "loss": 1.959, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.301689863204956, |
| "learning_rate": 0.001, |
| "loss": 1.9624, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.9219502210617065, |
| "learning_rate": 0.001, |
| "loss": 1.9617, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.2895933389663696, |
| "learning_rate": 0.001, |
| "loss": 1.9755, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 1.6644096374511719, |
| "learning_rate": 0.001, |
| "loss": 1.9767, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 1.2009501457214355, |
| "learning_rate": 0.001, |
| "loss": 1.9859, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 1.6504448652267456, |
| "learning_rate": 0.001, |
| "loss": 1.9893, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.294108510017395, |
| "learning_rate": 0.001, |
| "loss": 1.9821, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 1.3442059755325317, |
| "learning_rate": 0.001, |
| "loss": 1.9768, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.323499321937561, |
| "learning_rate": 0.001, |
| "loss": 1.9862, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.263298749923706, |
| "learning_rate": 0.001, |
| "loss": 1.9949, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 1.1355193853378296, |
| "learning_rate": 0.001, |
| "loss": 1.9974, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 1.4146533012390137, |
| "learning_rate": 0.001, |
| "loss": 1.989, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 1.960270643234253, |
| "learning_rate": 0.001, |
| "loss": 1.8564, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 1.4034452438354492, |
| "learning_rate": 0.001, |
| "loss": 1.8468, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 1.3908497095108032, |
| "learning_rate": 0.001, |
| "loss": 1.8627, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 1.7255970239639282, |
| "learning_rate": 0.001, |
| "loss": 1.8636, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 1.5471359491348267, |
| "learning_rate": 0.001, |
| "loss": 1.8713, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 1.8626443147659302, |
| "learning_rate": 0.001, |
| "loss": 1.8739, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 2.0910379886627197, |
| "learning_rate": 0.001, |
| "loss": 1.8891, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 1.719954013824463, |
| "learning_rate": 0.001, |
| "loss": 1.8907, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.8111015558242798, |
| "learning_rate": 0.001, |
| "loss": 1.8979, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 1.9263817071914673, |
| "learning_rate": 0.001, |
| "loss": 1.8959, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 1.8665021657943726, |
| "learning_rate": 0.001, |
| "loss": 1.9126, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 1.5710049867630005, |
| "learning_rate": 0.001, |
| "loss": 1.9129, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 1.529770016670227, |
| "learning_rate": 0.001, |
| "loss": 1.9401, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 1.5240123271942139, |
| "learning_rate": 0.001, |
| "loss": 1.9269, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 1.6057839393615723, |
| "learning_rate": 0.001, |
| "loss": 1.9255, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 1.86514151096344, |
| "learning_rate": 0.001, |
| "loss": 1.9281, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 1.2546050548553467, |
| "learning_rate": 0.001, |
| "loss": 1.9388, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.3954681158065796, |
| "learning_rate": 0.001, |
| "loss": 1.9281, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 1.4027971029281616, |
| "learning_rate": 0.001, |
| "loss": 1.9497, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 1.707762598991394, |
| "learning_rate": 0.001, |
| "loss": 1.9408, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 1.932440161705017, |
| "learning_rate": 0.001, |
| "loss": 1.952, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 1.7187687158584595, |
| "learning_rate": 0.001, |
| "loss": 1.9449, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 1.6193797588348389, |
| "learning_rate": 0.001, |
| "loss": 1.9647, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 1.7175681591033936, |
| "learning_rate": 0.001, |
| "loss": 1.9666, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 1.404217004776001, |
| "learning_rate": 0.001, |
| "loss": 1.9607, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 1.708268165588379, |
| "learning_rate": 0.001, |
| "loss": 1.9749, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 1.5690302848815918, |
| "learning_rate": 0.001, |
| "loss": 1.9723, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.7933155298233032, |
| "learning_rate": 0.001, |
| "loss": 1.9668, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 1.6681078672409058, |
| "learning_rate": 0.001, |
| "loss": 1.9825, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 1.6313607692718506, |
| "learning_rate": 0.001, |
| "loss": 2.0023, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 1.3496909141540527, |
| "learning_rate": 0.001, |
| "loss": 1.97, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 1.725895881652832, |
| "learning_rate": 0.001, |
| "loss": 1.8485, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 1.2111670970916748, |
| "learning_rate": 0.001, |
| "loss": 1.8429, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 1.6606335639953613, |
| "learning_rate": 0.001, |
| "loss": 1.8409, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.2555725574493408, |
| "learning_rate": 0.001, |
| "loss": 1.8649, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 1.5818908214569092, |
| "learning_rate": 0.001, |
| "loss": 1.8675, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 1.6778271198272705, |
| "learning_rate": 0.001, |
| "loss": 1.8732, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 1.4768215417861938, |
| "learning_rate": 0.001, |
| "loss": 1.868, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 1.2955535650253296, |
| "learning_rate": 0.001, |
| "loss": 1.8781, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 1.585286259651184, |
| "learning_rate": 0.001, |
| "loss": 1.8739, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 1.5454331636428833, |
| "learning_rate": 0.001, |
| "loss": 1.8881, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.4111459255218506, |
| "learning_rate": 0.001, |
| "loss": 1.9013, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 1.7052496671676636, |
| "learning_rate": 0.001, |
| "loss": 1.883, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 1.6497286558151245, |
| "learning_rate": 0.001, |
| "loss": 1.9053, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.4096091985702515, |
| "learning_rate": 0.001, |
| "loss": 1.9002, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.349039077758789, |
| "learning_rate": 0.001, |
| "loss": 1.8935, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.6230989694595337, |
| "learning_rate": 0.001, |
| "loss": 1.9151, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.2353782653808594, |
| "learning_rate": 0.001, |
| "loss": 1.904, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.5027496814727783, |
| "learning_rate": 0.001, |
| "loss": 1.9106, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.5522241592407227, |
| "learning_rate": 0.001, |
| "loss": 1.9283, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 1.550193428993225, |
| "learning_rate": 0.001, |
| "loss": 1.9336, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.786367654800415, |
| "learning_rate": 0.001, |
| "loss": 1.9277, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 1.2258667945861816, |
| "learning_rate": 0.001, |
| "loss": 1.9287, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.552863597869873, |
| "learning_rate": 0.001, |
| "loss": 1.9324, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 1.5470998287200928, |
| "learning_rate": 0.001, |
| "loss": 1.9377, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 1.2013700008392334, |
| "learning_rate": 0.001, |
| "loss": 1.9421, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.5143885612487793, |
| "learning_rate": 0.001, |
| "loss": 1.9404, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 1.606493592262268, |
| "learning_rate": 0.001, |
| "loss": 1.9494, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.654229998588562, |
| "learning_rate": 0.001, |
| "loss": 1.946, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.496848225593567, |
| "learning_rate": 0.001, |
| "loss": 1.965, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 1.171495795249939, |
| "learning_rate": 0.001, |
| "loss": 1.9583, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.484670877456665, |
| "learning_rate": 0.001, |
| "loss": 1.9489, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.6827988624572754, |
| "learning_rate": 0.001, |
| "loss": 1.8172, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 1.5099338293075562, |
| "learning_rate": 0.001, |
| "loss": 1.8169, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.0843244791030884, |
| "learning_rate": 0.001, |
| "loss": 1.8134, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 1.0883989334106445, |
| "learning_rate": 0.001, |
| "loss": 1.8469, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.2726517915725708, |
| "learning_rate": 0.001, |
| "loss": 1.8401, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 1.3099430799484253, |
| "learning_rate": 0.001, |
| "loss": 1.8624, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.0547666549682617, |
| "learning_rate": 0.001, |
| "loss": 1.8532, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 1.3312631845474243, |
| "learning_rate": 0.001, |
| "loss": 1.8733, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 1.3960740566253662, |
| "learning_rate": 0.001, |
| "loss": 1.8592, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 0.9560843110084534, |
| "learning_rate": 0.001, |
| "loss": 1.8542, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.2486746311187744, |
| "learning_rate": 0.001, |
| "loss": 1.8638, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.9921132326126099, |
| "learning_rate": 0.001, |
| "loss": 1.8619, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 1.6659047603607178, |
| "learning_rate": 0.001, |
| "loss": 1.861, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.2826552391052246, |
| "learning_rate": 0.001, |
| "loss": 1.8915, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.251529335975647, |
| "learning_rate": 0.001, |
| "loss": 1.8829, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.6887863874435425, |
| "learning_rate": 0.001, |
| "loss": 1.9092, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 1.3254314661026, |
| "learning_rate": 0.001, |
| "loss": 1.9104, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.2733975648880005, |
| "learning_rate": 0.001, |
| "loss": 1.8877, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.3033257722854614, |
| "learning_rate": 0.001, |
| "loss": 1.8943, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.5639281272888184, |
| "learning_rate": 0.001, |
| "loss": 1.9241, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.1277391910552979, |
| "learning_rate": 0.001, |
| "loss": 1.9131, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.113537311553955, |
| "learning_rate": 0.001, |
| "loss": 1.9023, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 1.3119865655899048, |
| "learning_rate": 0.001, |
| "loss": 1.9088, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.0738476514816284, |
| "learning_rate": 0.001, |
| "loss": 1.9163, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 1.345321774482727, |
| "learning_rate": 0.001, |
| "loss": 1.897, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 0.9614481329917908, |
| "learning_rate": 0.001, |
| "loss": 1.951, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.6291916370391846, |
| "learning_rate": 0.001, |
| "loss": 1.9186, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.0890250205993652, |
| "learning_rate": 0.001, |
| "loss": 1.9356, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 1.4325668811798096, |
| "learning_rate": 0.001, |
| "loss": 1.9342, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.6891958713531494, |
| "learning_rate": 0.001, |
| "loss": 1.9331, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.3368526697158813, |
| "learning_rate": 0.001, |
| "loss": 1.9427, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.5092604160308838, |
| "learning_rate": 0.001, |
| "loss": 1.8021, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.243878722190857, |
| "learning_rate": 0.001, |
| "loss": 1.8109, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 0.9875638484954834, |
| "learning_rate": 0.001, |
| "loss": 1.8099, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.0712621212005615, |
| "learning_rate": 0.001, |
| "loss": 1.8081, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.3209693431854248, |
| "learning_rate": 0.001, |
| "loss": 1.828, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.3973124027252197, |
| "learning_rate": 0.001, |
| "loss": 1.8319, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 1.2643061876296997, |
| "learning_rate": 0.001, |
| "loss": 1.8468, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.2189161777496338, |
| "learning_rate": 0.001, |
| "loss": 1.8274, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.1872079372406006, |
| "learning_rate": 0.001, |
| "loss": 1.8441, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 1.401799201965332, |
| "learning_rate": 0.001, |
| "loss": 1.86, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.4873459339141846, |
| "learning_rate": 0.001, |
| "loss": 1.8611, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.5051405429840088, |
| "learning_rate": 0.001, |
| "loss": 1.8512, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.6135274171829224, |
| "learning_rate": 0.001, |
| "loss": 1.8497, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.05423104763031, |
| "learning_rate": 0.001, |
| "loss": 1.8806, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 1.1396740674972534, |
| "learning_rate": 0.001, |
| "loss": 1.8848, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 1.095083236694336, |
| "learning_rate": 0.001, |
| "loss": 1.8578, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.0797603130340576, |
| "learning_rate": 0.001, |
| "loss": 1.8792, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 1.4871982336044312, |
| "learning_rate": 0.001, |
| "loss": 1.8817, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.134158730506897, |
| "learning_rate": 0.001, |
| "loss": 1.9027, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.3541128635406494, |
| "learning_rate": 0.001, |
| "loss": 1.8867, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 0.9839674234390259, |
| "learning_rate": 0.001, |
| "loss": 1.8863, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.0756222009658813, |
| "learning_rate": 0.001, |
| "loss": 1.8909, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.3785662651062012, |
| "learning_rate": 0.001, |
| "loss": 1.9015, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 1.2404823303222656, |
| "learning_rate": 0.001, |
| "loss": 1.8972, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.2794967889785767, |
| "learning_rate": 0.001, |
| "loss": 1.9081, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.1256643533706665, |
| "learning_rate": 0.001, |
| "loss": 1.904, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.3804694414138794, |
| "learning_rate": 0.001, |
| "loss": 1.9057, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.4928176403045654, |
| "learning_rate": 0.001, |
| "loss": 1.9147, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.1267145872116089, |
| "learning_rate": 0.001, |
| "loss": 1.9063, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.179831862449646, |
| "learning_rate": 0.001, |
| "loss": 1.9184, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.2279536724090576, |
| "learning_rate": 0.001, |
| "loss": 1.8946, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.1896717548370361, |
| "learning_rate": 0.001, |
| "loss": 1.7819, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 1.3132928609848022, |
| "learning_rate": 0.001, |
| "loss": 1.7896, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.1400185823440552, |
| "learning_rate": 0.001, |
| "loss": 1.7985, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 1.4554778337478638, |
| "learning_rate": 0.001, |
| "loss": 1.7952, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.4060354232788086, |
| "learning_rate": 0.001, |
| "loss": 1.7948, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 0.9965196847915649, |
| "learning_rate": 0.001, |
| "loss": 1.8039, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.0781031847000122, |
| "learning_rate": 0.001, |
| "loss": 1.8393, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.2023656368255615, |
| "learning_rate": 0.001, |
| "loss": 1.818, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.4046202898025513, |
| "learning_rate": 0.001, |
| "loss": 1.8312, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.2398176193237305, |
| "learning_rate": 0.001, |
| "loss": 1.8338, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.5853986740112305, |
| "learning_rate": 0.001, |
| "loss": 1.8268, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.4195340871810913, |
| "learning_rate": 0.001, |
| "loss": 1.8459, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.4397501945495605, |
| "learning_rate": 0.001, |
| "loss": 1.851, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.0061242580413818, |
| "learning_rate": 0.001, |
| "loss": 1.8391, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.672114610671997, |
| "learning_rate": 0.001, |
| "loss": 1.8614, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.0445514917373657, |
| "learning_rate": 0.001, |
| "loss": 1.8528, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.3120521306991577, |
| "learning_rate": 0.001, |
| "loss": 1.8488, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.2624495029449463, |
| "learning_rate": 0.001, |
| "loss": 1.8653, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 0.9890035390853882, |
| "learning_rate": 0.001, |
| "loss": 1.8696, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.3105789422988892, |
| "learning_rate": 0.001, |
| "loss": 1.8667, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 1.5599737167358398, |
| "learning_rate": 0.001, |
| "loss": 1.867, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.437135934829712, |
| "learning_rate": 0.001, |
| "loss": 1.8869, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 1.0562933683395386, |
| "learning_rate": 0.001, |
| "loss": 1.8696, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.3576165437698364, |
| "learning_rate": 0.001, |
| "loss": 1.9007, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.1719582080841064, |
| "learning_rate": 0.001, |
| "loss": 1.8788, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.3145471811294556, |
| "learning_rate": 0.001, |
| "loss": 1.8952, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.1600955724716187, |
| "learning_rate": 0.001, |
| "loss": 1.8729, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.1656097173690796, |
| "learning_rate": 0.001, |
| "loss": 1.9102, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.3269482851028442, |
| "learning_rate": 0.001, |
| "loss": 1.9104, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.4651037454605103, |
| "learning_rate": 0.001, |
| "loss": 1.9153, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 1.4630742073059082, |
| "learning_rate": 0.001, |
| "loss": 1.8721, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.5355961322784424, |
| "learning_rate": 0.001, |
| "loss": 1.779, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.068429708480835, |
| "learning_rate": 0.001, |
| "loss": 1.7664, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.7566649913787842, |
| "learning_rate": 0.001, |
| "loss": 1.786, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.3657581806182861, |
| "learning_rate": 0.001, |
| "loss": 1.7981, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.031551480293274, |
| "learning_rate": 0.001, |
| "loss": 1.7941, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 1.4861186742782593, |
| "learning_rate": 0.001, |
| "loss": 1.8068, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.6767009496688843, |
| "learning_rate": 0.001, |
| "loss": 1.7795, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 1.299914836883545, |
| "learning_rate": 0.001, |
| "loss": 1.8062, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 1.477315902709961, |
| "learning_rate": 0.001, |
| "loss": 1.8114, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 1.3180029392242432, |
| "learning_rate": 0.001, |
| "loss": 1.8187, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.2591907978057861, |
| "learning_rate": 0.001, |
| "loss": 1.8163, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 0.9934723377227783, |
| "learning_rate": 0.001, |
| "loss": 1.8256, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.316521406173706, |
| "learning_rate": 0.001, |
| "loss": 1.8199, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.5410053730010986, |
| "learning_rate": 0.001, |
| "loss": 1.8195, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.301754355430603, |
| "learning_rate": 0.001, |
| "loss": 1.8209, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.0874885320663452, |
| "learning_rate": 0.001, |
| "loss": 1.8474, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.8527356386184692, |
| "learning_rate": 0.001, |
| "loss": 1.8503, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 1.24700927734375, |
| "learning_rate": 0.001, |
| "loss": 1.8372, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.1928914785385132, |
| "learning_rate": 0.001, |
| "loss": 1.8499, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.2841598987579346, |
| "learning_rate": 0.001, |
| "loss": 1.8461, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.2930001020431519, |
| "learning_rate": 0.001, |
| "loss": 1.8546, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.3524971008300781, |
| "learning_rate": 0.001, |
| "loss": 1.8654, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.240613341331482, |
| "learning_rate": 0.001, |
| "loss": 1.8599, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.337829351425171, |
| "learning_rate": 0.001, |
| "loss": 1.8665, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.1188397407531738, |
| "learning_rate": 0.001, |
| "loss": 1.8555, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.049929141998291, |
| "learning_rate": 0.001, |
| "loss": 1.8901, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.0788209438323975, |
| "learning_rate": 0.001, |
| "loss": 1.8839, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 1.2696701288223267, |
| "learning_rate": 0.001, |
| "loss": 1.8861, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 1.7940024137496948, |
| "learning_rate": 0.001, |
| "loss": 1.8673, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 1.25827956199646, |
| "learning_rate": 0.001, |
| "loss": 1.8943, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.3485623598098755, |
| "learning_rate": 0.001, |
| "loss": 1.8329, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.3273441791534424, |
| "learning_rate": 0.001, |
| "loss": 1.7511, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.3480987548828125, |
| "learning_rate": 0.001, |
| "loss": 1.7612, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.6776913404464722, |
| "learning_rate": 0.001, |
| "loss": 1.7685, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 1.483688473701477, |
| "learning_rate": 0.001, |
| "loss": 1.7646, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.494076132774353, |
| "learning_rate": 0.001, |
| "loss": 1.7574, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.4595417976379395, |
| "learning_rate": 0.001, |
| "loss": 1.7649, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.205970048904419, |
| "learning_rate": 0.001, |
| "loss": 1.8028, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.363794207572937, |
| "learning_rate": 0.001, |
| "loss": 1.7898, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.1740237474441528, |
| "learning_rate": 0.001, |
| "loss": 1.78, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 1.4625617265701294, |
| "learning_rate": 0.001, |
| "loss": 1.8087, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.282263994216919, |
| "learning_rate": 0.001, |
| "loss": 1.8007, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.1302987337112427, |
| "learning_rate": 0.001, |
| "loss": 1.7909, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 1.6036306619644165, |
| "learning_rate": 0.001, |
| "loss": 1.809, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.3770872354507446, |
| "learning_rate": 0.001, |
| "loss": 1.8171, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.3180655241012573, |
| "learning_rate": 0.001, |
| "loss": 1.8166, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.7276040315628052, |
| "learning_rate": 0.001, |
| "loss": 1.8245, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.8373006582260132, |
| "learning_rate": 0.001, |
| "loss": 1.8152, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.5962961912155151, |
| "learning_rate": 0.001, |
| "loss": 1.8338, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.576045036315918, |
| "learning_rate": 0.001, |
| "loss": 1.8291, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 1.198539137840271, |
| "learning_rate": 0.001, |
| "loss": 1.8487, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.3989983797073364, |
| "learning_rate": 0.001, |
| "loss": 1.8487, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 1.213088870048523, |
| "learning_rate": 0.001, |
| "loss": 1.838, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 1.3717889785766602, |
| "learning_rate": 0.001, |
| "loss": 1.8456, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.225083827972412, |
| "learning_rate": 0.001, |
| "loss": 1.8628, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.5054234266281128, |
| "learning_rate": 0.001, |
| "loss": 1.8628, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 1.5282232761383057, |
| "learning_rate": 0.001, |
| "loss": 1.8504, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 1.2803972959518433, |
| "learning_rate": 0.001, |
| "loss": 1.8651, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.4066797494888306, |
| "learning_rate": 0.001, |
| "loss": 1.8671, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.4010391235351562, |
| "learning_rate": 0.001, |
| "loss": 1.843, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 1.6911927461624146, |
| "learning_rate": 0.001, |
| "loss": 1.8662, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.6860558986663818, |
| "learning_rate": 0.001, |
| "loss": 1.8256, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.307278037071228, |
| "learning_rate": 0.001, |
| "loss": 1.7505, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.5941931009292603, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.1873581409454346, |
| "learning_rate": 0.001, |
| "loss": 1.7325, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.153531551361084, |
| "learning_rate": 0.001, |
| "loss": 1.7385, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.6365715265274048, |
| "learning_rate": 0.001, |
| "loss": 1.7542, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.7263197898864746, |
| "learning_rate": 0.001, |
| "loss": 1.7778, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 1.5471076965332031, |
| "learning_rate": 0.001, |
| "loss": 1.7632, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 2.015866279602051, |
| "learning_rate": 0.001, |
| "loss": 1.7633, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.406688928604126, |
| "learning_rate": 0.001, |
| "loss": 1.7786, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 1.5543419122695923, |
| "learning_rate": 0.001, |
| "loss": 1.7788, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 1.744920253753662, |
| "learning_rate": 0.001, |
| "loss": 1.7981, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.4804073572158813, |
| "learning_rate": 0.001, |
| "loss": 1.7952, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 1.7059993743896484, |
| "learning_rate": 0.001, |
| "loss": 1.7812, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 1.6757292747497559, |
| "learning_rate": 0.001, |
| "loss": 1.812, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 1.4929311275482178, |
| "learning_rate": 0.001, |
| "loss": 1.8132, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 1.617465615272522, |
| "learning_rate": 0.001, |
| "loss": 1.8102, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.3344014883041382, |
| "learning_rate": 0.001, |
| "loss": 1.8099, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.1278409957885742, |
| "learning_rate": 0.001, |
| "loss": 1.8197, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 1.408752202987671, |
| "learning_rate": 0.001, |
| "loss": 1.8128, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 1.6868847608566284, |
| "learning_rate": 0.001, |
| "loss": 1.8067, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 1.3664335012435913, |
| "learning_rate": 0.001, |
| "loss": 1.8145, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.0876169204711914, |
| "learning_rate": 0.001, |
| "loss": 1.839, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.4990159273147583, |
| "learning_rate": 0.001, |
| "loss": 1.8261, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 1.2511619329452515, |
| "learning_rate": 0.001, |
| "loss": 1.8379, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.3371134996414185, |
| "learning_rate": 0.001, |
| "loss": 1.835, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.6547143459320068, |
| "learning_rate": 0.001, |
| "loss": 1.8423, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 1.372111201286316, |
| "learning_rate": 0.001, |
| "loss": 1.8324, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.5143040418624878, |
| "learning_rate": 0.001, |
| "loss": 1.8382, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.6955279111862183, |
| "learning_rate": 0.001, |
| "loss": 1.8533, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 1.4255764484405518, |
| "learning_rate": 0.001, |
| "loss": 1.8535, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 1.5626089572906494, |
| "learning_rate": 0.001, |
| "loss": 1.765, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 1.950334906578064, |
| "learning_rate": 0.001, |
| "loss": 1.7285, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 1.411240816116333, |
| "learning_rate": 0.001, |
| "loss": 1.735, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.5985175371170044, |
| "learning_rate": 0.001, |
| "loss": 1.7289, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 1.399863600730896, |
| "learning_rate": 0.001, |
| "loss": 1.7341, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 2.0055899620056152, |
| "learning_rate": 0.001, |
| "loss": 1.72, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 1.681624174118042, |
| "learning_rate": 0.001, |
| "loss": 1.7439, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 1.5897300243377686, |
| "learning_rate": 0.001, |
| "loss": 1.7625, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 1.7503191232681274, |
| "learning_rate": 0.001, |
| "loss": 1.7712, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 1.715479850769043, |
| "learning_rate": 0.001, |
| "loss": 1.7624, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 1.6322977542877197, |
| "learning_rate": 0.001, |
| "loss": 1.7547, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 1.592178463935852, |
| "learning_rate": 0.001, |
| "loss": 1.7963, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 1.2945525646209717, |
| "learning_rate": 0.001, |
| "loss": 1.7699, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 1.4757287502288818, |
| "learning_rate": 0.001, |
| "loss": 1.7668, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 1.5124675035476685, |
| "learning_rate": 0.001, |
| "loss": 1.7906, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 2.080328941345215, |
| "learning_rate": 0.001, |
| "loss": 1.7906, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 2.1160199642181396, |
| "learning_rate": 0.001, |
| "loss": 1.7867, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 1.788028359413147, |
| "learning_rate": 0.001, |
| "loss": 1.7829, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.5656241178512573, |
| "learning_rate": 0.001, |
| "loss": 1.7748, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.5667623281478882, |
| "learning_rate": 0.001, |
| "loss": 1.7921, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 1.9110908508300781, |
| "learning_rate": 0.001, |
| "loss": 1.7988, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 1.7964316606521606, |
| "learning_rate": 0.001, |
| "loss": 1.811, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 1.4739376306533813, |
| "learning_rate": 0.001, |
| "loss": 1.8143, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 1.4890472888946533, |
| "learning_rate": 0.001, |
| "loss": 1.8305, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 1.7300723791122437, |
| "learning_rate": 0.001, |
| "loss": 1.7989, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 1.9375386238098145, |
| "learning_rate": 0.001, |
| "loss": 1.8203, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.827304720878601, |
| "learning_rate": 0.001, |
| "loss": 1.8306, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 1.7213237285614014, |
| "learning_rate": 0.001, |
| "loss": 1.8174, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 1.3543641567230225, |
| "learning_rate": 0.001, |
| "loss": 1.8407, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.6443567276000977, |
| "learning_rate": 0.001, |
| "loss": 1.8223, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 1.3840230703353882, |
| "learning_rate": 0.001, |
| "loss": 1.8507, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 2.517742872238159, |
| "learning_rate": 0.001, |
| "loss": 1.7459, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 2.185734510421753, |
| "learning_rate": 0.001, |
| "loss": 1.7024, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 2.0672836303710938, |
| "learning_rate": 0.001, |
| "loss": 1.7158, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 1.973358154296875, |
| "learning_rate": 0.001, |
| "loss": 1.7142, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 1.7326325178146362, |
| "learning_rate": 0.001, |
| "loss": 1.7178, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 2.1925511360168457, |
| "learning_rate": 0.001, |
| "loss": 1.7355, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 2.043834924697876, |
| "learning_rate": 0.001, |
| "loss": 1.7256, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 1.3037394285202026, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 1.4939100742340088, |
| "learning_rate": 0.001, |
| "loss": 1.7371, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 1.9814097881317139, |
| "learning_rate": 0.001, |
| "loss": 1.7486, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 1.8973731994628906, |
| "learning_rate": 0.001, |
| "loss": 1.7409, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 1.7295153141021729, |
| "learning_rate": 0.001, |
| "loss": 1.7641, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 2.118375539779663, |
| "learning_rate": 0.001, |
| "loss": 1.7775, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 2.247931480407715, |
| "learning_rate": 0.001, |
| "loss": 1.753, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 1.6591767072677612, |
| "learning_rate": 0.001, |
| "loss": 1.7646, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 1.8841537237167358, |
| "learning_rate": 0.001, |
| "loss": 1.7669, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 1.5252450704574585, |
| "learning_rate": 0.001, |
| "loss": 1.7684, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 2.3641180992126465, |
| "learning_rate": 0.001, |
| "loss": 1.7768, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 2.023132085800171, |
| "learning_rate": 0.001, |
| "loss": 1.8046, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 1.7865345478057861, |
| "learning_rate": 0.001, |
| "loss": 1.7726, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 1.8594768047332764, |
| "learning_rate": 0.001, |
| "loss": 1.788, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 1.9835621118545532, |
| "learning_rate": 0.001, |
| "loss": 1.7863, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 1.874348759651184, |
| "learning_rate": 0.001, |
| "loss": 1.7948, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 1.708947777748108, |
| "learning_rate": 0.001, |
| "loss": 1.7948, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 1.8363604545593262, |
| "learning_rate": 0.001, |
| "loss": 1.8011, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 1.9058271646499634, |
| "learning_rate": 0.001, |
| "loss": 1.8117, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 2.1362392902374268, |
| "learning_rate": 0.001, |
| "loss": 1.8062, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 1.9057139158248901, |
| "learning_rate": 0.001, |
| "loss": 1.7971, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 2.2736151218414307, |
| "learning_rate": 0.001, |
| "loss": 1.8343, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 1.8226218223571777, |
| "learning_rate": 0.001, |
| "loss": 1.8219, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 1.5797466039657593, |
| "learning_rate": 0.001, |
| "loss": 1.8071, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.521481990814209, |
| "learning_rate": 0.001, |
| "loss": 1.753, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 1.5325391292572021, |
| "learning_rate": 0.001, |
| "loss": 1.6839, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.285569429397583, |
| "learning_rate": 0.001, |
| "loss": 1.6817, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.5999805927276611, |
| "learning_rate": 0.001, |
| "loss": 1.7031, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.6621031761169434, |
| "learning_rate": 0.001, |
| "loss": 1.7072, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.1231224536895752, |
| "learning_rate": 0.001, |
| "loss": 1.7284, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 2.0875513553619385, |
| "learning_rate": 0.001, |
| "loss": 1.7371, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.1687034368515015, |
| "learning_rate": 0.001, |
| "loss": 1.7061, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.332298994064331, |
| "learning_rate": 0.001, |
| "loss": 1.7254, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.3487528562545776, |
| "learning_rate": 0.001, |
| "loss": 1.7478, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.3946985006332397, |
| "learning_rate": 0.001, |
| "loss": 1.7475, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 1.4364039897918701, |
| "learning_rate": 0.001, |
| "loss": 1.7416, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.4674285650253296, |
| "learning_rate": 0.001, |
| "loss": 1.7538, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.2243691682815552, |
| "learning_rate": 0.001, |
| "loss": 1.7365, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 1.2732268571853638, |
| "learning_rate": 0.001, |
| "loss": 1.7684, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.8665534257888794, |
| "learning_rate": 0.001, |
| "loss": 1.7515, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 1.401395320892334, |
| "learning_rate": 0.001, |
| "loss": 1.7465, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.7193236351013184, |
| "learning_rate": 0.001, |
| "loss": 1.7725, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.4448840618133545, |
| "learning_rate": 0.001, |
| "loss": 1.7486, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.8662267923355103, |
| "learning_rate": 0.001, |
| "loss": 1.7845, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.5399407148361206, |
| "learning_rate": 0.001, |
| "loss": 1.7644, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.2298833131790161, |
| "learning_rate": 0.001, |
| "loss": 1.7841, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.181747317314148, |
| "learning_rate": 0.001, |
| "loss": 1.7781, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.1132404804229736, |
| "learning_rate": 0.001, |
| "loss": 1.7873, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.2958418130874634, |
| "learning_rate": 0.001, |
| "loss": 1.7973, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 1.3314193487167358, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.2268832921981812, |
| "learning_rate": 0.001, |
| "loss": 1.7882, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.6641273498535156, |
| "learning_rate": 0.001, |
| "loss": 1.796, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.4358201026916504, |
| "learning_rate": 0.001, |
| "loss": 1.8006, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.683240294456482, |
| "learning_rate": 0.001, |
| "loss": 1.789, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 1.3339091539382935, |
| "learning_rate": 0.001, |
| "loss": 1.8012, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.1389038562774658, |
| "learning_rate": 0.001, |
| "loss": 1.73, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.3806034326553345, |
| "learning_rate": 0.001, |
| "loss": 1.6848, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.372220754623413, |
| "learning_rate": 0.001, |
| "loss": 1.6656, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 1.4831829071044922, |
| "learning_rate": 0.001, |
| "loss": 1.693, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.418005347251892, |
| "learning_rate": 0.001, |
| "loss": 1.6875, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.3556125164031982, |
| "learning_rate": 0.001, |
| "loss": 1.706, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.2906237840652466, |
| "learning_rate": 0.001, |
| "loss": 1.6978, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.6397830247879028, |
| "learning_rate": 0.001, |
| "loss": 1.6998, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.4387032985687256, |
| "learning_rate": 0.001, |
| "loss": 1.7246, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.3406988382339478, |
| "learning_rate": 0.001, |
| "loss": 1.7006, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.217828392982483, |
| "learning_rate": 0.001, |
| "loss": 1.7204, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.4825609922409058, |
| "learning_rate": 0.001, |
| "loss": 1.7147, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.3367348909378052, |
| "learning_rate": 0.001, |
| "loss": 1.7242, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.2189295291900635, |
| "learning_rate": 0.001, |
| "loss": 1.7252, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.3985297679901123, |
| "learning_rate": 0.001, |
| "loss": 1.7492, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.5942193269729614, |
| "learning_rate": 0.001, |
| "loss": 1.7454, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 1.4428784847259521, |
| "learning_rate": 0.001, |
| "loss": 1.7478, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.6369024515151978, |
| "learning_rate": 0.001, |
| "loss": 1.762, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.4189107418060303, |
| "learning_rate": 0.001, |
| "loss": 1.7404, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.0974513292312622, |
| "learning_rate": 0.001, |
| "loss": 1.7539, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 1.4818083047866821, |
| "learning_rate": 0.001, |
| "loss": 1.7615, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.5206955671310425, |
| "learning_rate": 0.001, |
| "loss": 1.7802, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.472645878791809, |
| "learning_rate": 0.001, |
| "loss": 1.7725, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.1264265775680542, |
| "learning_rate": 0.001, |
| "loss": 1.7725, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.462930679321289, |
| "learning_rate": 0.001, |
| "loss": 1.7652, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.6498340368270874, |
| "learning_rate": 0.001, |
| "loss": 1.7759, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.0541050434112549, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.2006580829620361, |
| "learning_rate": 0.001, |
| "loss": 1.7805, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 1.2335492372512817, |
| "learning_rate": 0.001, |
| "loss": 1.7877, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.3368200063705444, |
| "learning_rate": 0.001, |
| "loss": 1.8003, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.6851129531860352, |
| "learning_rate": 0.001, |
| "loss": 1.7872, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.1418347358703613, |
| "learning_rate": 0.001, |
| "loss": 1.7087, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.1400333642959595, |
| "learning_rate": 0.001, |
| "loss": 1.6607, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.8854553699493408, |
| "learning_rate": 0.001, |
| "loss": 1.6703, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 1.4569580554962158, |
| "learning_rate": 0.001, |
| "loss": 1.6802, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.248537302017212, |
| "learning_rate": 0.001, |
| "loss": 1.65, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.1826552152633667, |
| "learning_rate": 0.001, |
| "loss": 1.679, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.2726597785949707, |
| "learning_rate": 0.001, |
| "loss": 1.6903, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.6515134572982788, |
| "learning_rate": 0.001, |
| "loss": 1.6979, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.4121181964874268, |
| "learning_rate": 0.001, |
| "loss": 1.6842, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.5250076055526733, |
| "learning_rate": 0.001, |
| "loss": 1.6971, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.5536643266677856, |
| "learning_rate": 0.001, |
| "loss": 1.7001, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.589000940322876, |
| "learning_rate": 0.001, |
| "loss": 1.7172, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.004062533378601, |
| "learning_rate": 0.001, |
| "loss": 1.7173, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.5746830701828003, |
| "learning_rate": 0.001, |
| "loss": 1.7233, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.1398859024047852, |
| "learning_rate": 0.001, |
| "loss": 1.7258, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.3511697053909302, |
| "learning_rate": 0.001, |
| "loss": 1.7436, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.4493550062179565, |
| "learning_rate": 0.001, |
| "loss": 1.7314, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.2301589250564575, |
| "learning_rate": 0.001, |
| "loss": 1.7319, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.3222787380218506, |
| "learning_rate": 0.001, |
| "loss": 1.742, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.094870686531067, |
| "learning_rate": 0.001, |
| "loss": 1.7492, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.4017021656036377, |
| "learning_rate": 0.001, |
| "loss": 1.7559, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.5024820566177368, |
| "learning_rate": 0.001, |
| "loss": 1.7335, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.5428948402404785, |
| "learning_rate": 0.001, |
| "loss": 1.7629, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.2330937385559082, |
| "learning_rate": 0.001, |
| "loss": 1.771, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 1.2731399536132812, |
| "learning_rate": 0.001, |
| "loss": 1.7611, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.2634693384170532, |
| "learning_rate": 0.001, |
| "loss": 1.7557, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.2590782642364502, |
| "learning_rate": 0.001, |
| "loss": 1.7611, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.3432759046554565, |
| "learning_rate": 0.001, |
| "loss": 1.7746, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.6455352306365967, |
| "learning_rate": 0.001, |
| "loss": 1.7871, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.1180686950683594, |
| "learning_rate": 0.001, |
| "loss": 1.762, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.372404932975769, |
| "learning_rate": 0.001, |
| "loss": 1.7849, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.215472936630249, |
| "learning_rate": 0.001, |
| "loss": 1.6824, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.3119868040084839, |
| "learning_rate": 0.001, |
| "loss": 1.6581, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 1.2721792459487915, |
| "learning_rate": 0.001, |
| "loss": 1.6457, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 1.3706724643707275, |
| "learning_rate": 0.001, |
| "loss": 1.651, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.2131431102752686, |
| "learning_rate": 0.001, |
| "loss": 1.6697, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 1.4040346145629883, |
| "learning_rate": 0.001, |
| "loss": 1.659, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.5169869661331177, |
| "learning_rate": 0.001, |
| "loss": 1.6881, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.3575830459594727, |
| "learning_rate": 0.001, |
| "loss": 1.684, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 1.4115160703659058, |
| "learning_rate": 0.001, |
| "loss": 1.6892, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.3648401498794556, |
| "learning_rate": 0.001, |
| "loss": 1.6976, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.5159053802490234, |
| "learning_rate": 0.001, |
| "loss": 1.7084, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.3242292404174805, |
| "learning_rate": 0.001, |
| "loss": 1.6906, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.2734252214431763, |
| "learning_rate": 0.001, |
| "loss": 1.6991, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.3581430912017822, |
| "learning_rate": 0.001, |
| "loss": 1.7096, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.5372655391693115, |
| "learning_rate": 0.001, |
| "loss": 1.7244, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 1.160895586013794, |
| "learning_rate": 0.001, |
| "loss": 1.7147, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.1761196851730347, |
| "learning_rate": 0.001, |
| "loss": 1.7101, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.3373456001281738, |
| "learning_rate": 0.001, |
| "loss": 1.7102, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.1968778371810913, |
| "learning_rate": 0.001, |
| "loss": 1.7149, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.6086446046829224, |
| "learning_rate": 0.001, |
| "loss": 1.7136, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 1.4648292064666748, |
| "learning_rate": 0.001, |
| "loss": 1.7364, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.2216010093688965, |
| "learning_rate": 0.001, |
| "loss": 1.7496, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 1.333053708076477, |
| "learning_rate": 0.001, |
| "loss": 1.7463, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.3574227094650269, |
| "learning_rate": 0.001, |
| "loss": 1.7516, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 1.2774755954742432, |
| "learning_rate": 0.001, |
| "loss": 1.748, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.1617026329040527, |
| "learning_rate": 0.001, |
| "loss": 1.7472, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.248290777206421, |
| "learning_rate": 0.001, |
| "loss": 1.7516, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 0.983884871006012, |
| "learning_rate": 0.001, |
| "loss": 1.7464, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.3453645706176758, |
| "learning_rate": 0.001, |
| "loss": 1.7557, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.257980465888977, |
| "learning_rate": 0.001, |
| "loss": 1.7635, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.096062421798706, |
| "learning_rate": 0.001, |
| "loss": 1.7539, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.441990613937378, |
| "learning_rate": 0.001, |
| "loss": 1.656, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 1.2403453588485718, |
| "learning_rate": 0.001, |
| "loss": 1.6452, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 1.223830223083496, |
| "learning_rate": 0.001, |
| "loss": 1.6356, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 1.210126519203186, |
| "learning_rate": 0.001, |
| "loss": 1.645, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.6140942573547363, |
| "learning_rate": 0.001, |
| "loss": 1.651, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 1.7467138767242432, |
| "learning_rate": 0.001, |
| "loss": 1.6784, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.1775304079055786, |
| "learning_rate": 0.001, |
| "loss": 1.6649, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 1.9474658966064453, |
| "learning_rate": 0.001, |
| "loss": 1.6672, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.4976410865783691, |
| "learning_rate": 0.001, |
| "loss": 1.6637, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.3201075792312622, |
| "learning_rate": 0.001, |
| "loss": 1.676, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.6384514570236206, |
| "learning_rate": 0.001, |
| "loss": 1.6676, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.6562048196792603, |
| "learning_rate": 0.001, |
| "loss": 1.6743, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.5702561140060425, |
| "learning_rate": 0.001, |
| "loss": 1.6871, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 1.4084358215332031, |
| "learning_rate": 0.001, |
| "loss": 1.6916, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 1.3545585870742798, |
| "learning_rate": 0.001, |
| "loss": 1.7055, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.1715108156204224, |
| "learning_rate": 0.001, |
| "loss": 1.6969, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.081047534942627, |
| "learning_rate": 0.001, |
| "loss": 1.6795, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.5891271829605103, |
| "learning_rate": 0.001, |
| "loss": 1.7091, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 1.2145577669143677, |
| "learning_rate": 0.001, |
| "loss": 1.7007, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.4922897815704346, |
| "learning_rate": 0.001, |
| "loss": 1.7244, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 1.6337180137634277, |
| "learning_rate": 0.001, |
| "loss": 1.7369, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.620103359222412, |
| "learning_rate": 0.001, |
| "loss": 1.7342, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.6328498125076294, |
| "learning_rate": 0.001, |
| "loss": 1.7209, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.3009995222091675, |
| "learning_rate": 0.001, |
| "loss": 1.7132, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.401782512664795, |
| "learning_rate": 0.001, |
| "loss": 1.7481, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.2889764308929443, |
| "learning_rate": 0.001, |
| "loss": 1.7267, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.810412049293518, |
| "learning_rate": 0.001, |
| "loss": 1.7317, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.6493173837661743, |
| "learning_rate": 0.001, |
| "loss": 1.7424, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.5466930866241455, |
| "learning_rate": 0.001, |
| "loss": 1.7395, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.4534482955932617, |
| "learning_rate": 0.001, |
| "loss": 1.7381, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 1.7018836736679077, |
| "learning_rate": 0.001, |
| "loss": 1.744, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.7868794202804565, |
| "learning_rate": 0.001, |
| "loss": 1.639, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 1.4775114059448242, |
| "learning_rate": 0.001, |
| "loss": 1.6331, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.349012017250061, |
| "learning_rate": 0.001, |
| "loss": 1.6202, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.6332101821899414, |
| "learning_rate": 0.001, |
| "loss": 1.6312, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.624573826789856, |
| "learning_rate": 0.001, |
| "loss": 1.653, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.2802793979644775, |
| "learning_rate": 0.001, |
| "loss": 1.6357, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.2156689167022705, |
| "learning_rate": 0.001, |
| "loss": 1.656, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.5607578754425049, |
| "learning_rate": 0.001, |
| "loss": 1.6587, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 1.479413390159607, |
| "learning_rate": 0.001, |
| "loss": 1.6439, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.3263925313949585, |
| "learning_rate": 0.001, |
| "loss": 1.66, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 1.4417411088943481, |
| "learning_rate": 0.001, |
| "loss": 1.6677, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.585517168045044, |
| "learning_rate": 0.001, |
| "loss": 1.6621, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 1.6125644445419312, |
| "learning_rate": 0.001, |
| "loss": 1.675, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.4252173900604248, |
| "learning_rate": 0.001, |
| "loss": 1.6818, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 1.4481408596038818, |
| "learning_rate": 0.001, |
| "loss": 1.6734, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.7809571027755737, |
| "learning_rate": 0.001, |
| "loss": 1.6738, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 1.8779230117797852, |
| "learning_rate": 0.001, |
| "loss": 1.6895, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.2452175617218018, |
| "learning_rate": 0.001, |
| "loss": 1.6788, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.8720242977142334, |
| "learning_rate": 0.001, |
| "loss": 1.6983, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 1.5265681743621826, |
| "learning_rate": 0.001, |
| "loss": 1.7017, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.6183137893676758, |
| "learning_rate": 0.001, |
| "loss": 1.7047, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 1.3076919317245483, |
| "learning_rate": 0.001, |
| "loss": 1.7059, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.2748409509658813, |
| "learning_rate": 0.001, |
| "loss": 1.7215, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.4130632877349854, |
| "learning_rate": 0.001, |
| "loss": 1.7318, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.500614047050476, |
| "learning_rate": 0.001, |
| "loss": 1.7096, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 1.2465304136276245, |
| "learning_rate": 0.001, |
| "loss": 1.7191, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 1.5916765928268433, |
| "learning_rate": 0.001, |
| "loss": 1.7195, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.354617714881897, |
| "learning_rate": 0.001, |
| "loss": 1.7126, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.574227213859558, |
| "learning_rate": 0.001, |
| "loss": 1.7338, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 1.5032780170440674, |
| "learning_rate": 0.001, |
| "loss": 1.7438, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 1.5653413534164429, |
| "learning_rate": 0.001, |
| "loss": 1.7113, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.396582007408142, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.868228793144226, |
| "learning_rate": 0.001, |
| "loss": 1.6014, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 1.523720145225525, |
| "learning_rate": 0.001, |
| "loss": 1.6105, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 1.501035213470459, |
| "learning_rate": 0.001, |
| "loss": 1.6247, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.7720264196395874, |
| "learning_rate": 0.001, |
| "loss": 1.6375, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 1.2451496124267578, |
| "learning_rate": 0.001, |
| "loss": 1.6381, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 1.6157736778259277, |
| "learning_rate": 0.001, |
| "loss": 1.643, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.3556593656539917, |
| "learning_rate": 0.001, |
| "loss": 1.6422, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 1.572458028793335, |
| "learning_rate": 0.001, |
| "loss": 1.6565, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.5787707567214966, |
| "learning_rate": 0.001, |
| "loss": 1.651, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 1.7005757093429565, |
| "learning_rate": 0.001, |
| "loss": 1.6402, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.835395097732544, |
| "learning_rate": 0.001, |
| "loss": 1.6582, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 1.7922719717025757, |
| "learning_rate": 0.001, |
| "loss": 1.6683, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.5180412530899048, |
| "learning_rate": 0.001, |
| "loss": 1.6652, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 1.7080254554748535, |
| "learning_rate": 0.001, |
| "loss": 1.6608, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 1.5227432250976562, |
| "learning_rate": 0.001, |
| "loss": 1.6777, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 2.0523929595947266, |
| "learning_rate": 0.001, |
| "loss": 1.6808, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 1.4774620532989502, |
| "learning_rate": 0.001, |
| "loss": 1.6682, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.576160192489624, |
| "learning_rate": 0.001, |
| "loss": 1.6844, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 1.4338222742080688, |
| "learning_rate": 0.001, |
| "loss": 1.6934, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.6853586435317993, |
| "learning_rate": 0.001, |
| "loss": 1.6855, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 1.502305030822754, |
| "learning_rate": 0.001, |
| "loss": 1.6999, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 1.3176287412643433, |
| "learning_rate": 0.001, |
| "loss": 1.6802, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.6562001705169678, |
| "learning_rate": 0.001, |
| "loss": 1.6989, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 1.3866074085235596, |
| "learning_rate": 0.001, |
| "loss": 1.7105, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.3695135116577148, |
| "learning_rate": 0.001, |
| "loss": 1.685, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 1.4399651288986206, |
| "learning_rate": 0.001, |
| "loss": 1.706, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.3470892906188965, |
| "learning_rate": 0.001, |
| "loss": 1.7119, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 2.029789924621582, |
| "learning_rate": 0.001, |
| "loss": 1.7271, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 1.4181112051010132, |
| "learning_rate": 0.001, |
| "loss": 1.7194, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 3.0098884105682373, |
| "learning_rate": 0.001, |
| "loss": 1.6978, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 1.7859961986541748, |
| "learning_rate": 0.001, |
| "loss": 1.5883, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 1.9198648929595947, |
| "learning_rate": 0.001, |
| "loss": 1.6018, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 1.7625179290771484, |
| "learning_rate": 0.001, |
| "loss": 1.6018, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 1.904485821723938, |
| "learning_rate": 0.001, |
| "loss": 1.61, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 2.516070604324341, |
| "learning_rate": 0.001, |
| "loss": 1.5987, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 2.0027501583099365, |
| "learning_rate": 0.001, |
| "loss": 1.6137, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 1.994936227798462, |
| "learning_rate": 0.001, |
| "loss": 1.6372, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 2.642782688140869, |
| "learning_rate": 0.001, |
| "loss": 1.6412, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 1.691218614578247, |
| "learning_rate": 0.001, |
| "loss": 1.6243, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 1.6818300485610962, |
| "learning_rate": 0.001, |
| "loss": 1.6463, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 2.295081377029419, |
| "learning_rate": 0.001, |
| "loss": 1.6292, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 2.1138439178466797, |
| "learning_rate": 0.001, |
| "loss": 1.6476, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 2.1354689598083496, |
| "learning_rate": 0.001, |
| "loss": 1.6484, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 1.5899136066436768, |
| "learning_rate": 0.001, |
| "loss": 1.6417, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 1.873875379562378, |
| "learning_rate": 0.001, |
| "loss": 1.6577, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 1.5048267841339111, |
| "learning_rate": 0.001, |
| "loss": 1.657, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 2.189378499984741, |
| "learning_rate": 0.001, |
| "loss": 1.663, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 2.1354687213897705, |
| "learning_rate": 0.001, |
| "loss": 1.6667, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 1.819714069366455, |
| "learning_rate": 0.001, |
| "loss": 1.6824, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 2.5435047149658203, |
| "learning_rate": 0.001, |
| "loss": 1.6583, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 1.9671624898910522, |
| "learning_rate": 0.001, |
| "loss": 1.6705, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 1.9127241373062134, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 1.9809879064559937, |
| "learning_rate": 0.001, |
| "loss": 1.6902, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 2.044308662414551, |
| "learning_rate": 0.001, |
| "loss": 1.7047, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 1.9656683206558228, |
| "learning_rate": 0.001, |
| "loss": 1.6941, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 1.8355093002319336, |
| "learning_rate": 0.001, |
| "loss": 1.7009, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 2.390496015548706, |
| "learning_rate": 0.001, |
| "loss": 1.7003, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 2.4113669395446777, |
| "learning_rate": 0.001, |
| "loss": 1.6929, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 2.240635633468628, |
| "learning_rate": 0.001, |
| "loss": 1.6955, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 2.356112480163574, |
| "learning_rate": 0.001, |
| "loss": 1.7039, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.3113300800323486, |
| "learning_rate": 0.001, |
| "loss": 1.7148, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.3943908214569092, |
| "learning_rate": 0.001, |
| "loss": 1.5684, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.5345698595046997, |
| "learning_rate": 0.001, |
| "loss": 1.5841, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.376284122467041, |
| "learning_rate": 0.001, |
| "loss": 1.5904, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 1.8864432573318481, |
| "learning_rate": 0.001, |
| "loss": 1.5832, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.077499508857727, |
| "learning_rate": 0.001, |
| "loss": 1.6045, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.413830041885376, |
| "learning_rate": 0.001, |
| "loss": 1.622, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.317996859550476, |
| "learning_rate": 0.001, |
| "loss": 1.6151, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 1.800900936126709, |
| "learning_rate": 0.001, |
| "loss": 1.6143, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 1.4891034364700317, |
| "learning_rate": 0.001, |
| "loss": 1.6113, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 1.776503324508667, |
| "learning_rate": 0.001, |
| "loss": 1.6184, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.4487277269363403, |
| "learning_rate": 0.001, |
| "loss": 1.6159, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.4836653470993042, |
| "learning_rate": 0.001, |
| "loss": 1.6281, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 1.5971252918243408, |
| "learning_rate": 0.001, |
| "loss": 1.6469, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.2963101863861084, |
| "learning_rate": 0.001, |
| "loss": 1.6385, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.1028395891189575, |
| "learning_rate": 0.001, |
| "loss": 1.6423, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 2.0811383724212646, |
| "learning_rate": 0.001, |
| "loss": 1.6412, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 1.5059459209442139, |
| "learning_rate": 0.001, |
| "loss": 1.6582, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 1.9516657590866089, |
| "learning_rate": 0.001, |
| "loss": 1.6565, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.6311758756637573, |
| "learning_rate": 0.001, |
| "loss": 1.6644, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.7732529640197754, |
| "learning_rate": 0.001, |
| "loss": 1.6546, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 1.8546462059020996, |
| "learning_rate": 0.001, |
| "loss": 1.6655, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.6930227279663086, |
| "learning_rate": 0.001, |
| "loss": 1.6688, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 1.3648531436920166, |
| "learning_rate": 0.001, |
| "loss": 1.6633, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.6870195865631104, |
| "learning_rate": 0.001, |
| "loss": 1.6871, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.2742482423782349, |
| "learning_rate": 0.001, |
| "loss": 1.6787, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 1.6389504671096802, |
| "learning_rate": 0.001, |
| "loss": 1.6658, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.4357678890228271, |
| "learning_rate": 0.001, |
| "loss": 1.6879, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 1.522757887840271, |
| "learning_rate": 0.001, |
| "loss": 1.6811, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 1.5563929080963135, |
| "learning_rate": 0.001, |
| "loss": 1.6796, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.4780327081680298, |
| "learning_rate": 0.001, |
| "loss": 1.7072, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.1867820024490356, |
| "learning_rate": 0.001, |
| "loss": 1.7241, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.198205828666687, |
| "learning_rate": 0.001, |
| "loss": 1.5527, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.3584864139556885, |
| "learning_rate": 0.001, |
| "loss": 1.5493, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 2.0809905529022217, |
| "learning_rate": 0.001, |
| "loss": 1.5896, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.5358004570007324, |
| "learning_rate": 0.001, |
| "loss": 1.5918, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.1696178913116455, |
| "learning_rate": 0.001, |
| "loss": 1.5772, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 1.2811452150344849, |
| "learning_rate": 0.001, |
| "loss": 1.5905, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.4328819513320923, |
| "learning_rate": 0.001, |
| "loss": 1.6077, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.3378756046295166, |
| "learning_rate": 0.001, |
| "loss": 1.616, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.2815625667572021, |
| "learning_rate": 0.001, |
| "loss": 1.6112, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.7682138681411743, |
| "learning_rate": 0.001, |
| "loss": 1.6231, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.4320544004440308, |
| "learning_rate": 0.001, |
| "loss": 1.6133, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.156111717224121, |
| "learning_rate": 0.001, |
| "loss": 1.6199, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 1.2384657859802246, |
| "learning_rate": 0.001, |
| "loss": 1.6203, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.2537254095077515, |
| "learning_rate": 0.001, |
| "loss": 1.6271, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.0931706428527832, |
| "learning_rate": 0.001, |
| "loss": 1.6172, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 1.236507773399353, |
| "learning_rate": 0.001, |
| "loss": 1.6281, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.4299321174621582, |
| "learning_rate": 0.001, |
| "loss": 1.6391, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.1509603261947632, |
| "learning_rate": 0.001, |
| "loss": 1.6387, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.17874276638031, |
| "learning_rate": 0.001, |
| "loss": 1.6525, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.652890682220459, |
| "learning_rate": 0.001, |
| "loss": 1.6555, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 1.334714651107788, |
| "learning_rate": 0.001, |
| "loss": 1.6563, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.5494811534881592, |
| "learning_rate": 0.001, |
| "loss": 1.657, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 2.0530385971069336, |
| "learning_rate": 0.001, |
| "loss": 1.6683, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.7362953424453735, |
| "learning_rate": 0.001, |
| "loss": 1.6581, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 1.2621122598648071, |
| "learning_rate": 0.001, |
| "loss": 1.6536, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.248063325881958, |
| "learning_rate": 0.001, |
| "loss": 1.6698, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.3443856239318848, |
| "learning_rate": 0.001, |
| "loss": 1.6908, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 1.2341312170028687, |
| "learning_rate": 0.001, |
| "loss": 1.6762, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.3637555837631226, |
| "learning_rate": 0.001, |
| "loss": 1.6815, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.547029733657837, |
| "learning_rate": 0.001, |
| "loss": 1.6881, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.252366065979004, |
| "learning_rate": 0.001, |
| "loss": 1.6504, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.2293126583099365, |
| "learning_rate": 0.001, |
| "loss": 1.5598, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.3912168741226196, |
| "learning_rate": 0.001, |
| "loss": 1.5306, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.7945741415023804, |
| "learning_rate": 0.001, |
| "loss": 1.5636, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 1.0372518301010132, |
| "learning_rate": 0.001, |
| "loss": 1.5817, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.2016254663467407, |
| "learning_rate": 0.001, |
| "loss": 1.5833, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.2256823778152466, |
| "learning_rate": 0.001, |
| "loss": 1.5829, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.3637040853500366, |
| "learning_rate": 0.001, |
| "loss": 1.5824, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.452815055847168, |
| "learning_rate": 0.001, |
| "loss": 1.6008, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.546164870262146, |
| "learning_rate": 0.001, |
| "loss": 1.6041, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.9156973361968994, |
| "learning_rate": 0.001, |
| "loss": 1.6182, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.5630964040756226, |
| "learning_rate": 0.001, |
| "loss": 1.5986, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.3350998163223267, |
| "learning_rate": 0.001, |
| "loss": 1.6171, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 1.3054630756378174, |
| "learning_rate": 0.001, |
| "loss": 1.6153, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.5927234888076782, |
| "learning_rate": 0.001, |
| "loss": 1.6113, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.5710676908493042, |
| "learning_rate": 0.001, |
| "loss": 1.6116, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 1.4166501760482788, |
| "learning_rate": 0.001, |
| "loss": 1.6275, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.5470216274261475, |
| "learning_rate": 0.001, |
| "loss": 1.6301, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.11976158618927, |
| "learning_rate": 0.001, |
| "loss": 1.6067, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.5103334188461304, |
| "learning_rate": 0.001, |
| "loss": 1.6336, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 1.3977969884872437, |
| "learning_rate": 0.001, |
| "loss": 1.6305, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.0533747673034668, |
| "learning_rate": 0.001, |
| "loss": 1.6577, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.1209571361541748, |
| "learning_rate": 0.001, |
| "loss": 1.6495, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.325560212135315, |
| "learning_rate": 0.001, |
| "loss": 1.6387, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.669684648513794, |
| "learning_rate": 0.001, |
| "loss": 1.6652, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.2255173921585083, |
| "learning_rate": 0.001, |
| "loss": 1.6421, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.6551024913787842, |
| "learning_rate": 0.001, |
| "loss": 1.6467, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 1.3617397546768188, |
| "learning_rate": 0.001, |
| "loss": 1.6615, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 1.486262321472168, |
| "learning_rate": 0.001, |
| "loss": 1.6718, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.360690951347351, |
| "learning_rate": 0.001, |
| "loss": 1.667, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.2477593421936035, |
| "learning_rate": 0.001, |
| "loss": 1.6788, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.0727769136428833, |
| "learning_rate": 0.001, |
| "loss": 1.6627, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.4639390707015991, |
| "learning_rate": 0.001, |
| "loss": 1.552, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.1440781354904175, |
| "learning_rate": 0.001, |
| "loss": 1.542, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 1.5725555419921875, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.2718406915664673, |
| "learning_rate": 0.001, |
| "loss": 1.5499, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.3445727825164795, |
| "learning_rate": 0.001, |
| "loss": 1.5602, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.2247686386108398, |
| "learning_rate": 0.001, |
| "loss": 1.5604, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.6381856203079224, |
| "learning_rate": 0.001, |
| "loss": 1.5846, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 1.3378030061721802, |
| "learning_rate": 0.001, |
| "loss": 1.5908, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.147397756576538, |
| "learning_rate": 0.001, |
| "loss": 1.5852, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 1.2928037643432617, |
| "learning_rate": 0.001, |
| "loss": 1.586, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.2967069149017334, |
| "learning_rate": 0.001, |
| "loss": 1.6003, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 1.5993214845657349, |
| "learning_rate": 0.001, |
| "loss": 1.5974, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.49488365650177, |
| "learning_rate": 0.001, |
| "loss": 1.6045, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.2498141527175903, |
| "learning_rate": 0.001, |
| "loss": 1.61, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.6309314966201782, |
| "learning_rate": 0.001, |
| "loss": 1.6046, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.0654757022857666, |
| "learning_rate": 0.001, |
| "loss": 1.6091, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.4608960151672363, |
| "learning_rate": 0.001, |
| "loss": 1.6241, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.3533505201339722, |
| "learning_rate": 0.001, |
| "loss": 1.6278, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.649953007698059, |
| "learning_rate": 0.001, |
| "loss": 1.6226, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.1779111623764038, |
| "learning_rate": 0.001, |
| "loss": 1.6388, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.131826639175415, |
| "learning_rate": 0.001, |
| "loss": 1.6333, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.634190559387207, |
| "learning_rate": 0.001, |
| "loss": 1.6137, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.2887123823165894, |
| "learning_rate": 0.001, |
| "loss": 1.6448, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.4903887510299683, |
| "learning_rate": 0.001, |
| "loss": 1.6377, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 1.4088680744171143, |
| "learning_rate": 0.001, |
| "loss": 1.6475, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.3768340349197388, |
| "learning_rate": 0.001, |
| "loss": 1.6364, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 1.3599269390106201, |
| "learning_rate": 0.001, |
| "loss": 1.6346, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.323041319847107, |
| "learning_rate": 0.001, |
| "loss": 1.6463, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 1.4620647430419922, |
| "learning_rate": 0.001, |
| "loss": 1.6652, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 1.5375910997390747, |
| "learning_rate": 0.001, |
| "loss": 1.6611, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.4705251455307007, |
| "learning_rate": 0.001, |
| "loss": 1.6323, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 1.2176787853240967, |
| "learning_rate": 0.001, |
| "loss": 1.5366, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.4176909923553467, |
| "learning_rate": 0.001, |
| "loss": 1.5388, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.5232542753219604, |
| "learning_rate": 0.001, |
| "loss": 1.5412, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 1.2083262205123901, |
| "learning_rate": 0.001, |
| "loss": 1.5462, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 1.532065510749817, |
| "learning_rate": 0.001, |
| "loss": 1.5379, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.2907582521438599, |
| "learning_rate": 0.001, |
| "loss": 1.5585, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.2645903825759888, |
| "learning_rate": 0.001, |
| "loss": 1.5902, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 1.5214968919754028, |
| "learning_rate": 0.001, |
| "loss": 1.565, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.1648329496383667, |
| "learning_rate": 0.001, |
| "loss": 1.5745, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.636714220046997, |
| "learning_rate": 0.001, |
| "loss": 1.5743, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.2977919578552246, |
| "learning_rate": 0.001, |
| "loss": 1.5701, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.385388970375061, |
| "learning_rate": 0.001, |
| "loss": 1.6057, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.5424326658248901, |
| "learning_rate": 0.001, |
| "loss": 1.5935, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 1.3697704076766968, |
| "learning_rate": 0.001, |
| "loss": 1.5902, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.3550012111663818, |
| "learning_rate": 0.001, |
| "loss": 1.5964, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.5691142082214355, |
| "learning_rate": 0.001, |
| "loss": 1.5953, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.4401603937149048, |
| "learning_rate": 0.001, |
| "loss": 1.6091, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 2.070875406265259, |
| "learning_rate": 0.001, |
| "loss": 1.6154, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 1.2211717367172241, |
| "learning_rate": 0.001, |
| "loss": 1.6156, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 1.0735397338867188, |
| "learning_rate": 0.001, |
| "loss": 1.6031, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 1.4228463172912598, |
| "learning_rate": 0.001, |
| "loss": 1.6114, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.237438440322876, |
| "learning_rate": 0.001, |
| "loss": 1.6255, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.0559433698654175, |
| "learning_rate": 0.001, |
| "loss": 1.6164, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 1.2364671230316162, |
| "learning_rate": 0.001, |
| "loss": 1.6247, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.3936049938201904, |
| "learning_rate": 0.001, |
| "loss": 1.6272, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.3188313245773315, |
| "learning_rate": 0.001, |
| "loss": 1.6322, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.413771152496338, |
| "learning_rate": 0.001, |
| "loss": 1.6411, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 1.473656415939331, |
| "learning_rate": 0.001, |
| "loss": 1.6318, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.2637544870376587, |
| "learning_rate": 0.001, |
| "loss": 1.6574, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.431186556816101, |
| "learning_rate": 0.001, |
| "loss": 1.655, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 1.8388493061065674, |
| "learning_rate": 0.001, |
| "loss": 1.5949, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 1.6828988790512085, |
| "learning_rate": 0.001, |
| "loss": 1.5117, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.5223714113235474, |
| "learning_rate": 0.001, |
| "loss": 1.5248, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 1.3648293018341064, |
| "learning_rate": 0.001, |
| "loss": 1.5379, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 1.7287791967391968, |
| "learning_rate": 0.001, |
| "loss": 1.5393, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.3508548736572266, |
| "learning_rate": 0.001, |
| "loss": 1.5409, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.7643887996673584, |
| "learning_rate": 0.001, |
| "loss": 1.5358, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 1.4329142570495605, |
| "learning_rate": 0.001, |
| "loss": 1.5356, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 1.8588013648986816, |
| "learning_rate": 0.001, |
| "loss": 1.5515, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.16867995262146, |
| "learning_rate": 0.001, |
| "loss": 1.5672, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.4061293601989746, |
| "learning_rate": 0.001, |
| "loss": 1.5661, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.90892493724823, |
| "learning_rate": 0.001, |
| "loss": 1.5693, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 1.3699308633804321, |
| "learning_rate": 0.001, |
| "loss": 1.5647, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.3952831029891968, |
| "learning_rate": 0.001, |
| "loss": 1.5815, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.5254894495010376, |
| "learning_rate": 0.001, |
| "loss": 1.5699, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.2883787155151367, |
| "learning_rate": 0.001, |
| "loss": 1.5758, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 1.4908933639526367, |
| "learning_rate": 0.001, |
| "loss": 1.5834, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 1.6060596704483032, |
| "learning_rate": 0.001, |
| "loss": 1.5951, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 1.373229742050171, |
| "learning_rate": 0.001, |
| "loss": 1.6092, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 1.2752670049667358, |
| "learning_rate": 0.001, |
| "loss": 1.5959, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.3325690031051636, |
| "learning_rate": 0.001, |
| "loss": 1.6177, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.4257392883300781, |
| "learning_rate": 0.001, |
| "loss": 1.5941, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 1.7372586727142334, |
| "learning_rate": 0.001, |
| "loss": 1.6156, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.8692024946212769, |
| "learning_rate": 0.001, |
| "loss": 1.6122, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.1845149993896484, |
| "learning_rate": 0.001, |
| "loss": 1.6191, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.329891324043274, |
| "learning_rate": 0.001, |
| "loss": 1.6264, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.892138123512268, |
| "learning_rate": 0.001, |
| "loss": 1.6295, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.3890790939331055, |
| "learning_rate": 0.001, |
| "loss": 1.6303, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.2890719175338745, |
| "learning_rate": 0.001, |
| "loss": 1.6325, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.4877135753631592, |
| "learning_rate": 0.001, |
| "loss": 1.636, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.6091532707214355, |
| "learning_rate": 0.001, |
| "loss": 1.6331, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.7103350162506104, |
| "learning_rate": 0.001, |
| "loss": 1.5788, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 1.5880162715911865, |
| "learning_rate": 0.001, |
| "loss": 1.504, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 1.7704602479934692, |
| "learning_rate": 0.001, |
| "loss": 1.5129, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 1.5123844146728516, |
| "learning_rate": 0.001, |
| "loss": 1.5252, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 1.4449516534805298, |
| "learning_rate": 0.001, |
| "loss": 1.5276, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.8149229288101196, |
| "learning_rate": 0.001, |
| "loss": 1.5328, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.581376552581787, |
| "learning_rate": 0.001, |
| "loss": 1.5392, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 1.5949654579162598, |
| "learning_rate": 0.001, |
| "loss": 1.5372, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 1.690617561340332, |
| "learning_rate": 0.001, |
| "loss": 1.5487, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 1.688520073890686, |
| "learning_rate": 0.001, |
| "loss": 1.5432, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.3199012279510498, |
| "learning_rate": 0.001, |
| "loss": 1.558, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 2.138596773147583, |
| "learning_rate": 0.001, |
| "loss": 1.5618, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 1.5825165510177612, |
| "learning_rate": 0.001, |
| "loss": 1.5702, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.6623446941375732, |
| "learning_rate": 0.001, |
| "loss": 1.5728, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 1.442103385925293, |
| "learning_rate": 0.001, |
| "loss": 1.5674, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.7547607421875, |
| "learning_rate": 0.001, |
| "loss": 1.5792, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 1.3971937894821167, |
| "learning_rate": 0.001, |
| "loss": 1.5822, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 1.6914944648742676, |
| "learning_rate": 0.001, |
| "loss": 1.5839, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 1.3228996992111206, |
| "learning_rate": 0.001, |
| "loss": 1.5744, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 1.7055774927139282, |
| "learning_rate": 0.001, |
| "loss": 1.5888, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.8422220945358276, |
| "learning_rate": 0.001, |
| "loss": 1.5785, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.2623335123062134, |
| "learning_rate": 0.001, |
| "loss": 1.6017, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.4726150035858154, |
| "learning_rate": 0.001, |
| "loss": 1.5917, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 1.7706819772720337, |
| "learning_rate": 0.001, |
| "loss": 1.6056, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.4827563762664795, |
| "learning_rate": 0.001, |
| "loss": 1.6008, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.75459885597229, |
| "learning_rate": 0.001, |
| "loss": 1.6035, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 1.9852813482284546, |
| "learning_rate": 0.001, |
| "loss": 1.6028, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.5619901418685913, |
| "learning_rate": 0.001, |
| "loss": 1.6125, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 1.407097339630127, |
| "learning_rate": 0.001, |
| "loss": 1.6248, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 1.6684885025024414, |
| "learning_rate": 0.001, |
| "loss": 1.6413, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 1.371626377105713, |
| "learning_rate": 0.001, |
| "loss": 1.6387, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 1.6691104173660278, |
| "learning_rate": 0.001, |
| "loss": 1.5481, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 2.0170748233795166, |
| "learning_rate": 0.001, |
| "loss": 1.5036, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 2.1037421226501465, |
| "learning_rate": 0.001, |
| "loss": 1.5065, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 1.8298563957214355, |
| "learning_rate": 0.001, |
| "loss": 1.4945, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 2.10650634765625, |
| "learning_rate": 0.001, |
| "loss": 1.5188, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 2.177595853805542, |
| "learning_rate": 0.001, |
| "loss": 1.5035, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 1.913504958152771, |
| "learning_rate": 0.001, |
| "loss": 1.5228, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 1.8941245079040527, |
| "learning_rate": 0.001, |
| "loss": 1.5434, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 1.6749768257141113, |
| "learning_rate": 0.001, |
| "loss": 1.5297, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 1.793357491493225, |
| "learning_rate": 0.001, |
| "loss": 1.5384, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 2.0181198120117188, |
| "learning_rate": 0.001, |
| "loss": 1.5615, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 1.7327299118041992, |
| "learning_rate": 0.001, |
| "loss": 1.5527, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 2.45485782623291, |
| "learning_rate": 0.001, |
| "loss": 1.5408, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 1.6525640487670898, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 1.7305655479431152, |
| "learning_rate": 0.001, |
| "loss": 1.5571, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 2.0535521507263184, |
| "learning_rate": 0.001, |
| "loss": 1.5599, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 1.970602035522461, |
| "learning_rate": 0.001, |
| "loss": 1.5651, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 2.031547784805298, |
| "learning_rate": 0.001, |
| "loss": 1.562, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 1.9550069570541382, |
| "learning_rate": 0.001, |
| "loss": 1.5795, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 2.0826876163482666, |
| "learning_rate": 0.001, |
| "loss": 1.5786, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 2.283815860748291, |
| "learning_rate": 0.001, |
| "loss": 1.5809, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 2.006481170654297, |
| "learning_rate": 0.001, |
| "loss": 1.5804, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 1.988154411315918, |
| "learning_rate": 0.001, |
| "loss": 1.591, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 1.6653863191604614, |
| "learning_rate": 0.001, |
| "loss": 1.5923, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 1.7848979234695435, |
| "learning_rate": 0.001, |
| "loss": 1.6274, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 2.0813205242156982, |
| "learning_rate": 0.001, |
| "loss": 1.6043, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 2.4704935550689697, |
| "learning_rate": 0.001, |
| "loss": 1.5916, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 1.8839926719665527, |
| "learning_rate": 0.001, |
| "loss": 1.6067, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 2.0230321884155273, |
| "learning_rate": 0.001, |
| "loss": 1.6179, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 1.7509673833847046, |
| "learning_rate": 0.001, |
| "loss": 1.6122, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 1.4696741104125977, |
| "learning_rate": 0.001, |
| "loss": 1.6118, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 1.685705542564392, |
| "learning_rate": 0.001, |
| "loss": 1.5439, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 1.7140778303146362, |
| "learning_rate": 0.001, |
| "loss": 1.4896, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 1.9085273742675781, |
| "learning_rate": 0.001, |
| "loss": 1.4753, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 1.8200005292892456, |
| "learning_rate": 0.001, |
| "loss": 1.496, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 1.8019359111785889, |
| "learning_rate": 0.001, |
| "loss": 1.4856, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.8619072437286377, |
| "learning_rate": 0.001, |
| "loss": 1.5037, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 1.6231518983840942, |
| "learning_rate": 0.001, |
| "loss": 1.5317, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 1.4733821153640747, |
| "learning_rate": 0.001, |
| "loss": 1.524, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 1.8434406518936157, |
| "learning_rate": 0.001, |
| "loss": 1.5269, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 2.057749032974243, |
| "learning_rate": 0.001, |
| "loss": 1.5341, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 1.3642988204956055, |
| "learning_rate": 0.001, |
| "loss": 1.5506, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 1.9842780828475952, |
| "learning_rate": 0.001, |
| "loss": 1.5516, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 1.4964210987091064, |
| "learning_rate": 0.001, |
| "loss": 1.5571, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 1.7212563753128052, |
| "learning_rate": 0.001, |
| "loss": 1.5561, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 1.554451823234558, |
| "learning_rate": 0.001, |
| "loss": 1.5543, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.716330647468567, |
| "learning_rate": 0.001, |
| "loss": 1.5572, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.8828051090240479, |
| "learning_rate": 0.001, |
| "loss": 1.5555, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 1.6634325981140137, |
| "learning_rate": 0.001, |
| "loss": 1.5683, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.8589503765106201, |
| "learning_rate": 0.001, |
| "loss": 1.5656, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 1.3951932191848755, |
| "learning_rate": 0.001, |
| "loss": 1.5538, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.4658318758010864, |
| "learning_rate": 0.001, |
| "loss": 1.5737, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 1.5063440799713135, |
| "learning_rate": 0.001, |
| "loss": 1.5675, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.8502857685089111, |
| "learning_rate": 0.001, |
| "loss": 1.5768, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 1.649667739868164, |
| "learning_rate": 0.001, |
| "loss": 1.5736, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.669318437576294, |
| "learning_rate": 0.001, |
| "loss": 1.5923, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 1.6056246757507324, |
| "learning_rate": 0.001, |
| "loss": 1.6, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 1.629918098449707, |
| "learning_rate": 0.001, |
| "loss": 1.5971, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 1.788417935371399, |
| "learning_rate": 0.001, |
| "loss": 1.5848, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.831823706626892, |
| "learning_rate": 0.001, |
| "loss": 1.5893, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 1.2721410989761353, |
| "learning_rate": 0.001, |
| "loss": 1.5994, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 1.7463867664337158, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.6742517948150635, |
| "learning_rate": 0.001, |
| "loss": 1.5455, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 1.5779138803482056, |
| "learning_rate": 0.001, |
| "loss": 1.4833, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.3153148889541626, |
| "learning_rate": 0.001, |
| "loss": 1.482, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 1.6626505851745605, |
| "learning_rate": 0.001, |
| "loss": 1.5004, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 1.7429184913635254, |
| "learning_rate": 0.001, |
| "loss": 1.4972, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.5026839971542358, |
| "learning_rate": 0.001, |
| "loss": 1.5169, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.4758402109146118, |
| "learning_rate": 0.001, |
| "loss": 1.511, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.134965419769287, |
| "learning_rate": 0.001, |
| "loss": 1.5019, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 1.6340572834014893, |
| "learning_rate": 0.001, |
| "loss": 1.5235, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 1.5940749645233154, |
| "learning_rate": 0.001, |
| "loss": 1.5112, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 1.5816295146942139, |
| "learning_rate": 0.001, |
| "loss": 1.5415, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 1.4606095552444458, |
| "learning_rate": 0.001, |
| "loss": 1.5149, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.4606409072875977, |
| "learning_rate": 0.001, |
| "loss": 1.5175, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.3545846939086914, |
| "learning_rate": 0.001, |
| "loss": 1.5255, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.2157231569290161, |
| "learning_rate": 0.001, |
| "loss": 1.5399, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.1669540405273438, |
| "learning_rate": 0.001, |
| "loss": 1.5439, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.5640186071395874, |
| "learning_rate": 0.001, |
| "loss": 1.5655, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.237733006477356, |
| "learning_rate": 0.001, |
| "loss": 1.5536, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 1.4821105003356934, |
| "learning_rate": 0.001, |
| "loss": 1.5481, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.6118173599243164, |
| "learning_rate": 0.001, |
| "loss": 1.5542, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.2756856679916382, |
| "learning_rate": 0.001, |
| "loss": 1.552, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 1.218289852142334, |
| "learning_rate": 0.001, |
| "loss": 1.5685, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.4309395551681519, |
| "learning_rate": 0.001, |
| "loss": 1.5637, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.3158926963806152, |
| "learning_rate": 0.001, |
| "loss": 1.5833, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.5380806922912598, |
| "learning_rate": 0.001, |
| "loss": 1.5753, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.1491740942001343, |
| "learning_rate": 0.001, |
| "loss": 1.5885, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.592486023902893, |
| "learning_rate": 0.001, |
| "loss": 1.5668, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 1.9322662353515625, |
| "learning_rate": 0.001, |
| "loss": 1.5971, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 1.2906183004379272, |
| "learning_rate": 0.001, |
| "loss": 1.5866, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.088273048400879, |
| "learning_rate": 0.001, |
| "loss": 1.5761, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 1.6351017951965332, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.1794394254684448, |
| "learning_rate": 0.001, |
| "loss": 1.5269, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.6122803688049316, |
| "learning_rate": 0.001, |
| "loss": 1.4612, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.1957712173461914, |
| "learning_rate": 0.001, |
| "loss": 1.479, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 1.351511836051941, |
| "learning_rate": 0.001, |
| "loss": 1.4906, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.3004039525985718, |
| "learning_rate": 0.001, |
| "loss": 1.4888, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.1758172512054443, |
| "learning_rate": 0.001, |
| "loss": 1.5036, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 1.198196291923523, |
| "learning_rate": 0.001, |
| "loss": 1.5142, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.2117112874984741, |
| "learning_rate": 0.001, |
| "loss": 1.4881, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 1.1988940238952637, |
| "learning_rate": 0.001, |
| "loss": 1.5021, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 1.9979110956192017, |
| "learning_rate": 0.001, |
| "loss": 1.5264, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 1.6691573858261108, |
| "learning_rate": 0.001, |
| "loss": 1.5069, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 1.5724232196807861, |
| "learning_rate": 0.001, |
| "loss": 1.5237, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.1864527463912964, |
| "learning_rate": 0.001, |
| "loss": 1.5269, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.6931548118591309, |
| "learning_rate": 0.001, |
| "loss": 1.5398, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.3704824447631836, |
| "learning_rate": 0.001, |
| "loss": 1.5117, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.9242581129074097, |
| "learning_rate": 0.001, |
| "loss": 1.5334, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.3759894371032715, |
| "learning_rate": 0.001, |
| "loss": 1.5466, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.9773072004318237, |
| "learning_rate": 0.001, |
| "loss": 1.5369, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.736612319946289, |
| "learning_rate": 0.001, |
| "loss": 1.5414, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 1.4435505867004395, |
| "learning_rate": 0.001, |
| "loss": 1.5401, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.3289904594421387, |
| "learning_rate": 0.001, |
| "loss": 1.5401, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 1.6186822652816772, |
| "learning_rate": 0.001, |
| "loss": 1.5489, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.3240885734558105, |
| "learning_rate": 0.001, |
| "loss": 1.5423, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.7645050287246704, |
| "learning_rate": 0.001, |
| "loss": 1.5673, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.4898263216018677, |
| "learning_rate": 0.001, |
| "loss": 1.5592, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.2853797674179077, |
| "learning_rate": 0.001, |
| "loss": 1.573, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.157740592956543, |
| "learning_rate": 0.001, |
| "loss": 1.5736, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.4297878742218018, |
| "learning_rate": 0.001, |
| "loss": 1.5674, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 1.212586760520935, |
| "learning_rate": 0.001, |
| "loss": 1.5619, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.6560161113739014, |
| "learning_rate": 0.001, |
| "loss": 1.5732, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.7148549556732178, |
| "learning_rate": 0.001, |
| "loss": 1.597, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.8989347219467163, |
| "learning_rate": 0.001, |
| "loss": 1.5026, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.1705671548843384, |
| "learning_rate": 0.001, |
| "loss": 1.4667, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.3613288402557373, |
| "learning_rate": 0.001, |
| "loss": 1.4673, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.5595178604125977, |
| "learning_rate": 0.001, |
| "loss": 1.4802, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 1.4763859510421753, |
| "learning_rate": 0.001, |
| "loss": 1.4813, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 2.1364266872406006, |
| "learning_rate": 0.001, |
| "loss": 1.4854, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.6717660427093506, |
| "learning_rate": 0.001, |
| "loss": 1.4927, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.4224249124526978, |
| "learning_rate": 0.001, |
| "loss": 1.4813, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 1.2694761753082275, |
| "learning_rate": 0.001, |
| "loss": 1.4936, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.412690281867981, |
| "learning_rate": 0.001, |
| "loss": 1.4977, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.2019102573394775, |
| "learning_rate": 0.001, |
| "loss": 1.5026, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.1802592277526855, |
| "learning_rate": 0.001, |
| "loss": 1.5033, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 1.4369657039642334, |
| "learning_rate": 0.001, |
| "loss": 1.5047, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 2.012082576751709, |
| "learning_rate": 0.001, |
| "loss": 1.5172, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.4512064456939697, |
| "learning_rate": 0.001, |
| "loss": 1.531, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.473134160041809, |
| "learning_rate": 0.001, |
| "loss": 1.5123, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.535452961921692, |
| "learning_rate": 0.001, |
| "loss": 1.5278, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.4422191381454468, |
| "learning_rate": 0.001, |
| "loss": 1.5345, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.3806055784225464, |
| "learning_rate": 0.001, |
| "loss": 1.5424, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.1664576530456543, |
| "learning_rate": 0.001, |
| "loss": 1.5507, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.386029839515686, |
| "learning_rate": 0.001, |
| "loss": 1.5445, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.5522286891937256, |
| "learning_rate": 0.001, |
| "loss": 1.5355, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.3496739864349365, |
| "learning_rate": 0.001, |
| "loss": 1.5467, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 1.4825329780578613, |
| "learning_rate": 0.001, |
| "loss": 1.5519, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 1.8328793048858643, |
| "learning_rate": 0.001, |
| "loss": 1.5495, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.2069604396820068, |
| "learning_rate": 0.001, |
| "loss": 1.5528, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.3283594846725464, |
| "learning_rate": 0.001, |
| "loss": 1.5547, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.2219054698944092, |
| "learning_rate": 0.001, |
| "loss": 1.566, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.385223150253296, |
| "learning_rate": 0.001, |
| "loss": 1.5755, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.4032306671142578, |
| "learning_rate": 0.001, |
| "loss": 1.5737, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.1347252130508423, |
| "learning_rate": 0.001, |
| "loss": 1.5622, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.2946356534957886, |
| "learning_rate": 0.001, |
| "loss": 1.4943, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.4891058206558228, |
| "learning_rate": 0.001, |
| "loss": 1.4452, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.3868176937103271, |
| "learning_rate": 0.001, |
| "loss": 1.4695, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.577714443206787, |
| "learning_rate": 0.001, |
| "loss": 1.4669, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.3947536945343018, |
| "learning_rate": 0.001, |
| "loss": 1.4661, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 1.5214914083480835, |
| "learning_rate": 0.001, |
| "loss": 1.4743, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.5438205003738403, |
| "learning_rate": 0.001, |
| "loss": 1.4707, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.4677503108978271, |
| "learning_rate": 0.001, |
| "loss": 1.4798, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.7563285827636719, |
| "learning_rate": 0.001, |
| "loss": 1.4916, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.5936578512191772, |
| "learning_rate": 0.001, |
| "loss": 1.4947, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 2.0166072845458984, |
| "learning_rate": 0.001, |
| "loss": 1.482, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 2.286106824874878, |
| "learning_rate": 0.001, |
| "loss": 1.4874, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.1472430229187012, |
| "learning_rate": 0.001, |
| "loss": 1.5037, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.718253493309021, |
| "learning_rate": 0.001, |
| "loss": 1.5103, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.5824445486068726, |
| "learning_rate": 0.001, |
| "loss": 1.5, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 1.6907223463058472, |
| "learning_rate": 0.001, |
| "loss": 1.5095, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 1.6708906888961792, |
| "learning_rate": 0.001, |
| "loss": 1.5171, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 1.2911310195922852, |
| "learning_rate": 0.001, |
| "loss": 1.5061, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.8056236505508423, |
| "learning_rate": 0.001, |
| "loss": 1.5303, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.9205259084701538, |
| "learning_rate": 0.001, |
| "loss": 1.5448, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.5220394134521484, |
| "learning_rate": 0.001, |
| "loss": 1.5306, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 1.3217031955718994, |
| "learning_rate": 0.001, |
| "loss": 1.5296, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 1.3838924169540405, |
| "learning_rate": 0.001, |
| "loss": 1.5338, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 1.1662184000015259, |
| "learning_rate": 0.001, |
| "loss": 1.5573, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.1319751739501953, |
| "learning_rate": 0.001, |
| "loss": 1.5474, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.2447794675827026, |
| "learning_rate": 0.001, |
| "loss": 1.5355, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.6842137575149536, |
| "learning_rate": 0.001, |
| "loss": 1.5658, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.3559083938598633, |
| "learning_rate": 0.001, |
| "loss": 1.5476, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.6996843814849854, |
| "learning_rate": 0.001, |
| "loss": 1.553, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.1497136354446411, |
| "learning_rate": 0.001, |
| "loss": 1.5744, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.206533432006836, |
| "learning_rate": 0.001, |
| "loss": 1.5465, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 1.6470812559127808, |
| "learning_rate": 0.001, |
| "loss": 1.4777, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.5419559478759766, |
| "learning_rate": 0.001, |
| "loss": 1.4359, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.462594747543335, |
| "learning_rate": 0.001, |
| "loss": 1.4459, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 1.344820499420166, |
| "learning_rate": 0.001, |
| "loss": 1.4427, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.4749201536178589, |
| "learning_rate": 0.001, |
| "loss": 1.4529, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 1.2434271574020386, |
| "learning_rate": 0.001, |
| "loss": 1.4589, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.9539108276367188, |
| "learning_rate": 0.001, |
| "loss": 1.4697, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.7359395027160645, |
| "learning_rate": 0.001, |
| "loss": 1.4675, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 1.6121803522109985, |
| "learning_rate": 0.001, |
| "loss": 1.4893, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 1.3574376106262207, |
| "learning_rate": 0.001, |
| "loss": 1.4833, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.2601439952850342, |
| "learning_rate": 0.001, |
| "loss": 1.4871, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.9087001085281372, |
| "learning_rate": 0.001, |
| "loss": 1.4774, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 1.5292835235595703, |
| "learning_rate": 0.001, |
| "loss": 1.4792, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 1.6694928407669067, |
| "learning_rate": 0.001, |
| "loss": 1.4967, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.6116362810134888, |
| "learning_rate": 0.001, |
| "loss": 1.4972, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 1.9715216159820557, |
| "learning_rate": 0.001, |
| "loss": 1.5139, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 1.5410069227218628, |
| "learning_rate": 0.001, |
| "loss": 1.5203, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.3730778694152832, |
| "learning_rate": 0.001, |
| "loss": 1.5153, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.2727103233337402, |
| "learning_rate": 0.001, |
| "loss": 1.5312, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.5073052644729614, |
| "learning_rate": 0.001, |
| "loss": 1.5263, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 2.369584321975708, |
| "learning_rate": 0.001, |
| "loss": 1.5204, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.1945735216140747, |
| "learning_rate": 0.001, |
| "loss": 1.5412, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 1.331651210784912, |
| "learning_rate": 0.001, |
| "loss": 1.524, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 1.285965919494629, |
| "learning_rate": 0.001, |
| "loss": 1.5177, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.4945372343063354, |
| "learning_rate": 0.001, |
| "loss": 1.5508, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 1.5643702745437622, |
| "learning_rate": 0.001, |
| "loss": 1.543, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 1.7975209951400757, |
| "learning_rate": 0.001, |
| "loss": 1.5458, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.5219054222106934, |
| "learning_rate": 0.001, |
| "loss": 1.5455, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 1.8230688571929932, |
| "learning_rate": 0.001, |
| "loss": 1.5479, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 1.3300844430923462, |
| "learning_rate": 0.001, |
| "loss": 1.5498, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 1.855148196220398, |
| "learning_rate": 0.001, |
| "loss": 1.5354, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 1.267386555671692, |
| "learning_rate": 0.001, |
| "loss": 1.4431, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.9030799865722656, |
| "learning_rate": 0.001, |
| "loss": 1.4201, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.768187165260315, |
| "learning_rate": 0.001, |
| "loss": 1.4284, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 1.4768826961517334, |
| "learning_rate": 0.001, |
| "loss": 1.4641, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.999047040939331, |
| "learning_rate": 0.001, |
| "loss": 1.4743, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 1.8538107872009277, |
| "learning_rate": 0.001, |
| "loss": 1.4551, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 1.830643892288208, |
| "learning_rate": 0.001, |
| "loss": 1.4559, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.6101281642913818, |
| "learning_rate": 0.001, |
| "loss": 1.4737, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 1.428889513015747, |
| "learning_rate": 0.001, |
| "loss": 1.4659, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 1.7600284814834595, |
| "learning_rate": 0.001, |
| "loss": 1.4773, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 1.3676204681396484, |
| "learning_rate": 0.001, |
| "loss": 1.4902, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 1.7085014581680298, |
| "learning_rate": 0.001, |
| "loss": 1.4743, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.8393412828445435, |
| "learning_rate": 0.001, |
| "loss": 1.4702, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.8859540224075317, |
| "learning_rate": 0.001, |
| "loss": 1.4855, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.6094419956207275, |
| "learning_rate": 0.001, |
| "loss": 1.4896, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 1.5427072048187256, |
| "learning_rate": 0.001, |
| "loss": 1.5094, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.7525871992111206, |
| "learning_rate": 0.001, |
| "loss": 1.502, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.5022687911987305, |
| "learning_rate": 0.001, |
| "loss": 1.5054, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 1.9936814308166504, |
| "learning_rate": 0.001, |
| "loss": 1.5191, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 1.6191157102584839, |
| "learning_rate": 0.001, |
| "loss": 1.5084, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 1.296644687652588, |
| "learning_rate": 0.001, |
| "loss": 1.5226, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.4507417678833008, |
| "learning_rate": 0.001, |
| "loss": 1.5152, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 1.6204619407653809, |
| "learning_rate": 0.001, |
| "loss": 1.5141, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 1.3483736515045166, |
| "learning_rate": 0.001, |
| "loss": 1.5228, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 1.2593847513198853, |
| "learning_rate": 0.001, |
| "loss": 1.5208, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.685653567314148, |
| "learning_rate": 0.001, |
| "loss": 1.5312, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 1.7216143608093262, |
| "learning_rate": 0.001, |
| "loss": 1.5274, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 2.0874483585357666, |
| "learning_rate": 0.001, |
| "loss": 1.5377, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 1.7108731269836426, |
| "learning_rate": 0.001, |
| "loss": 1.5434, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 1.8892107009887695, |
| "learning_rate": 0.001, |
| "loss": 1.5339, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 1.5250515937805176, |
| "learning_rate": 0.001, |
| "loss": 1.5228, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 2.090299129486084, |
| "learning_rate": 0.001, |
| "loss": 1.4188, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 1.6703615188598633, |
| "learning_rate": 0.001, |
| "loss": 1.4204, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 1.480782389640808, |
| "learning_rate": 0.001, |
| "loss": 1.4396, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 1.6650584936141968, |
| "learning_rate": 0.001, |
| "loss": 1.4409, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 2.058734178543091, |
| "learning_rate": 0.001, |
| "loss": 1.4552, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 1.3215047121047974, |
| "learning_rate": 0.001, |
| "loss": 1.4592, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 1.9270176887512207, |
| "learning_rate": 0.001, |
| "loss": 1.4427, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 1.6073015928268433, |
| "learning_rate": 0.001, |
| "loss": 1.4681, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 1.4834781885147095, |
| "learning_rate": 0.001, |
| "loss": 1.4491, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 1.7395453453063965, |
| "learning_rate": 0.001, |
| "loss": 1.473, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 1.753955364227295, |
| "learning_rate": 0.001, |
| "loss": 1.4717, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 2.1493654251098633, |
| "learning_rate": 0.001, |
| "loss": 1.4602, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.885465145111084, |
| "learning_rate": 0.001, |
| "loss": 1.4788, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 1.9838215112686157, |
| "learning_rate": 0.001, |
| "loss": 1.492, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 1.5979394912719727, |
| "learning_rate": 0.001, |
| "loss": 1.4711, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 1.5088797807693481, |
| "learning_rate": 0.001, |
| "loss": 1.48, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 1.7536591291427612, |
| "learning_rate": 0.001, |
| "loss": 1.4945, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 1.5535506010055542, |
| "learning_rate": 0.001, |
| "loss": 1.4962, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 2.4775919914245605, |
| "learning_rate": 0.001, |
| "loss": 1.4974, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 2.5249624252319336, |
| "learning_rate": 0.001, |
| "loss": 1.4985, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 2.7047958374023438, |
| "learning_rate": 0.001, |
| "loss": 1.5138, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 2.077918291091919, |
| "learning_rate": 0.001, |
| "loss": 1.5047, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 1.75831937789917, |
| "learning_rate": 0.001, |
| "loss": 1.5122, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 1.9813868999481201, |
| "learning_rate": 0.001, |
| "loss": 1.5156, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 1.6824148893356323, |
| "learning_rate": 0.001, |
| "loss": 1.5052, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 1.811954379081726, |
| "learning_rate": 0.001, |
| "loss": 1.52, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 1.7890185117721558, |
| "learning_rate": 0.001, |
| "loss": 1.5331, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 1.6107674837112427, |
| "learning_rate": 0.001, |
| "loss": 1.5288, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.9740185737609863, |
| "learning_rate": 0.001, |
| "loss": 1.5201, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 1.9510257244110107, |
| "learning_rate": 0.001, |
| "loss": 1.5429, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 1.516554355621338, |
| "learning_rate": 0.001, |
| "loss": 1.516, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.8894391059875488, |
| "learning_rate": 0.001, |
| "loss": 1.4038, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 1.7522579431533813, |
| "learning_rate": 0.001, |
| "loss": 1.4134, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 2.453711748123169, |
| "learning_rate": 0.001, |
| "loss": 1.4311, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 1.8193609714508057, |
| "learning_rate": 0.001, |
| "loss": 1.4322, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 1.7121226787567139, |
| "learning_rate": 0.001, |
| "loss": 1.4251, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 2.3753931522369385, |
| "learning_rate": 0.001, |
| "loss": 1.4413, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 1.9751302003860474, |
| "learning_rate": 0.001, |
| "loss": 1.4579, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 1.4990696907043457, |
| "learning_rate": 0.001, |
| "loss": 1.4414, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 2.51631236076355, |
| "learning_rate": 0.001, |
| "loss": 1.4483, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 1.1820275783538818, |
| "learning_rate": 0.001, |
| "loss": 1.4559, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 1.8012865781784058, |
| "learning_rate": 0.001, |
| "loss": 1.4585, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 1.9256772994995117, |
| "learning_rate": 0.001, |
| "loss": 1.4568, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 2.4829864501953125, |
| "learning_rate": 0.001, |
| "loss": 1.4823, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 1.6891846656799316, |
| "learning_rate": 0.001, |
| "loss": 1.4668, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 2.690870523452759, |
| "learning_rate": 0.001, |
| "loss": 1.4826, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 2.1645565032958984, |
| "learning_rate": 0.001, |
| "loss": 1.4863, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 1.723306655883789, |
| "learning_rate": 0.001, |
| "loss": 1.4837, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 1.591162919998169, |
| "learning_rate": 0.001, |
| "loss": 1.4975, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 1.988744854927063, |
| "learning_rate": 0.001, |
| "loss": 1.4862, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 1.8148858547210693, |
| "learning_rate": 0.001, |
| "loss": 1.4787, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 1.5285876989364624, |
| "learning_rate": 0.001, |
| "loss": 1.4985, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 1.3206168413162231, |
| "learning_rate": 0.001, |
| "loss": 1.4879, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 1.4289054870605469, |
| "learning_rate": 0.001, |
| "loss": 1.4955, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 1.9192121028900146, |
| "learning_rate": 0.001, |
| "loss": 1.5137, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 2.1331663131713867, |
| "learning_rate": 0.001, |
| "loss": 1.4892, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 1.5964634418487549, |
| "learning_rate": 0.001, |
| "loss": 1.5037, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 2.2187821865081787, |
| "learning_rate": 0.001, |
| "loss": 1.5305, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 1.634127140045166, |
| "learning_rate": 0.001, |
| "loss": 1.5144, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 2.275919198989868, |
| "learning_rate": 0.001, |
| "loss": 1.5178, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 2.2488794326782227, |
| "learning_rate": 0.001, |
| "loss": 1.5312, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.4717329740524292, |
| "learning_rate": 0.001, |
| "loss": 1.5331, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 1.4725350141525269, |
| "learning_rate": 0.001, |
| "loss": 1.3944, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.347701907157898, |
| "learning_rate": 0.001, |
| "loss": 1.4202, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 1.8046602010726929, |
| "learning_rate": 0.001, |
| "loss": 1.4218, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.684139609336853, |
| "learning_rate": 0.001, |
| "loss": 1.4481, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 2.3283982276916504, |
| "learning_rate": 0.001, |
| "loss": 1.4293, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 1.7807434797286987, |
| "learning_rate": 0.001, |
| "loss": 1.4385, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 1.2383530139923096, |
| "learning_rate": 0.001, |
| "loss": 1.44, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 1.3734993934631348, |
| "learning_rate": 0.001, |
| "loss": 1.4455, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 1.8806383609771729, |
| "learning_rate": 0.001, |
| "loss": 1.4463, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 1.524986743927002, |
| "learning_rate": 0.001, |
| "loss": 1.4357, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.678237795829773, |
| "learning_rate": 0.001, |
| "loss": 1.4457, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 1.5592761039733887, |
| "learning_rate": 0.001, |
| "loss": 1.459, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 1.8970290422439575, |
| "learning_rate": 0.001, |
| "loss": 1.4517, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.3881733417510986, |
| "learning_rate": 0.001, |
| "loss": 1.4662, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 1.554467797279358, |
| "learning_rate": 0.001, |
| "loss": 1.4669, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 1.3873423337936401, |
| "learning_rate": 0.001, |
| "loss": 1.4684, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.2659697532653809, |
| "learning_rate": 0.001, |
| "loss": 1.4759, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 2.0712532997131348, |
| "learning_rate": 0.001, |
| "loss": 1.4665, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.8107807636260986, |
| "learning_rate": 0.001, |
| "loss": 1.481, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 1.7803953886032104, |
| "learning_rate": 0.001, |
| "loss": 1.4729, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 1.3027220964431763, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 1.3887300491333008, |
| "learning_rate": 0.001, |
| "loss": 1.4839, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 1.626133918762207, |
| "learning_rate": 0.001, |
| "loss": 1.493, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.6508300304412842, |
| "learning_rate": 0.001, |
| "loss": 1.4785, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 1.3312135934829712, |
| "learning_rate": 0.001, |
| "loss": 1.4954, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 2.1756434440612793, |
| "learning_rate": 0.001, |
| "loss": 1.5145, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.4839807748794556, |
| "learning_rate": 0.001, |
| "loss": 1.5196, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 1.1613637208938599, |
| "learning_rate": 0.001, |
| "loss": 1.5089, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.2411142587661743, |
| "learning_rate": 0.001, |
| "loss": 1.5188, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 1.54549241065979, |
| "learning_rate": 0.001, |
| "loss": 1.5081, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.4730149507522583, |
| "learning_rate": 0.001, |
| "loss": 1.5034, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.7247742414474487, |
| "learning_rate": 0.001, |
| "loss": 1.4005, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 1.249778389930725, |
| "learning_rate": 0.001, |
| "loss": 1.4052, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 1.4992344379425049, |
| "learning_rate": 0.001, |
| "loss": 1.4217, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.7354190349578857, |
| "learning_rate": 0.001, |
| "loss": 1.417, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.257768154144287, |
| "learning_rate": 0.001, |
| "loss": 1.4199, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.6139253377914429, |
| "learning_rate": 0.001, |
| "loss": 1.4124, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.6175397634506226, |
| "learning_rate": 0.001, |
| "loss": 1.4277, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 1.3936841487884521, |
| "learning_rate": 0.001, |
| "loss": 1.4343, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.8072025775909424, |
| "learning_rate": 0.001, |
| "loss": 1.4298, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.3766324520111084, |
| "learning_rate": 0.001, |
| "loss": 1.4584, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 1.3047598600387573, |
| "learning_rate": 0.001, |
| "loss": 1.4385, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.836218237876892, |
| "learning_rate": 0.001, |
| "loss": 1.4313, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 1.3267902135849, |
| "learning_rate": 0.001, |
| "loss": 1.4397, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 1.6668914556503296, |
| "learning_rate": 0.001, |
| "loss": 1.454, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 1.2520886659622192, |
| "learning_rate": 0.001, |
| "loss": 1.4726, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.1094666719436646, |
| "learning_rate": 0.001, |
| "loss": 1.4574, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.2861254215240479, |
| "learning_rate": 0.001, |
| "loss": 1.4626, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 1.371955394744873, |
| "learning_rate": 0.001, |
| "loss": 1.4746, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.4750844240188599, |
| "learning_rate": 0.001, |
| "loss": 1.4771, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.984249472618103, |
| "learning_rate": 0.001, |
| "loss": 1.4874, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.4645411968231201, |
| "learning_rate": 0.001, |
| "loss": 1.4786, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 1.7948253154754639, |
| "learning_rate": 0.001, |
| "loss": 1.4895, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.4121185541152954, |
| "learning_rate": 0.001, |
| "loss": 1.482, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 2.0017154216766357, |
| "learning_rate": 0.001, |
| "loss": 1.4704, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 1.3346022367477417, |
| "learning_rate": 0.001, |
| "loss": 1.4875, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.9833992719650269, |
| "learning_rate": 0.001, |
| "loss": 1.4983, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 1.3632301092147827, |
| "learning_rate": 0.001, |
| "loss": 1.4786, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.5925140380859375, |
| "learning_rate": 0.001, |
| "loss": 1.4915, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 1.153775691986084, |
| "learning_rate": 0.001, |
| "loss": 1.5128, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.3893563747406006, |
| "learning_rate": 0.001, |
| "loss": 1.5076, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 1.3907309770584106, |
| "learning_rate": 0.001, |
| "loss": 1.4967, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.9319952726364136, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 0.9622918963432312, |
| "learning_rate": 0.001, |
| "loss": 1.3895, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.3357387781143188, |
| "learning_rate": 0.001, |
| "loss": 1.4076, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 1.3829296827316284, |
| "learning_rate": 0.001, |
| "loss": 1.4108, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.5978502035140991, |
| "learning_rate": 0.001, |
| "loss": 1.4024, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 1.342020034790039, |
| "learning_rate": 0.001, |
| "loss": 1.414, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.5343835353851318, |
| "learning_rate": 0.001, |
| "loss": 1.4224, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.2469536066055298, |
| "learning_rate": 0.001, |
| "loss": 1.428, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.3249125480651855, |
| "learning_rate": 0.001, |
| "loss": 1.4307, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 2.6667747497558594, |
| "learning_rate": 0.001, |
| "loss": 1.4408, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 1.1911907196044922, |
| "learning_rate": 0.001, |
| "loss": 1.4318, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.585929274559021, |
| "learning_rate": 0.001, |
| "loss": 1.4405, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.2287694215774536, |
| "learning_rate": 0.001, |
| "loss": 1.4416, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 1.3678672313690186, |
| "learning_rate": 0.001, |
| "loss": 1.4487, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 1.9033615589141846, |
| "learning_rate": 0.001, |
| "loss": 1.4612, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 1.2729027271270752, |
| "learning_rate": 0.001, |
| "loss": 1.4598, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 1.6137685775756836, |
| "learning_rate": 0.001, |
| "loss": 1.4697, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.2115042209625244, |
| "learning_rate": 0.001, |
| "loss": 1.4754, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.4818336963653564, |
| "learning_rate": 0.001, |
| "loss": 1.4469, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.2290292978286743, |
| "learning_rate": 0.001, |
| "loss": 1.4621, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.5580615997314453, |
| "learning_rate": 0.001, |
| "loss": 1.4557, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 1.2235702276229858, |
| "learning_rate": 0.001, |
| "loss": 1.4739, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.3991035223007202, |
| "learning_rate": 0.001, |
| "loss": 1.4676, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 2.4721031188964844, |
| "learning_rate": 0.001, |
| "loss": 1.4849, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.246346354484558, |
| "learning_rate": 0.001, |
| "loss": 1.4824, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.8876065015792847, |
| "learning_rate": 0.001, |
| "loss": 1.5024, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.9916150569915771, |
| "learning_rate": 0.001, |
| "loss": 1.4854, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 1.6076717376708984, |
| "learning_rate": 0.001, |
| "loss": 1.4978, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.393532156944275, |
| "learning_rate": 0.001, |
| "loss": 1.4932, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 1.4620736837387085, |
| "learning_rate": 0.001, |
| "loss": 1.4862, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.4317773580551147, |
| "learning_rate": 0.001, |
| "loss": 1.486, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 1.4598522186279297, |
| "learning_rate": 0.001, |
| "loss": 1.3638, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.6084305047988892, |
| "learning_rate": 0.001, |
| "loss": 1.3949, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 1.2353745698928833, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.1433589458465576, |
| "learning_rate": 0.001, |
| "loss": 1.3986, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.6471236944198608, |
| "learning_rate": 0.001, |
| "loss": 1.3963, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.7072899341583252, |
| "learning_rate": 0.001, |
| "loss": 1.412, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.123854637145996, |
| "learning_rate": 0.001, |
| "loss": 1.422, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.6275721788406372, |
| "learning_rate": 0.001, |
| "loss": 1.3955, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.697174072265625, |
| "learning_rate": 0.001, |
| "loss": 1.4271, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.770520806312561, |
| "learning_rate": 0.001, |
| "loss": 1.4234, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.3543721437454224, |
| "learning_rate": 0.001, |
| "loss": 1.4203, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 1.134240984916687, |
| "learning_rate": 0.001, |
| "loss": 1.4453, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.407908320426941, |
| "learning_rate": 0.001, |
| "loss": 1.4446, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 2.0162112712860107, |
| "learning_rate": 0.001, |
| "loss": 1.4301, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.4100801944732666, |
| "learning_rate": 0.001, |
| "loss": 1.4479, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 1.562717318534851, |
| "learning_rate": 0.001, |
| "loss": 1.4426, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.5795971155166626, |
| "learning_rate": 0.001, |
| "loss": 1.4547, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.2706962823867798, |
| "learning_rate": 0.001, |
| "loss": 1.4572, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.587480068206787, |
| "learning_rate": 0.001, |
| "loss": 1.4593, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.6591274738311768, |
| "learning_rate": 0.001, |
| "loss": 1.4629, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.419433355331421, |
| "learning_rate": 0.001, |
| "loss": 1.454, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 1.8765602111816406, |
| "learning_rate": 0.001, |
| "loss": 1.4504, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.2230541706085205, |
| "learning_rate": 0.001, |
| "loss": 1.4643, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.6914892196655273, |
| "learning_rate": 0.001, |
| "loss": 1.4887, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.3152620792388916, |
| "learning_rate": 0.001, |
| "loss": 1.4798, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 1.575087070465088, |
| "learning_rate": 0.001, |
| "loss": 1.4931, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 1.1092244386672974, |
| "learning_rate": 0.001, |
| "loss": 1.4747, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 2.302403450012207, |
| "learning_rate": 0.001, |
| "loss": 1.4785, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 1.587856650352478, |
| "learning_rate": 0.001, |
| "loss": 1.4847, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 1.3023202419281006, |
| "learning_rate": 0.001, |
| "loss": 1.5005, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.4531750679016113, |
| "learning_rate": 0.001, |
| "loss": 1.4403, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.6974835395812988, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 2.306978702545166, |
| "learning_rate": 0.001, |
| "loss": 1.3747, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 1.7029024362564087, |
| "learning_rate": 0.001, |
| "loss": 1.3762, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.4630860090255737, |
| "learning_rate": 0.001, |
| "loss": 1.3893, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.477061152458191, |
| "learning_rate": 0.001, |
| "loss": 1.3885, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 1.8601970672607422, |
| "learning_rate": 0.001, |
| "loss": 1.4036, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.5833276510238647, |
| "learning_rate": 0.001, |
| "loss": 1.4037, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.3761273622512817, |
| "learning_rate": 0.001, |
| "loss": 1.4053, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.475314736366272, |
| "learning_rate": 0.001, |
| "loss": 1.4223, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.369614601135254, |
| "learning_rate": 0.001, |
| "loss": 1.405, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.848175287246704, |
| "learning_rate": 0.001, |
| "loss": 1.425, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.6656006574630737, |
| "learning_rate": 0.001, |
| "loss": 1.424, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 1.6123826503753662, |
| "learning_rate": 0.001, |
| "loss": 1.4376, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 1.6487890481948853, |
| "learning_rate": 0.001, |
| "loss": 1.4475, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.5751174688339233, |
| "learning_rate": 0.001, |
| "loss": 1.4366, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 1.2488092184066772, |
| "learning_rate": 0.001, |
| "loss": 1.4392, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 1.2981414794921875, |
| "learning_rate": 0.001, |
| "loss": 1.4297, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 2.0852699279785156, |
| "learning_rate": 0.001, |
| "loss": 1.4447, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.5971038341522217, |
| "learning_rate": 0.001, |
| "loss": 1.4467, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 2.1098439693450928, |
| "learning_rate": 0.001, |
| "loss": 1.451, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 2.0493147373199463, |
| "learning_rate": 0.001, |
| "loss": 1.4557, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.8858246803283691, |
| "learning_rate": 0.001, |
| "loss": 1.4635, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.9451382160186768, |
| "learning_rate": 0.001, |
| "loss": 1.4601, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 1.993582844734192, |
| "learning_rate": 0.001, |
| "loss": 1.4711, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 1.8193069696426392, |
| "learning_rate": 0.001, |
| "loss": 1.481, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.814009189605713, |
| "learning_rate": 0.001, |
| "loss": 1.4684, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 1.269718885421753, |
| "learning_rate": 0.001, |
| "loss": 1.4661, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.751963496208191, |
| "learning_rate": 0.001, |
| "loss": 1.4599, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.4845296144485474, |
| "learning_rate": 0.001, |
| "loss": 1.48, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 1.3906903266906738, |
| "learning_rate": 0.001, |
| "loss": 1.492, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.5307003259658813, |
| "learning_rate": 0.001, |
| "loss": 1.4285, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 1.7780219316482544, |
| "learning_rate": 0.001, |
| "loss": 1.3561, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 2.0099196434020996, |
| "learning_rate": 0.001, |
| "loss": 1.3792, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 1.7311296463012695, |
| "learning_rate": 0.001, |
| "loss": 1.3763, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.5329862833023071, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 1.6151118278503418, |
| "learning_rate": 0.001, |
| "loss": 1.3825, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 2.4479856491088867, |
| "learning_rate": 0.001, |
| "loss": 1.3989, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 1.8405131101608276, |
| "learning_rate": 0.001, |
| "loss": 1.3833, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 1.5066527128219604, |
| "learning_rate": 0.001, |
| "loss": 1.3924, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.6452630758285522, |
| "learning_rate": 0.001, |
| "loss": 1.4048, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 1.8133480548858643, |
| "learning_rate": 0.001, |
| "loss": 1.4033, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.680242657661438, |
| "learning_rate": 0.001, |
| "loss": 1.4081, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 1.4392516613006592, |
| "learning_rate": 0.001, |
| "loss": 1.4146, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 1.518934965133667, |
| "learning_rate": 0.001, |
| "loss": 1.419, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.6375625133514404, |
| "learning_rate": 0.001, |
| "loss": 1.4169, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 1.5314610004425049, |
| "learning_rate": 0.001, |
| "loss": 1.4245, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 1.4845598936080933, |
| "learning_rate": 0.001, |
| "loss": 1.4381, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 1.3921239376068115, |
| "learning_rate": 0.001, |
| "loss": 1.4488, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 1.6126028299331665, |
| "learning_rate": 0.001, |
| "loss": 1.4386, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.7768104076385498, |
| "learning_rate": 0.001, |
| "loss": 1.4442, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 2.0095489025115967, |
| "learning_rate": 0.001, |
| "loss": 1.4483, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 1.9532029628753662, |
| "learning_rate": 0.001, |
| "loss": 1.446, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.411705732345581, |
| "learning_rate": 0.001, |
| "loss": 1.452, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 1.5540069341659546, |
| "learning_rate": 0.001, |
| "loss": 1.4569, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.84498929977417, |
| "learning_rate": 0.001, |
| "loss": 1.4545, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.4782354831695557, |
| "learning_rate": 0.001, |
| "loss": 1.4609, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 1.464579701423645, |
| "learning_rate": 0.001, |
| "loss": 1.4645, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 1.541219711303711, |
| "learning_rate": 0.001, |
| "loss": 1.467, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 1.4827616214752197, |
| "learning_rate": 0.001, |
| "loss": 1.4673, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 1.661637783050537, |
| "learning_rate": 0.001, |
| "loss": 1.4626, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.7068231105804443, |
| "learning_rate": 0.001, |
| "loss": 1.4766, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 2.049675226211548, |
| "learning_rate": 0.001, |
| "loss": 1.3957, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 1.8042556047439575, |
| "learning_rate": 0.001, |
| "loss": 1.35, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 1.642234206199646, |
| "learning_rate": 0.001, |
| "loss": 1.3546, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 1.505800485610962, |
| "learning_rate": 0.001, |
| "loss": 1.3772, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 1.6339256763458252, |
| "learning_rate": 0.001, |
| "loss": 1.3674, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 1.936946988105774, |
| "learning_rate": 0.001, |
| "loss": 1.3837, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 1.9458225965499878, |
| "learning_rate": 0.001, |
| "loss": 1.3847, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 1.7668321132659912, |
| "learning_rate": 0.001, |
| "loss": 1.3762, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 2.21065354347229, |
| "learning_rate": 0.001, |
| "loss": 1.3969, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 2.361572504043579, |
| "learning_rate": 0.001, |
| "loss": 1.3836, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 2.1231894493103027, |
| "learning_rate": 0.001, |
| "loss": 1.4046, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 2.058026075363159, |
| "learning_rate": 0.001, |
| "loss": 1.3956, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 1.578091025352478, |
| "learning_rate": 0.001, |
| "loss": 1.4145, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 2.1628458499908447, |
| "learning_rate": 0.001, |
| "loss": 1.4188, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 1.9572947025299072, |
| "learning_rate": 0.001, |
| "loss": 1.4106, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 1.8140960931777954, |
| "learning_rate": 0.001, |
| "loss": 1.4309, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 2.127092123031616, |
| "learning_rate": 0.001, |
| "loss": 1.4259, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 1.8602912425994873, |
| "learning_rate": 0.001, |
| "loss": 1.4266, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 2.0972392559051514, |
| "learning_rate": 0.001, |
| "loss": 1.4374, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 2.137838125228882, |
| "learning_rate": 0.001, |
| "loss": 1.4413, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 1.6585432291030884, |
| "learning_rate": 0.001, |
| "loss": 1.4344, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 1.632261037826538, |
| "learning_rate": 0.001, |
| "loss": 1.4372, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 1.964889645576477, |
| "learning_rate": 0.001, |
| "loss": 1.4358, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 1.7561458349227905, |
| "learning_rate": 0.001, |
| "loss": 1.4463, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 1.3315393924713135, |
| "learning_rate": 0.001, |
| "loss": 1.4494, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 1.650225281715393, |
| "learning_rate": 0.001, |
| "loss": 1.4558, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 2.464940309524536, |
| "learning_rate": 0.001, |
| "loss": 1.4441, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 2.1637585163116455, |
| "learning_rate": 0.001, |
| "loss": 1.4602, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 1.736090064048767, |
| "learning_rate": 0.001, |
| "loss": 1.4664, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 1.712194800376892, |
| "learning_rate": 0.001, |
| "loss": 1.463, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 1.8816744089126587, |
| "learning_rate": 0.001, |
| "loss": 1.4736, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 1.9753756523132324, |
| "learning_rate": 0.001, |
| "loss": 1.3708, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 1.853826642036438, |
| "learning_rate": 0.001, |
| "loss": 1.355, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 2.6528196334838867, |
| "learning_rate": 0.001, |
| "loss": 1.3362, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 2.437607526779175, |
| "learning_rate": 0.001, |
| "loss": 1.3671, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 3.34757924079895, |
| "learning_rate": 0.001, |
| "loss": 1.379, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 2.346930742263794, |
| "learning_rate": 0.001, |
| "loss": 1.3635, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 1.8064361810684204, |
| "learning_rate": 0.001, |
| "loss": 1.3669, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 2.354738235473633, |
| "learning_rate": 0.001, |
| "loss": 1.3849, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 1.9407877922058105, |
| "learning_rate": 0.001, |
| "loss": 1.4039, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 3.2403366565704346, |
| "learning_rate": 0.001, |
| "loss": 1.3963, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 2.8879992961883545, |
| "learning_rate": 0.001, |
| "loss": 1.3914, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 2.049025774002075, |
| "learning_rate": 0.001, |
| "loss": 1.3899, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 2.4537274837493896, |
| "learning_rate": 0.001, |
| "loss": 1.4024, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 1.6752196550369263, |
| "learning_rate": 0.001, |
| "loss": 1.4074, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 1.8149118423461914, |
| "learning_rate": 0.001, |
| "loss": 1.4232, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 2.647524118423462, |
| "learning_rate": 0.001, |
| "loss": 1.3983, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 1.855002999305725, |
| "learning_rate": 0.001, |
| "loss": 1.4196, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 2.0383918285369873, |
| "learning_rate": 0.001, |
| "loss": 1.4305, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 2.192237377166748, |
| "learning_rate": 0.001, |
| "loss": 1.4212, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 3.2227871417999268, |
| "learning_rate": 0.001, |
| "loss": 1.4237, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 2.169130325317383, |
| "learning_rate": 0.001, |
| "loss": 1.4232, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 1.9388642311096191, |
| "learning_rate": 0.001, |
| "loss": 1.423, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 1.8224081993103027, |
| "learning_rate": 0.001, |
| "loss": 1.4394, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 2.401827335357666, |
| "learning_rate": 0.001, |
| "loss": 1.4528, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 1.6107642650604248, |
| "learning_rate": 0.001, |
| "loss": 1.4336, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 2.166220188140869, |
| "learning_rate": 0.001, |
| "loss": 1.4319, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 2.4662373065948486, |
| "learning_rate": 0.001, |
| "loss": 1.4491, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 2.3787710666656494, |
| "learning_rate": 0.001, |
| "loss": 1.4581, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 1.996765375137329, |
| "learning_rate": 0.001, |
| "loss": 1.46, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 1.8469176292419434, |
| "learning_rate": 0.001, |
| "loss": 1.4564, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 2.4234673976898193, |
| "learning_rate": 0.001, |
| "loss": 1.4635, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 1.4825252294540405, |
| "learning_rate": 0.001, |
| "loss": 1.407, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 1.2833598852157593, |
| "learning_rate": 0.001, |
| "loss": 1.3349, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 1.134111762046814, |
| "learning_rate": 0.001, |
| "loss": 1.3442, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 1.416589617729187, |
| "learning_rate": 0.001, |
| "loss": 1.3481, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 1.3772751092910767, |
| "learning_rate": 0.001, |
| "loss": 1.3542, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 1.658582329750061, |
| "learning_rate": 0.001, |
| "loss": 1.3667, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 1.726936936378479, |
| "learning_rate": 0.001, |
| "loss": 1.3662, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 1.7430275678634644, |
| "learning_rate": 0.001, |
| "loss": 1.3698, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 1.8353071212768555, |
| "learning_rate": 0.001, |
| "loss": 1.3772, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.454705834388733, |
| "learning_rate": 0.001, |
| "loss": 1.3869, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.9173542261123657, |
| "learning_rate": 0.001, |
| "loss": 1.3844, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.5633434057235718, |
| "learning_rate": 0.001, |
| "loss": 1.3971, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 1.1957085132598877, |
| "learning_rate": 0.001, |
| "loss": 1.3983, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 1.5280741453170776, |
| "learning_rate": 0.001, |
| "loss": 1.4086, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 1.8125216960906982, |
| "learning_rate": 0.001, |
| "loss": 1.4004, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.591016173362732, |
| "learning_rate": 0.001, |
| "loss": 1.4034, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 1.4297205209732056, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 2.205801248550415, |
| "learning_rate": 0.001, |
| "loss": 1.4221, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.6374117136001587, |
| "learning_rate": 0.001, |
| "loss": 1.4368, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 1.2918282747268677, |
| "learning_rate": 0.001, |
| "loss": 1.4166, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 1.585101842880249, |
| "learning_rate": 0.001, |
| "loss": 1.4093, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.4704420566558838, |
| "learning_rate": 0.001, |
| "loss": 1.4287, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 1.5124256610870361, |
| "learning_rate": 0.001, |
| "loss": 1.4389, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.4330766201019287, |
| "learning_rate": 0.001, |
| "loss": 1.403, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 1.3854273557662964, |
| "learning_rate": 0.001, |
| "loss": 1.4406, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 1.5856598615646362, |
| "learning_rate": 0.001, |
| "loss": 1.426, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 2.090639114379883, |
| "learning_rate": 0.001, |
| "loss": 1.45, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 1.3450896739959717, |
| "learning_rate": 0.001, |
| "loss": 1.445, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.5369879007339478, |
| "learning_rate": 0.001, |
| "loss": 1.4475, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.3403236865997314, |
| "learning_rate": 0.001, |
| "loss": 1.4611, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.4753422737121582, |
| "learning_rate": 0.001, |
| "loss": 1.4393, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 1.7514846324920654, |
| "learning_rate": 0.001, |
| "loss": 1.3893, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.336416482925415, |
| "learning_rate": 0.001, |
| "loss": 1.3374, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 1.7057934999465942, |
| "learning_rate": 0.001, |
| "loss": 1.3572, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.643702745437622, |
| "learning_rate": 0.001, |
| "loss": 1.3525, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.624499797821045, |
| "learning_rate": 0.001, |
| "loss": 1.351, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 1.3781622648239136, |
| "learning_rate": 0.001, |
| "loss": 1.3642, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.6872320175170898, |
| "learning_rate": 0.001, |
| "loss": 1.3662, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 2.0610127449035645, |
| "learning_rate": 0.001, |
| "loss": 1.3575, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 2.052093029022217, |
| "learning_rate": 0.001, |
| "loss": 1.3666, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 1.6374685764312744, |
| "learning_rate": 0.001, |
| "loss": 1.3874, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 1.2537875175476074, |
| "learning_rate": 0.001, |
| "loss": 1.3643, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.6449648141860962, |
| "learning_rate": 0.001, |
| "loss": 1.3924, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.5691554546356201, |
| "learning_rate": 0.001, |
| "loss": 1.39, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 1.6928367614746094, |
| "learning_rate": 0.001, |
| "loss": 1.3914, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 1.9325333833694458, |
| "learning_rate": 0.001, |
| "loss": 1.3968, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 1.136733055114746, |
| "learning_rate": 0.001, |
| "loss": 1.3843, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.2559707164764404, |
| "learning_rate": 0.001, |
| "loss": 1.4021, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.187476634979248, |
| "learning_rate": 0.001, |
| "loss": 1.3994, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 2.1751227378845215, |
| "learning_rate": 0.001, |
| "loss": 1.4008, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.358201503753662, |
| "learning_rate": 0.001, |
| "loss": 1.418, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 2.1808340549468994, |
| "learning_rate": 0.001, |
| "loss": 1.4228, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 1.439213752746582, |
| "learning_rate": 0.001, |
| "loss": 1.4134, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.6052058935165405, |
| "learning_rate": 0.001, |
| "loss": 1.4239, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.9990873336791992, |
| "learning_rate": 0.001, |
| "loss": 1.4161, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 1.7052737474441528, |
| "learning_rate": 0.001, |
| "loss": 1.4266, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 1.595995306968689, |
| "learning_rate": 0.001, |
| "loss": 1.4319, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.4590080976486206, |
| "learning_rate": 0.001, |
| "loss": 1.4249, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.320956826210022, |
| "learning_rate": 0.001, |
| "loss": 1.4256, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.5802559852600098, |
| "learning_rate": 0.001, |
| "loss": 1.4435, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.6122311353683472, |
| "learning_rate": 0.001, |
| "loss": 1.4411, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 1.8389743566513062, |
| "learning_rate": 0.001, |
| "loss": 1.4368, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.7278374433517456, |
| "learning_rate": 0.001, |
| "loss": 1.3722, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.2872110605239868, |
| "learning_rate": 0.001, |
| "loss": 1.3232, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.199270486831665, |
| "learning_rate": 0.001, |
| "loss": 1.3319, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 2.1235804557800293, |
| "learning_rate": 0.001, |
| "loss": 1.3375, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 1.7035527229309082, |
| "learning_rate": 0.001, |
| "loss": 1.3381, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.2697619199752808, |
| "learning_rate": 0.001, |
| "loss": 1.3564, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.3939249515533447, |
| "learning_rate": 0.001, |
| "loss": 1.3411, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 1.811110019683838, |
| "learning_rate": 0.001, |
| "loss": 1.3553, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.4949676990509033, |
| "learning_rate": 0.001, |
| "loss": 1.3715, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 1.3313863277435303, |
| "learning_rate": 0.001, |
| "loss": 1.3589, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.227896809577942, |
| "learning_rate": 0.001, |
| "loss": 1.3671, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 1.4327553510665894, |
| "learning_rate": 0.001, |
| "loss": 1.3876, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.4522910118103027, |
| "learning_rate": 0.001, |
| "loss": 1.3734, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.439487099647522, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 1.3468687534332275, |
| "learning_rate": 0.001, |
| "loss": 1.3858, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 1.7873753309249878, |
| "learning_rate": 0.001, |
| "loss": 1.3794, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.6426472663879395, |
| "learning_rate": 0.001, |
| "loss": 1.4109, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 1.3578706979751587, |
| "learning_rate": 0.001, |
| "loss": 1.4018, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.8222377300262451, |
| "learning_rate": 0.001, |
| "loss": 1.4009, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 1.3069651126861572, |
| "learning_rate": 0.001, |
| "loss": 1.4035, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 1.7161260843276978, |
| "learning_rate": 0.001, |
| "loss": 1.4051, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.2696171998977661, |
| "learning_rate": 0.001, |
| "loss": 1.406, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.432579517364502, |
| "learning_rate": 0.001, |
| "loss": 1.4206, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 1.4368444681167603, |
| "learning_rate": 0.001, |
| "loss": 1.4183, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.452947735786438, |
| "learning_rate": 0.001, |
| "loss": 1.4229, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.554884910583496, |
| "learning_rate": 0.001, |
| "loss": 1.4293, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.30029296875, |
| "learning_rate": 0.001, |
| "loss": 1.4314, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 1.4925910234451294, |
| "learning_rate": 0.001, |
| "loss": 1.4202, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.5145576000213623, |
| "learning_rate": 0.001, |
| "loss": 1.4292, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.426664113998413, |
| "learning_rate": 0.001, |
| "loss": 1.4427, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 1.0887047052383423, |
| "learning_rate": 0.001, |
| "loss": 1.4285, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 1.4953248500823975, |
| "learning_rate": 0.001, |
| "loss": 1.3586, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.7633830308914185, |
| "learning_rate": 0.001, |
| "loss": 1.3118, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 1.8045114278793335, |
| "learning_rate": 0.001, |
| "loss": 1.3062, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.6315350532531738, |
| "learning_rate": 0.001, |
| "loss": 1.3279, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 1.222394347190857, |
| "learning_rate": 0.001, |
| "loss": 1.3467, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.5777305364608765, |
| "learning_rate": 0.001, |
| "loss": 1.3519, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 1.2325632572174072, |
| "learning_rate": 0.001, |
| "loss": 1.3639, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.2535754442214966, |
| "learning_rate": 0.001, |
| "loss": 1.3473, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 1.4184768199920654, |
| "learning_rate": 0.001, |
| "loss": 1.3584, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.9302608966827393, |
| "learning_rate": 0.001, |
| "loss": 1.3527, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 1.5769174098968506, |
| "learning_rate": 0.001, |
| "loss": 1.3684, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.8105542659759521, |
| "learning_rate": 0.001, |
| "loss": 1.3696, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.3575557470321655, |
| "learning_rate": 0.001, |
| "loss": 1.3777, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.6130093336105347, |
| "learning_rate": 0.001, |
| "loss": 1.3734, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.426985740661621, |
| "learning_rate": 0.001, |
| "loss": 1.3688, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 1.4327107667922974, |
| "learning_rate": 0.001, |
| "loss": 1.3744, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 1.8427186012268066, |
| "learning_rate": 0.001, |
| "loss": 1.3844, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 1.5242940187454224, |
| "learning_rate": 0.001, |
| "loss": 1.3893, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.4538577795028687, |
| "learning_rate": 0.001, |
| "loss": 1.3919, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 1.25160551071167, |
| "learning_rate": 0.001, |
| "loss": 1.4005, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.2579987049102783, |
| "learning_rate": 0.001, |
| "loss": 1.3975, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 2.07604718208313, |
| "learning_rate": 0.001, |
| "loss": 1.3984, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.2995349168777466, |
| "learning_rate": 0.001, |
| "loss": 1.3993, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.375459909439087, |
| "learning_rate": 0.001, |
| "loss": 1.3973, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.4694504737854004, |
| "learning_rate": 0.001, |
| "loss": 1.4256, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 1.3293267488479614, |
| "learning_rate": 0.001, |
| "loss": 1.4212, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.365396499633789, |
| "learning_rate": 0.001, |
| "loss": 1.4241, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.585095763206482, |
| "learning_rate": 0.001, |
| "loss": 1.4243, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.4808619022369385, |
| "learning_rate": 0.001, |
| "loss": 1.4353, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.486207365989685, |
| "learning_rate": 0.001, |
| "loss": 1.4415, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 1.218306303024292, |
| "learning_rate": 0.001, |
| "loss": 1.4317, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 1.3337205648422241, |
| "learning_rate": 0.001, |
| "loss": 1.3458, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 1.48308527469635, |
| "learning_rate": 0.001, |
| "loss": 1.3185, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 1.4150961637496948, |
| "learning_rate": 0.001, |
| "loss": 1.307, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.5947446823120117, |
| "learning_rate": 0.001, |
| "loss": 1.3251, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 1.572667121887207, |
| "learning_rate": 0.001, |
| "loss": 1.3371, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.3705382347106934, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.3644392490386963, |
| "learning_rate": 0.001, |
| "loss": 1.3476, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 1.9536592960357666, |
| "learning_rate": 0.001, |
| "loss": 1.341, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.5622602701187134, |
| "learning_rate": 0.001, |
| "loss": 1.3687, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 1.4836679697036743, |
| "learning_rate": 0.001, |
| "loss": 1.3541, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 1.6780399084091187, |
| "learning_rate": 0.001, |
| "loss": 1.3646, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.3770062923431396, |
| "learning_rate": 0.001, |
| "loss": 1.3766, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.387833595275879, |
| "learning_rate": 0.001, |
| "loss": 1.3698, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.4488047361373901, |
| "learning_rate": 0.001, |
| "loss": 1.3665, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 1.252181053161621, |
| "learning_rate": 0.001, |
| "loss": 1.3767, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.396899938583374, |
| "learning_rate": 0.001, |
| "loss": 1.3709, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 1.8860456943511963, |
| "learning_rate": 0.001, |
| "loss": 1.3729, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.6136033535003662, |
| "learning_rate": 0.001, |
| "loss": 1.392, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 1.41468346118927, |
| "learning_rate": 0.001, |
| "loss": 1.3889, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 1.5444872379302979, |
| "learning_rate": 0.001, |
| "loss": 1.3756, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.5710728168487549, |
| "learning_rate": 0.001, |
| "loss": 1.3886, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.7316027879714966, |
| "learning_rate": 0.001, |
| "loss": 1.3814, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 1.8401299715042114, |
| "learning_rate": 0.001, |
| "loss": 1.3982, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.5051045417785645, |
| "learning_rate": 0.001, |
| "loss": 1.3961, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 1.2414015531539917, |
| "learning_rate": 0.001, |
| "loss": 1.4, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.8896925449371338, |
| "learning_rate": 0.001, |
| "loss": 1.4005, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 1.4821033477783203, |
| "learning_rate": 0.001, |
| "loss": 1.414, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 1.7811696529388428, |
| "learning_rate": 0.001, |
| "loss": 1.4248, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 1.293363332748413, |
| "learning_rate": 0.001, |
| "loss": 1.4217, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 1.5930306911468506, |
| "learning_rate": 0.001, |
| "loss": 1.4193, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 1.5660650730133057, |
| "learning_rate": 0.001, |
| "loss": 1.4292, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 1.3551260232925415, |
| "learning_rate": 0.001, |
| "loss": 1.3123, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 1.4512505531311035, |
| "learning_rate": 0.001, |
| "loss": 1.3193, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.7034558057785034, |
| "learning_rate": 0.001, |
| "loss": 1.3068, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 1.5442638397216797, |
| "learning_rate": 0.001, |
| "loss": 1.3242, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 1.3783601522445679, |
| "learning_rate": 0.001, |
| "loss": 1.3209, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 1.6340938806533813, |
| "learning_rate": 0.001, |
| "loss": 1.3332, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.429200291633606, |
| "learning_rate": 0.001, |
| "loss": 1.3159, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 1.5749098062515259, |
| "learning_rate": 0.001, |
| "loss": 1.3318, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 1.5038208961486816, |
| "learning_rate": 0.001, |
| "loss": 1.3545, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 1.7742317914962769, |
| "learning_rate": 0.001, |
| "loss": 1.3519, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 1.8789876699447632, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.5382606983184814, |
| "learning_rate": 0.001, |
| "loss": 1.356, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 1.2025485038757324, |
| "learning_rate": 0.001, |
| "loss": 1.3393, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 1.3355134725570679, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.201806664466858, |
| "learning_rate": 0.001, |
| "loss": 1.3651, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 1.2222882509231567, |
| "learning_rate": 0.001, |
| "loss": 1.3714, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.7975983619689941, |
| "learning_rate": 0.001, |
| "loss": 1.3684, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 1.6185847520828247, |
| "learning_rate": 0.001, |
| "loss": 1.3823, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 2.2196691036224365, |
| "learning_rate": 0.001, |
| "loss": 1.3948, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 1.5387237071990967, |
| "learning_rate": 0.001, |
| "loss": 1.3736, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 1.4336892366409302, |
| "learning_rate": 0.001, |
| "loss": 1.3801, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 2.1763482093811035, |
| "learning_rate": 0.001, |
| "loss": 1.3884, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 1.289603352546692, |
| "learning_rate": 0.001, |
| "loss": 1.4009, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 2.088289499282837, |
| "learning_rate": 0.001, |
| "loss": 1.386, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 1.4783682823181152, |
| "learning_rate": 0.001, |
| "loss": 1.4053, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 1.3838250637054443, |
| "learning_rate": 0.001, |
| "loss": 1.4138, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 1.427379846572876, |
| "learning_rate": 0.001, |
| "loss": 1.4119, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.6082667112350464, |
| "learning_rate": 0.001, |
| "loss": 1.403, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 1.5411512851715088, |
| "learning_rate": 0.001, |
| "loss": 1.4213, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 1.5107688903808594, |
| "learning_rate": 0.001, |
| "loss": 1.4177, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 1.9440659284591675, |
| "learning_rate": 0.001, |
| "loss": 1.4189, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 1.7341686487197876, |
| "learning_rate": 0.001, |
| "loss": 1.3041, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 1.5955758094787598, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 1.4994001388549805, |
| "learning_rate": 0.001, |
| "loss": 1.3156, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 1.6090569496154785, |
| "learning_rate": 0.001, |
| "loss": 1.3052, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 2.008561849594116, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 1.6287059783935547, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 1.3676732778549194, |
| "learning_rate": 0.001, |
| "loss": 1.3275, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 2.070295572280884, |
| "learning_rate": 0.001, |
| "loss": 1.3326, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 1.616440773010254, |
| "learning_rate": 0.001, |
| "loss": 1.3309, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 1.398174524307251, |
| "learning_rate": 0.001, |
| "loss": 1.3364, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 1.4400818347930908, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 1.355803370475769, |
| "learning_rate": 0.001, |
| "loss": 1.3546, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 1.6952476501464844, |
| "learning_rate": 0.001, |
| "loss": 1.3459, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 1.9007939100265503, |
| "learning_rate": 0.001, |
| "loss": 1.3412, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 1.2914032936096191, |
| "learning_rate": 0.001, |
| "loss": 1.3539, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 1.625333309173584, |
| "learning_rate": 0.001, |
| "loss": 1.3661, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 1.394900918006897, |
| "learning_rate": 0.001, |
| "loss": 1.3736, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.809337854385376, |
| "learning_rate": 0.001, |
| "loss": 1.3596, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 1.5900062322616577, |
| "learning_rate": 0.001, |
| "loss": 1.3831, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 1.26161789894104, |
| "learning_rate": 0.001, |
| "loss": 1.3672, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 1.521141767501831, |
| "learning_rate": 0.001, |
| "loss": 1.3775, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 1.447628140449524, |
| "learning_rate": 0.001, |
| "loss": 1.3968, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 1.5999592542648315, |
| "learning_rate": 0.001, |
| "loss": 1.3887, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 2.224372386932373, |
| "learning_rate": 0.001, |
| "loss": 1.3877, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 1.854551911354065, |
| "learning_rate": 0.001, |
| "loss": 1.3854, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.9979218244552612, |
| "learning_rate": 0.001, |
| "loss": 1.3951, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 1.5547971725463867, |
| "learning_rate": 0.001, |
| "loss": 1.3918, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 1.7112253904342651, |
| "learning_rate": 0.001, |
| "loss": 1.4129, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 1.6911065578460693, |
| "learning_rate": 0.001, |
| "loss": 1.4009, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 1.5267651081085205, |
| "learning_rate": 0.001, |
| "loss": 1.4078, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 1.7838150262832642, |
| "learning_rate": 0.001, |
| "loss": 1.3939, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 1.4784297943115234, |
| "learning_rate": 0.001, |
| "loss": 1.2894, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 2.03774094581604, |
| "learning_rate": 0.001, |
| "loss": 1.2907, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 1.9565315246582031, |
| "learning_rate": 0.001, |
| "loss": 1.3025, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.5921435356140137, |
| "learning_rate": 0.001, |
| "loss": 1.3043, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 1.700293779373169, |
| "learning_rate": 0.001, |
| "loss": 1.3172, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 1.9305436611175537, |
| "learning_rate": 0.001, |
| "loss": 1.3173, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 1.34230637550354, |
| "learning_rate": 0.001, |
| "loss": 1.3124, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 1.5128107070922852, |
| "learning_rate": 0.001, |
| "loss": 1.3324, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 1.4874143600463867, |
| "learning_rate": 0.001, |
| "loss": 1.3352, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 1.7499667406082153, |
| "learning_rate": 0.001, |
| "loss": 1.3258, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 2.480471611022949, |
| "learning_rate": 0.001, |
| "loss": 1.3301, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 1.7621139287948608, |
| "learning_rate": 0.001, |
| "loss": 1.3329, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 1.40170156955719, |
| "learning_rate": 0.001, |
| "loss": 1.3384, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 1.850550889968872, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 2.19671630859375, |
| "learning_rate": 0.001, |
| "loss": 1.3566, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 2.058199405670166, |
| "learning_rate": 0.001, |
| "loss": 1.3641, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 2.0466177463531494, |
| "learning_rate": 0.001, |
| "loss": 1.3537, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 1.7297381162643433, |
| "learning_rate": 0.001, |
| "loss": 1.362, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 2.032058000564575, |
| "learning_rate": 0.001, |
| "loss": 1.378, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 1.6873259544372559, |
| "learning_rate": 0.001, |
| "loss": 1.3815, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 2.212801456451416, |
| "learning_rate": 0.001, |
| "loss": 1.3635, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 1.6788281202316284, |
| "learning_rate": 0.001, |
| "loss": 1.3743, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 1.7809021472930908, |
| "learning_rate": 0.001, |
| "loss": 1.3808, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 2.2536916732788086, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 1.964429259300232, |
| "learning_rate": 0.001, |
| "loss": 1.3914, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 1.6335227489471436, |
| "learning_rate": 0.001, |
| "loss": 1.3955, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 2.0025200843811035, |
| "learning_rate": 0.001, |
| "loss": 1.3997, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 1.9986916780471802, |
| "learning_rate": 0.001, |
| "loss": 1.4021, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 1.4429872035980225, |
| "learning_rate": 0.001, |
| "loss": 1.4065, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 2.2495028972625732, |
| "learning_rate": 0.001, |
| "loss": 1.4194, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 1.3998140096664429, |
| "learning_rate": 0.001, |
| "loss": 1.3954, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 2.64650559425354, |
| "learning_rate": 0.001, |
| "loss": 1.2849, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 1.8668577671051025, |
| "learning_rate": 0.001, |
| "loss": 1.2997, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 2.370088577270508, |
| "learning_rate": 0.001, |
| "loss": 1.2801, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 1.514963984489441, |
| "learning_rate": 0.001, |
| "loss": 1.3137, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 1.824122667312622, |
| "learning_rate": 0.001, |
| "loss": 1.2998, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 2.221118450164795, |
| "learning_rate": 0.001, |
| "loss": 1.3128, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 1.5105535984039307, |
| "learning_rate": 0.001, |
| "loss": 1.3176, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 1.4515846967697144, |
| "learning_rate": 0.001, |
| "loss": 1.3114, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 2.0730106830596924, |
| "learning_rate": 0.001, |
| "loss": 1.3138, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 1.670240044593811, |
| "learning_rate": 0.001, |
| "loss": 1.3265, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 1.5092387199401855, |
| "learning_rate": 0.001, |
| "loss": 1.349, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 1.756965160369873, |
| "learning_rate": 0.001, |
| "loss": 1.3311, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 1.6709240674972534, |
| "learning_rate": 0.001, |
| "loss": 1.3532, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 1.6208750009536743, |
| "learning_rate": 0.001, |
| "loss": 1.3453, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 1.4181149005889893, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 1.5968010425567627, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 1.5664920806884766, |
| "learning_rate": 0.001, |
| "loss": 1.3603, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 1.6634669303894043, |
| "learning_rate": 0.001, |
| "loss": 1.3668, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 1.7504676580429077, |
| "learning_rate": 0.001, |
| "loss": 1.356, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.6489694118499756, |
| "learning_rate": 0.001, |
| "loss": 1.351, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 1.3372224569320679, |
| "learning_rate": 0.001, |
| "loss": 1.3626, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 1.7510300874710083, |
| "learning_rate": 0.001, |
| "loss": 1.3782, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 2.0836756229400635, |
| "learning_rate": 0.001, |
| "loss": 1.3742, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 1.98588228225708, |
| "learning_rate": 0.001, |
| "loss": 1.3877, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 1.7963939905166626, |
| "learning_rate": 0.001, |
| "loss": 1.389, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 1.6637582778930664, |
| "learning_rate": 0.001, |
| "loss": 1.3757, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 1.9273699522018433, |
| "learning_rate": 0.001, |
| "loss": 1.3857, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 1.724212884902954, |
| "learning_rate": 0.001, |
| "loss": 1.3878, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 1.5962377786636353, |
| "learning_rate": 0.001, |
| "loss": 1.4054, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 1.804415225982666, |
| "learning_rate": 0.001, |
| "loss": 1.4049, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 1.5397160053253174, |
| "learning_rate": 0.001, |
| "loss": 1.3863, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 1.7321134805679321, |
| "learning_rate": 0.001, |
| "loss": 1.2727, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 1.2003074884414673, |
| "learning_rate": 0.001, |
| "loss": 1.2812, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.9206042289733887, |
| "learning_rate": 0.001, |
| "loss": 1.294, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 1.7302062511444092, |
| "learning_rate": 0.001, |
| "loss": 1.3028, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 1.5176318883895874, |
| "learning_rate": 0.001, |
| "loss": 1.305, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 1.221076488494873, |
| "learning_rate": 0.001, |
| "loss": 1.2955, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 1.2028017044067383, |
| "learning_rate": 0.001, |
| "loss": 1.2969, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 1.3734420537948608, |
| "learning_rate": 0.001, |
| "loss": 1.3183, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 1.6204302310943604, |
| "learning_rate": 0.001, |
| "loss": 1.3096, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 1.4113692045211792, |
| "learning_rate": 0.001, |
| "loss": 1.324, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 1.7580782175064087, |
| "learning_rate": 0.001, |
| "loss": 1.3349, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 1.8114993572235107, |
| "learning_rate": 0.001, |
| "loss": 1.3232, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 1.8745627403259277, |
| "learning_rate": 0.001, |
| "loss": 1.3376, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 1.2741578817367554, |
| "learning_rate": 0.001, |
| "loss": 1.335, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.4233325719833374, |
| "learning_rate": 0.001, |
| "loss": 1.3441, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.3417024612426758, |
| "learning_rate": 0.001, |
| "loss": 1.3323, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 1.574867844581604, |
| "learning_rate": 0.001, |
| "loss": 1.3619, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.5219658613204956, |
| "learning_rate": 0.001, |
| "loss": 1.3517, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 1.6881141662597656, |
| "learning_rate": 0.001, |
| "loss": 1.3683, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 1.40277099609375, |
| "learning_rate": 0.001, |
| "loss": 1.3682, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.6496849060058594, |
| "learning_rate": 0.001, |
| "loss": 1.3553, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 1.9583226442337036, |
| "learning_rate": 0.001, |
| "loss": 1.3658, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 1.6086351871490479, |
| "learning_rate": 0.001, |
| "loss": 1.3623, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 1.4960424900054932, |
| "learning_rate": 0.001, |
| "loss": 1.3561, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 1.6357324123382568, |
| "learning_rate": 0.001, |
| "loss": 1.3876, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.3379572629928589, |
| "learning_rate": 0.001, |
| "loss": 1.3576, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.2775260210037231, |
| "learning_rate": 0.001, |
| "loss": 1.3735, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 1.6418566703796387, |
| "learning_rate": 0.001, |
| "loss": 1.384, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 1.4974275827407837, |
| "learning_rate": 0.001, |
| "loss": 1.39, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 1.4520913362503052, |
| "learning_rate": 0.001, |
| "loss": 1.3886, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.6784367561340332, |
| "learning_rate": 0.001, |
| "loss": 1.3867, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.85789155960083, |
| "learning_rate": 0.001, |
| "loss": 1.2681, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 2.0245468616485596, |
| "learning_rate": 0.001, |
| "loss": 1.2846, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.0982847213745117, |
| "learning_rate": 0.001, |
| "loss": 1.2784, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 1.1000640392303467, |
| "learning_rate": 0.001, |
| "loss": 1.296, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 2.0382845401763916, |
| "learning_rate": 0.001, |
| "loss": 1.2977, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 2.8659310340881348, |
| "learning_rate": 0.001, |
| "loss": 1.2818, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 1.5173022747039795, |
| "learning_rate": 0.001, |
| "loss": 1.3047, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.183382511138916, |
| "learning_rate": 0.001, |
| "loss": 1.3167, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 1.4100364446640015, |
| "learning_rate": 0.001, |
| "loss": 1.3181, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 1.3291791677474976, |
| "learning_rate": 0.001, |
| "loss": 1.3172, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 1.5253900289535522, |
| "learning_rate": 0.001, |
| "loss": 1.3138, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 1.2333720922470093, |
| "learning_rate": 0.001, |
| "loss": 1.3248, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 1.2060329914093018, |
| "learning_rate": 0.001, |
| "loss": 1.3278, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 1.940195918083191, |
| "learning_rate": 0.001, |
| "loss": 1.3232, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.408094882965088, |
| "learning_rate": 0.001, |
| "loss": 1.339, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 1.3757351636886597, |
| "learning_rate": 0.001, |
| "loss": 1.3378, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.6911505460739136, |
| "learning_rate": 0.001, |
| "loss": 1.3426, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.095239520072937, |
| "learning_rate": 0.001, |
| "loss": 1.3479, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 1.3571579456329346, |
| "learning_rate": 0.001, |
| "loss": 1.3418, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 1.5289149284362793, |
| "learning_rate": 0.001, |
| "loss": 1.3636, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 2.3517847061157227, |
| "learning_rate": 0.001, |
| "loss": 1.3577, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.7623791694641113, |
| "learning_rate": 0.001, |
| "loss": 1.3621, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.7347806692123413, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 1.9795016050338745, |
| "learning_rate": 0.001, |
| "loss": 1.3551, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.7498890161514282, |
| "learning_rate": 0.001, |
| "loss": 1.3696, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 1.118046522140503, |
| "learning_rate": 0.001, |
| "loss": 1.3541, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 1.7081708908081055, |
| "learning_rate": 0.001, |
| "loss": 1.3692, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 1.2541394233703613, |
| "learning_rate": 0.001, |
| "loss": 1.3738, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.55007803440094, |
| "learning_rate": 0.001, |
| "loss": 1.3728, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 1.1029446125030518, |
| "learning_rate": 0.001, |
| "loss": 1.3822, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 1.4424092769622803, |
| "learning_rate": 0.001, |
| "loss": 1.364, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.4126336574554443, |
| "learning_rate": 0.001, |
| "loss": 1.2606, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.1296864748001099, |
| "learning_rate": 0.001, |
| "loss": 1.2772, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.0883948802947998, |
| "learning_rate": 0.001, |
| "loss": 1.287, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 2.3048338890075684, |
| "learning_rate": 0.001, |
| "loss": 1.2915, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 1.4624378681182861, |
| "learning_rate": 0.001, |
| "loss": 1.2696, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 2.080893039703369, |
| "learning_rate": 0.001, |
| "loss": 1.2922, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 1.1155803203582764, |
| "learning_rate": 0.001, |
| "loss": 1.2881, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 2.0003838539123535, |
| "learning_rate": 0.001, |
| "loss": 1.3031, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.1450769901275635, |
| "learning_rate": 0.001, |
| "loss": 1.3158, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.6422492265701294, |
| "learning_rate": 0.001, |
| "loss": 1.3028, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.5813195705413818, |
| "learning_rate": 0.001, |
| "loss": 1.3043, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 2.072030782699585, |
| "learning_rate": 0.001, |
| "loss": 1.3095, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 1.496835470199585, |
| "learning_rate": 0.001, |
| "loss": 1.321, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 1.9745844602584839, |
| "learning_rate": 0.001, |
| "loss": 1.3344, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 1.3407188653945923, |
| "learning_rate": 0.001, |
| "loss": 1.3394, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 1.3977127075195312, |
| "learning_rate": 0.001, |
| "loss": 1.3158, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.1998000144958496, |
| "learning_rate": 0.001, |
| "loss": 1.3412, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 1.5857043266296387, |
| "learning_rate": 0.001, |
| "loss": 1.3398, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 1.3662618398666382, |
| "learning_rate": 0.001, |
| "loss": 1.3452, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.5763628482818604, |
| "learning_rate": 0.001, |
| "loss": 1.3347, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.1839090585708618, |
| "learning_rate": 0.001, |
| "loss": 1.354, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.1503945589065552, |
| "learning_rate": 0.001, |
| "loss": 1.351, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 1.364113688468933, |
| "learning_rate": 0.001, |
| "loss": 1.3464, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.640329122543335, |
| "learning_rate": 0.001, |
| "loss": 1.3523, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.3196192979812622, |
| "learning_rate": 0.001, |
| "loss": 1.3648, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 1.7141294479370117, |
| "learning_rate": 0.001, |
| "loss": 1.366, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 1.4717209339141846, |
| "learning_rate": 0.001, |
| "loss": 1.3733, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 1.763951063156128, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 2.454180955886841, |
| "learning_rate": 0.001, |
| "loss": 1.3777, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 1.2732707262039185, |
| "learning_rate": 0.001, |
| "loss": 1.3935, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.2473344802856445, |
| "learning_rate": 0.001, |
| "loss": 1.3515, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 1.722395896911621, |
| "learning_rate": 0.001, |
| "loss": 1.262, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 1.162625789642334, |
| "learning_rate": 0.001, |
| "loss": 1.2573, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.3701404333114624, |
| "learning_rate": 0.001, |
| "loss": 1.2692, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.517385721206665, |
| "learning_rate": 0.001, |
| "loss": 1.274, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.3972104787826538, |
| "learning_rate": 0.001, |
| "loss": 1.2774, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 1.48856520652771, |
| "learning_rate": 0.001, |
| "loss": 1.2776, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.5007410049438477, |
| "learning_rate": 0.001, |
| "loss": 1.289, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.6463698148727417, |
| "learning_rate": 0.001, |
| "loss": 1.2989, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.3843050003051758, |
| "learning_rate": 0.001, |
| "loss": 1.3107, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 1.6429800987243652, |
| "learning_rate": 0.001, |
| "loss": 1.3062, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 1.243739128112793, |
| "learning_rate": 0.001, |
| "loss": 1.3073, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 1.6305221319198608, |
| "learning_rate": 0.001, |
| "loss": 1.3134, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 1.9867161512374878, |
| "learning_rate": 0.001, |
| "loss": 1.3087, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 1.5061590671539307, |
| "learning_rate": 0.001, |
| "loss": 1.3178, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.2209558486938477, |
| "learning_rate": 0.001, |
| "loss": 1.3232, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 1.838896632194519, |
| "learning_rate": 0.001, |
| "loss": 1.3391, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.5392895936965942, |
| "learning_rate": 0.001, |
| "loss": 1.3292, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 1.872672438621521, |
| "learning_rate": 0.001, |
| "loss": 1.3353, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.6945205926895142, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 1.7650153636932373, |
| "learning_rate": 0.001, |
| "loss": 1.3442, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.4047414064407349, |
| "learning_rate": 0.001, |
| "loss": 1.3518, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 1.1727465391159058, |
| "learning_rate": 0.001, |
| "loss": 1.3535, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.6336814165115356, |
| "learning_rate": 0.001, |
| "loss": 1.3382, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.5113426446914673, |
| "learning_rate": 0.001, |
| "loss": 1.3513, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 1.4181830883026123, |
| "learning_rate": 0.001, |
| "loss": 1.3532, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 1.3083226680755615, |
| "learning_rate": 0.001, |
| "loss": 1.3592, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 1.3872716426849365, |
| "learning_rate": 0.001, |
| "loss": 1.3664, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 1.555129051208496, |
| "learning_rate": 0.001, |
| "loss": 1.364, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 2.2075741291046143, |
| "learning_rate": 0.001, |
| "loss": 1.3506, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 2.4672770500183105, |
| "learning_rate": 0.001, |
| "loss": 1.3606, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 1.3792235851287842, |
| "learning_rate": 0.001, |
| "loss": 1.3278, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 1.343404769897461, |
| "learning_rate": 0.001, |
| "loss": 1.2656, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.621608018875122, |
| "learning_rate": 0.001, |
| "loss": 1.2515, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 1.3299224376678467, |
| "learning_rate": 0.001, |
| "loss": 1.2787, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 1.6409183740615845, |
| "learning_rate": 0.001, |
| "loss": 1.2737, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.5294100046157837, |
| "learning_rate": 0.001, |
| "loss": 1.2814, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 1.9135456085205078, |
| "learning_rate": 0.001, |
| "loss": 1.2889, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 1.2390012741088867, |
| "learning_rate": 0.001, |
| "loss": 1.2958, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 1.4384171962738037, |
| "learning_rate": 0.001, |
| "loss": 1.288, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.1214848756790161, |
| "learning_rate": 0.001, |
| "loss": 1.28, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 1.3984450101852417, |
| "learning_rate": 0.001, |
| "loss": 1.2968, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 1.6903371810913086, |
| "learning_rate": 0.001, |
| "loss": 1.3195, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 1.9482558965682983, |
| "learning_rate": 0.001, |
| "loss": 1.3102, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 1.6029472351074219, |
| "learning_rate": 0.001, |
| "loss": 1.298, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 1.807937741279602, |
| "learning_rate": 0.001, |
| "loss": 1.311, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 1.967272400856018, |
| "learning_rate": 0.001, |
| "loss": 1.3206, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 1.4797887802124023, |
| "learning_rate": 0.001, |
| "loss": 1.325, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.414186954498291, |
| "learning_rate": 0.001, |
| "loss": 1.3176, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 1.4756308794021606, |
| "learning_rate": 0.001, |
| "loss": 1.322, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.391809105873108, |
| "learning_rate": 0.001, |
| "loss": 1.3396, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 1.6174664497375488, |
| "learning_rate": 0.001, |
| "loss": 1.3329, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 1.5664608478546143, |
| "learning_rate": 0.001, |
| "loss": 1.3236, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 1.3522831201553345, |
| "learning_rate": 0.001, |
| "loss": 1.346, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 1.4911127090454102, |
| "learning_rate": 0.001, |
| "loss": 1.3262, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 1.5883923768997192, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 1.4092780351638794, |
| "learning_rate": 0.001, |
| "loss": 1.3624, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 1.4824280738830566, |
| "learning_rate": 0.001, |
| "loss": 1.3345, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 1.2456746101379395, |
| "learning_rate": 0.001, |
| "loss": 1.3524, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 1.416329264640808, |
| "learning_rate": 0.001, |
| "loss": 1.3446, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 1.303393006324768, |
| "learning_rate": 0.001, |
| "loss": 1.3649, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 1.396873950958252, |
| "learning_rate": 0.001, |
| "loss": 1.3544, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 1.4742708206176758, |
| "learning_rate": 0.001, |
| "loss": 1.3035, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 1.7946443557739258, |
| "learning_rate": 0.001, |
| "loss": 1.2522, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 1.8204759359359741, |
| "learning_rate": 0.001, |
| "loss": 1.2583, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 1.6528264284133911, |
| "learning_rate": 0.001, |
| "loss": 1.2519, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 1.623073697090149, |
| "learning_rate": 0.001, |
| "loss": 1.2672, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 1.9576362371444702, |
| "learning_rate": 0.001, |
| "loss": 1.2641, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 1.9356460571289062, |
| "learning_rate": 0.001, |
| "loss": 1.2575, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 1.4619381427764893, |
| "learning_rate": 0.001, |
| "loss": 1.2932, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 1.5990549325942993, |
| "learning_rate": 0.001, |
| "loss": 1.2894, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 1.3064255714416504, |
| "learning_rate": 0.001, |
| "loss": 1.2767, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.3489830493927002, |
| "learning_rate": 0.001, |
| "loss": 1.289, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 1.441953420639038, |
| "learning_rate": 0.001, |
| "loss": 1.2965, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.8671915531158447, |
| "learning_rate": 0.001, |
| "loss": 1.3034, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 1.6628390550613403, |
| "learning_rate": 0.001, |
| "loss": 1.2943, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 1.5824347734451294, |
| "learning_rate": 0.001, |
| "loss": 1.3141, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 1.70791494846344, |
| "learning_rate": 0.001, |
| "loss": 1.3112, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 2.0625507831573486, |
| "learning_rate": 0.001, |
| "loss": 1.3166, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 2.4934349060058594, |
| "learning_rate": 0.001, |
| "loss": 1.3166, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 1.3685812950134277, |
| "learning_rate": 0.001, |
| "loss": 1.317, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.607825517654419, |
| "learning_rate": 0.001, |
| "loss": 1.3241, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.5159963369369507, |
| "learning_rate": 0.001, |
| "loss": 1.3092, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 1.5785311460494995, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 1.2325959205627441, |
| "learning_rate": 0.001, |
| "loss": 1.3318, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.4121829271316528, |
| "learning_rate": 0.001, |
| "loss": 1.3351, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 1.5694589614868164, |
| "learning_rate": 0.001, |
| "loss": 1.3312, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 1.5602014064788818, |
| "learning_rate": 0.001, |
| "loss": 1.3473, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 1.5325652360916138, |
| "learning_rate": 0.001, |
| "loss": 1.3635, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 1.6036229133605957, |
| "learning_rate": 0.001, |
| "loss": 1.3579, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 1.6385998725891113, |
| "learning_rate": 0.001, |
| "loss": 1.3632, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 1.8205591440200806, |
| "learning_rate": 0.001, |
| "loss": 1.3462, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 1.7536708116531372, |
| "learning_rate": 0.001, |
| "loss": 1.3528, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 1.2094005346298218, |
| "learning_rate": 0.001, |
| "loss": 1.2857, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 2.219756841659546, |
| "learning_rate": 0.001, |
| "loss": 1.2417, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 2.2155497074127197, |
| "learning_rate": 0.001, |
| "loss": 1.2447, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 2.1346004009246826, |
| "learning_rate": 0.001, |
| "loss": 1.2614, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 1.6468212604522705, |
| "learning_rate": 0.001, |
| "loss": 1.2746, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 1.9895401000976562, |
| "learning_rate": 0.001, |
| "loss": 1.2707, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 2.380171298980713, |
| "learning_rate": 0.001, |
| "loss": 1.2691, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 1.7068277597427368, |
| "learning_rate": 0.001, |
| "loss": 1.271, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 1.8234671354293823, |
| "learning_rate": 0.001, |
| "loss": 1.2688, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 1.459957480430603, |
| "learning_rate": 0.001, |
| "loss": 1.2912, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 1.7018338441848755, |
| "learning_rate": 0.001, |
| "loss": 1.2881, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 1.862154483795166, |
| "learning_rate": 0.001, |
| "loss": 1.2976, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 1.876776933670044, |
| "learning_rate": 0.001, |
| "loss": 1.2975, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 2.4025259017944336, |
| "learning_rate": 0.001, |
| "loss": 1.2911, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 1.6050946712493896, |
| "learning_rate": 0.001, |
| "loss": 1.297, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 1.7577511072158813, |
| "learning_rate": 0.001, |
| "loss": 1.311, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 1.6006762981414795, |
| "learning_rate": 0.001, |
| "loss": 1.3057, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 1.7033071517944336, |
| "learning_rate": 0.001, |
| "loss": 1.3044, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 1.6848423480987549, |
| "learning_rate": 0.001, |
| "loss": 1.3219, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 1.886040449142456, |
| "learning_rate": 0.001, |
| "loss": 1.314, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 1.5038381814956665, |
| "learning_rate": 0.001, |
| "loss": 1.3306, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 1.553776741027832, |
| "learning_rate": 0.001, |
| "loss": 1.3221, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 1.759239912033081, |
| "learning_rate": 0.001, |
| "loss": 1.3269, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 2.047476291656494, |
| "learning_rate": 0.001, |
| "loss": 1.3342, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 1.8062100410461426, |
| "learning_rate": 0.001, |
| "loss": 1.3431, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 1.7818137407302856, |
| "learning_rate": 0.001, |
| "loss": 1.3331, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 1.8032774925231934, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 1.5718131065368652, |
| "learning_rate": 0.001, |
| "loss": 1.336, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 1.789857029914856, |
| "learning_rate": 0.001, |
| "loss": 1.3349, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 1.8243910074234009, |
| "learning_rate": 0.001, |
| "loss": 1.347, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 2.6804580688476562, |
| "learning_rate": 0.001, |
| "loss": 1.3379, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 1.7941631078720093, |
| "learning_rate": 0.001, |
| "loss": 1.2708, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 1.7584716081619263, |
| "learning_rate": 0.001, |
| "loss": 1.2498, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 1.6040362119674683, |
| "learning_rate": 0.001, |
| "loss": 1.2461, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 1.6586319208145142, |
| "learning_rate": 0.001, |
| "loss": 1.2484, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 1.869040846824646, |
| "learning_rate": 0.001, |
| "loss": 1.2543, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 1.9890912771224976, |
| "learning_rate": 0.001, |
| "loss": 1.2621, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 1.7279739379882812, |
| "learning_rate": 0.001, |
| "loss": 1.2587, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 1.527393102645874, |
| "learning_rate": 0.001, |
| "loss": 1.2656, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 2.0269243717193604, |
| "learning_rate": 0.001, |
| "loss": 1.2876, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 2.9750266075134277, |
| "learning_rate": 0.001, |
| "loss": 1.2761, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 1.699476718902588, |
| "learning_rate": 0.001, |
| "loss": 1.279, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 1.8639898300170898, |
| "learning_rate": 0.001, |
| "loss": 1.2823, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 1.3638863563537598, |
| "learning_rate": 0.001, |
| "loss": 1.284, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 1.5757185220718384, |
| "learning_rate": 0.001, |
| "loss": 1.2923, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 1.8936587572097778, |
| "learning_rate": 0.001, |
| "loss": 1.3045, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 2.0886335372924805, |
| "learning_rate": 0.001, |
| "loss": 1.2932, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 2.6820244789123535, |
| "learning_rate": 0.001, |
| "loss": 1.3142, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 1.8949545621871948, |
| "learning_rate": 0.001, |
| "loss": 1.3036, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 1.7866016626358032, |
| "learning_rate": 0.001, |
| "loss": 1.3071, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 2.0061726570129395, |
| "learning_rate": 0.001, |
| "loss": 1.3092, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 1.950553059577942, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 2.6318914890289307, |
| "learning_rate": 0.001, |
| "loss": 1.3117, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 1.6457170248031616, |
| "learning_rate": 0.001, |
| "loss": 1.3083, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 2.6424992084503174, |
| "learning_rate": 0.001, |
| "loss": 1.3334, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 1.5322871208190918, |
| "learning_rate": 0.001, |
| "loss": 1.3171, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 1.9201703071594238, |
| "learning_rate": 0.001, |
| "loss": 1.3377, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 2.1163434982299805, |
| "learning_rate": 0.001, |
| "loss": 1.333, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 1.438014268875122, |
| "learning_rate": 0.001, |
| "loss": 1.3275, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 1.786887764930725, |
| "learning_rate": 0.001, |
| "loss": 1.3299, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 1.8135703802108765, |
| "learning_rate": 0.001, |
| "loss": 1.3392, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 1.705079436302185, |
| "learning_rate": 0.001, |
| "loss": 1.3478, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 2.3613109588623047, |
| "learning_rate": 0.001, |
| "loss": 1.2827, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 1.4064383506774902, |
| "learning_rate": 0.001, |
| "loss": 1.2314, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 1.7855405807495117, |
| "learning_rate": 0.001, |
| "loss": 1.2275, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 1.2022162675857544, |
| "learning_rate": 0.001, |
| "loss": 1.2441, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 1.501390814781189, |
| "learning_rate": 0.001, |
| "loss": 1.2495, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 1.2558820247650146, |
| "learning_rate": 0.001, |
| "loss": 1.2427, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 1.2189136743545532, |
| "learning_rate": 0.001, |
| "loss": 1.2495, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 1.3827097415924072, |
| "learning_rate": 0.001, |
| "loss": 1.249, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 1.5776780843734741, |
| "learning_rate": 0.001, |
| "loss": 1.2677, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 1.501815915107727, |
| "learning_rate": 0.001, |
| "loss": 1.2673, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 1.7654236555099487, |
| "learning_rate": 0.001, |
| "loss": 1.2822, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 1.8030627965927124, |
| "learning_rate": 0.001, |
| "loss": 1.2816, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 1.3551745414733887, |
| "learning_rate": 0.001, |
| "loss": 1.2697, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 1.3812977075576782, |
| "learning_rate": 0.001, |
| "loss": 1.2881, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 1.2390937805175781, |
| "learning_rate": 0.001, |
| "loss": 1.2882, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 1.3505865335464478, |
| "learning_rate": 0.001, |
| "loss": 1.2881, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.8913111686706543, |
| "learning_rate": 0.001, |
| "loss": 1.3037, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 2.1353416442871094, |
| "learning_rate": 0.001, |
| "loss": 1.3182, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 1.4333372116088867, |
| "learning_rate": 0.001, |
| "loss": 1.3018, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 1.4343737363815308, |
| "learning_rate": 0.001, |
| "loss": 1.2958, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 1.8866076469421387, |
| "learning_rate": 0.001, |
| "loss": 1.3095, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 1.802651047706604, |
| "learning_rate": 0.001, |
| "loss": 1.3296, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 1.3045099973678589, |
| "learning_rate": 0.001, |
| "loss": 1.3177, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 1.24888277053833, |
| "learning_rate": 0.001, |
| "loss": 1.3263, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.5360679626464844, |
| "learning_rate": 0.001, |
| "loss": 1.3371, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 1.491134524345398, |
| "learning_rate": 0.001, |
| "loss": 1.3337, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 1.2645684480667114, |
| "learning_rate": 0.001, |
| "loss": 1.3272, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 1.7189093828201294, |
| "learning_rate": 0.001, |
| "loss": 1.331, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 1.4772014617919922, |
| "learning_rate": 0.001, |
| "loss": 1.3186, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 1.5750775337219238, |
| "learning_rate": 0.001, |
| "loss": 1.3494, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 1.560361623764038, |
| "learning_rate": 0.001, |
| "loss": 1.3308, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 1.4874534606933594, |
| "learning_rate": 0.001, |
| "loss": 1.2771, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 1.5172135829925537, |
| "learning_rate": 0.001, |
| "loss": 1.2225, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 1.3056913614273071, |
| "learning_rate": 0.001, |
| "loss": 1.2375, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 1.163444995880127, |
| "learning_rate": 0.001, |
| "loss": 1.2395, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 1.3650455474853516, |
| "learning_rate": 0.001, |
| "loss": 1.2509, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 1.5594156980514526, |
| "learning_rate": 0.001, |
| "loss": 1.2378, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 1.3483871221542358, |
| "learning_rate": 0.001, |
| "loss": 1.2524, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 1.5659109354019165, |
| "learning_rate": 0.001, |
| "loss": 1.2509, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 2.272768497467041, |
| "learning_rate": 0.001, |
| "loss": 1.2641, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 1.498880386352539, |
| "learning_rate": 0.001, |
| "loss": 1.2594, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 3.030276298522949, |
| "learning_rate": 0.001, |
| "loss": 1.2609, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 1.6324968338012695, |
| "learning_rate": 0.001, |
| "loss": 1.2594, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 1.801637053489685, |
| "learning_rate": 0.001, |
| "loss": 1.276, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 2.261122941970825, |
| "learning_rate": 0.001, |
| "loss": 1.2843, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 2.3047244548797607, |
| "learning_rate": 0.001, |
| "loss": 1.2973, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 2.708813428878784, |
| "learning_rate": 0.001, |
| "loss": 1.2903, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 2.372790575027466, |
| "learning_rate": 0.001, |
| "loss": 1.2972, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 1.5244978666305542, |
| "learning_rate": 0.001, |
| "loss": 1.2814, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 2.138533592224121, |
| "learning_rate": 0.001, |
| "loss": 1.2886, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 1.3166296482086182, |
| "learning_rate": 0.001, |
| "loss": 1.2948, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 1.5935583114624023, |
| "learning_rate": 0.001, |
| "loss": 1.3114, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 1.9687855243682861, |
| "learning_rate": 0.001, |
| "loss": 1.3064, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 1.6364773511886597, |
| "learning_rate": 0.001, |
| "loss": 1.3186, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 1.3022596836090088, |
| "learning_rate": 0.001, |
| "loss": 1.3239, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.4766348600387573, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 2.4459609985351562, |
| "learning_rate": 0.001, |
| "loss": 1.3089, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.246825098991394, |
| "learning_rate": 0.001, |
| "loss": 1.3026, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 2.1387414932250977, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 1.2364811897277832, |
| "learning_rate": 0.001, |
| "loss": 1.3298, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 1.3467936515808105, |
| "learning_rate": 0.001, |
| "loss": 1.3386, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 1.5889116525650024, |
| "learning_rate": 0.001, |
| "loss": 1.3304, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 1.1252005100250244, |
| "learning_rate": 0.001, |
| "loss": 1.2648, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.8917295932769775, |
| "learning_rate": 0.001, |
| "loss": 1.2281, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.2165217399597168, |
| "learning_rate": 0.001, |
| "loss": 1.2275, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 1.483284831047058, |
| "learning_rate": 0.001, |
| "loss": 1.2294, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.5502418279647827, |
| "learning_rate": 0.001, |
| "loss": 1.2497, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 1.3309822082519531, |
| "learning_rate": 0.001, |
| "loss": 1.2413, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 1.3194745779037476, |
| "learning_rate": 0.001, |
| "loss": 1.2473, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 1.3569340705871582, |
| "learning_rate": 0.001, |
| "loss": 1.2371, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 1.3349523544311523, |
| "learning_rate": 0.001, |
| "loss": 1.2471, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 2.0123939514160156, |
| "learning_rate": 0.001, |
| "loss": 1.2701, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 1.622585654258728, |
| "learning_rate": 0.001, |
| "loss": 1.2563, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 1.2212409973144531, |
| "learning_rate": 0.001, |
| "loss": 1.2853, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 1.120845079421997, |
| "learning_rate": 0.001, |
| "loss": 1.2622, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 1.106087565422058, |
| "learning_rate": 0.001, |
| "loss": 1.2759, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.985410213470459, |
| "learning_rate": 0.001, |
| "loss": 1.2846, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.1984760761260986, |
| "learning_rate": 0.001, |
| "loss": 1.2756, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.565770149230957, |
| "learning_rate": 0.001, |
| "loss": 1.2914, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.3624826669692993, |
| "learning_rate": 0.001, |
| "loss": 1.2835, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 1.0454158782958984, |
| "learning_rate": 0.001, |
| "loss": 1.287, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 1.0237979888916016, |
| "learning_rate": 0.001, |
| "loss": 1.2997, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.2886712551116943, |
| "learning_rate": 0.001, |
| "loss": 1.2895, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.6183465719223022, |
| "learning_rate": 0.001, |
| "loss": 1.297, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.7799890041351318, |
| "learning_rate": 0.001, |
| "loss": 1.3121, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 1.240616798400879, |
| "learning_rate": 0.001, |
| "loss": 1.2888, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.814924955368042, |
| "learning_rate": 0.001, |
| "loss": 1.3176, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 1.1921446323394775, |
| "learning_rate": 0.001, |
| "loss": 1.3089, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.1958500146865845, |
| "learning_rate": 0.001, |
| "loss": 1.3175, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.388808250427246, |
| "learning_rate": 0.001, |
| "loss": 1.3137, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 1.6556081771850586, |
| "learning_rate": 0.001, |
| "loss": 1.324, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 1.1369446516036987, |
| "learning_rate": 0.001, |
| "loss": 1.3176, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.4303462505340576, |
| "learning_rate": 0.001, |
| "loss": 1.3213, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.7964837551116943, |
| "learning_rate": 0.001, |
| "loss": 1.2639, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 2.2737128734588623, |
| "learning_rate": 0.001, |
| "loss": 1.2169, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 1.316617727279663, |
| "learning_rate": 0.001, |
| "loss": 1.2285, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 1.4206103086471558, |
| "learning_rate": 0.001, |
| "loss": 1.2253, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 1.3422598838806152, |
| "learning_rate": 0.001, |
| "loss": 1.2307, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.8675673007965088, |
| "learning_rate": 0.001, |
| "loss": 1.2436, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 1.1970405578613281, |
| "learning_rate": 0.001, |
| "loss": 1.2485, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.6834607124328613, |
| "learning_rate": 0.001, |
| "loss": 1.2387, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.3664674758911133, |
| "learning_rate": 0.001, |
| "loss": 1.2287, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.3463797569274902, |
| "learning_rate": 0.001, |
| "loss": 1.2583, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 1.318743109703064, |
| "learning_rate": 0.001, |
| "loss": 1.2707, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 1.703060269355774, |
| "learning_rate": 0.001, |
| "loss": 1.2761, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 1.5728152990341187, |
| "learning_rate": 0.001, |
| "loss": 1.2626, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.3078418970108032, |
| "learning_rate": 0.001, |
| "loss": 1.2598, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.6507360935211182, |
| "learning_rate": 0.001, |
| "loss": 1.2679, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 1.6421030759811401, |
| "learning_rate": 0.001, |
| "loss": 1.277, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 1.5586189031600952, |
| "learning_rate": 0.001, |
| "loss": 1.2862, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.9439473152160645, |
| "learning_rate": 0.001, |
| "loss": 1.283, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 1.1835355758666992, |
| "learning_rate": 0.001, |
| "loss": 1.2904, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 2.370730400085449, |
| "learning_rate": 0.001, |
| "loss": 1.2695, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 1.409196376800537, |
| "learning_rate": 0.001, |
| "loss": 1.2858, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.6037887334823608, |
| "learning_rate": 0.001, |
| "loss": 1.2972, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.8080615997314453, |
| "learning_rate": 0.001, |
| "loss": 1.2963, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 1.3899056911468506, |
| "learning_rate": 0.001, |
| "loss": 1.3146, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 1.2540109157562256, |
| "learning_rate": 0.001, |
| "loss": 1.3093, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.2840240001678467, |
| "learning_rate": 0.001, |
| "loss": 1.3132, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 2.769483804702759, |
| "learning_rate": 0.001, |
| "loss": 1.3173, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 1.460633397102356, |
| "learning_rate": 0.001, |
| "loss": 1.3161, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 1.941650390625, |
| "learning_rate": 0.001, |
| "loss": 1.3029, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.4547431468963623, |
| "learning_rate": 0.001, |
| "loss": 1.3191, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.1949020624160767, |
| "learning_rate": 0.001, |
| "loss": 1.3033, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.4821178913116455, |
| "learning_rate": 0.001, |
| "loss": 1.2344, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 2.7771239280700684, |
| "learning_rate": 0.001, |
| "loss": 1.2183, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.1940053701400757, |
| "learning_rate": 0.001, |
| "loss": 1.218, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.2287544012069702, |
| "learning_rate": 0.001, |
| "loss": 1.2185, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.2844361066818237, |
| "learning_rate": 0.001, |
| "loss": 1.2334, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.7986161708831787, |
| "learning_rate": 0.001, |
| "loss": 1.2308, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 2.0331642627716064, |
| "learning_rate": 0.001, |
| "loss": 1.2359, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 1.260055422782898, |
| "learning_rate": 0.001, |
| "loss": 1.2452, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 1.5814372301101685, |
| "learning_rate": 0.001, |
| "loss": 1.2561, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 1.2864145040512085, |
| "learning_rate": 0.001, |
| "loss": 1.2641, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 1.138455867767334, |
| "learning_rate": 0.001, |
| "loss": 1.2574, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 1.9856462478637695, |
| "learning_rate": 0.001, |
| "loss": 1.2631, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 1.3746229410171509, |
| "learning_rate": 0.001, |
| "loss": 1.2537, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 1.5862191915512085, |
| "learning_rate": 0.001, |
| "loss": 1.2684, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 2.5958855152130127, |
| "learning_rate": 0.001, |
| "loss": 1.2607, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 1.5952675342559814, |
| "learning_rate": 0.001, |
| "loss": 1.2868, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 1.5219841003417969, |
| "learning_rate": 0.001, |
| "loss": 1.2769, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.7715810537338257, |
| "learning_rate": 0.001, |
| "loss": 1.2765, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 1.4715001583099365, |
| "learning_rate": 0.001, |
| "loss": 1.2896, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.7011549472808838, |
| "learning_rate": 0.001, |
| "loss": 1.2806, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 1.7269728183746338, |
| "learning_rate": 0.001, |
| "loss": 1.2731, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.3083314895629883, |
| "learning_rate": 0.001, |
| "loss": 1.2806, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 1.3248363733291626, |
| "learning_rate": 0.001, |
| "loss": 1.2924, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.6495850086212158, |
| "learning_rate": 0.001, |
| "loss": 1.3018, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 1.550723910331726, |
| "learning_rate": 0.001, |
| "loss": 1.3067, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 1.4989489316940308, |
| "learning_rate": 0.001, |
| "loss": 1.3016, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.403679370880127, |
| "learning_rate": 0.001, |
| "loss": 1.3039, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 1.3110108375549316, |
| "learning_rate": 0.001, |
| "loss": 1.2979, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.416511058807373, |
| "learning_rate": 0.001, |
| "loss": 1.3202, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 2.252964735031128, |
| "learning_rate": 0.001, |
| "loss": 1.3163, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 1.4983501434326172, |
| "learning_rate": 0.001, |
| "loss": 1.3016, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.783571720123291, |
| "learning_rate": 0.001, |
| "loss": 1.2019, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 1.5965900421142578, |
| "learning_rate": 0.001, |
| "loss": 1.2057, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 1.517217755317688, |
| "learning_rate": 0.001, |
| "loss": 1.2201, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 1.5559382438659668, |
| "learning_rate": 0.001, |
| "loss": 1.2208, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 1.6932978630065918, |
| "learning_rate": 0.001, |
| "loss": 1.2203, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.4250338077545166, |
| "learning_rate": 0.001, |
| "loss": 1.2283, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.4945716857910156, |
| "learning_rate": 0.001, |
| "loss": 1.2295, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.9087316989898682, |
| "learning_rate": 0.001, |
| "loss": 1.2385, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.467848777770996, |
| "learning_rate": 0.001, |
| "loss": 1.2366, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 1.7165251970291138, |
| "learning_rate": 0.001, |
| "loss": 1.2442, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 1.2803969383239746, |
| "learning_rate": 0.001, |
| "loss": 1.2407, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 1.6253660917282104, |
| "learning_rate": 0.001, |
| "loss": 1.2553, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 1.965498447418213, |
| "learning_rate": 0.001, |
| "loss": 1.2583, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 1.4171141386032104, |
| "learning_rate": 0.001, |
| "loss": 1.2593, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 1.5294244289398193, |
| "learning_rate": 0.001, |
| "loss": 1.2507, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 1.8220654726028442, |
| "learning_rate": 0.001, |
| "loss": 1.2554, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 2.1719462871551514, |
| "learning_rate": 0.001, |
| "loss": 1.2863, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 1.4300793409347534, |
| "learning_rate": 0.001, |
| "loss": 1.2797, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 2.0281763076782227, |
| "learning_rate": 0.001, |
| "loss": 1.2714, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 2.3171002864837646, |
| "learning_rate": 0.001, |
| "loss": 1.2826, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 1.9922728538513184, |
| "learning_rate": 0.001, |
| "loss": 1.2843, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 1.9118890762329102, |
| "learning_rate": 0.001, |
| "loss": 1.2728, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 2.1546452045440674, |
| "learning_rate": 0.001, |
| "loss": 1.2845, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 1.4353299140930176, |
| "learning_rate": 0.001, |
| "loss": 1.2918, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.4719852209091187, |
| "learning_rate": 0.001, |
| "loss": 1.2865, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 1.617453694343567, |
| "learning_rate": 0.001, |
| "loss": 1.2939, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 1.263366460800171, |
| "learning_rate": 0.001, |
| "loss": 1.2932, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 1.5086780786514282, |
| "learning_rate": 0.001, |
| "loss": 1.2963, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 1.6143393516540527, |
| "learning_rate": 0.001, |
| "loss": 1.3091, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 1.3707364797592163, |
| "learning_rate": 0.001, |
| "loss": 1.3095, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 2.250619649887085, |
| "learning_rate": 0.001, |
| "loss": 1.2998, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 1.6753365993499756, |
| "learning_rate": 0.001, |
| "loss": 1.1985, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.8750298023223877, |
| "learning_rate": 0.001, |
| "loss": 1.1983, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.4335172176361084, |
| "learning_rate": 0.001, |
| "loss": 1.214, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 1.7616883516311646, |
| "learning_rate": 0.001, |
| "loss": 1.2191, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 1.9447722434997559, |
| "learning_rate": 0.001, |
| "loss": 1.2059, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 1.7094531059265137, |
| "learning_rate": 0.001, |
| "loss": 1.2185, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 2.3314735889434814, |
| "learning_rate": 0.001, |
| "loss": 1.2213, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 1.6030874252319336, |
| "learning_rate": 0.001, |
| "loss": 1.2341, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 1.6456331014633179, |
| "learning_rate": 0.001, |
| "loss": 1.2499, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 1.9911348819732666, |
| "learning_rate": 0.001, |
| "loss": 1.2435, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 1.4561117887496948, |
| "learning_rate": 0.001, |
| "loss": 1.2372, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 2.072667121887207, |
| "learning_rate": 0.001, |
| "loss": 1.2546, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 1.9429185390472412, |
| "learning_rate": 0.001, |
| "loss": 1.2557, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 1.8015973567962646, |
| "learning_rate": 0.001, |
| "loss": 1.2533, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 1.6045411825180054, |
| "learning_rate": 0.001, |
| "loss": 1.2552, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 1.370804786682129, |
| "learning_rate": 0.001, |
| "loss": 1.2708, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 1.4999316930770874, |
| "learning_rate": 0.001, |
| "loss": 1.2585, |
| "step": 308000 |
| }, |
| { |
| "epoch": 99.5798319327731, |
| "grad_norm": 1.7964613437652588, |
| "learning_rate": 0.001, |
| "loss": 1.2804, |
| "step": 308100 |
| }, |
| { |
| "epoch": 99.61215255332903, |
| "grad_norm": 1.7374740839004517, |
| "learning_rate": 0.001, |
| "loss": 1.2789, |
| "step": 308200 |
| }, |
| { |
| "epoch": 99.64447317388493, |
| "grad_norm": 1.268214225769043, |
| "learning_rate": 0.001, |
| "loss": 1.2704, |
| "step": 308300 |
| }, |
| { |
| "epoch": 99.67679379444085, |
| "grad_norm": 2.9452226161956787, |
| "learning_rate": 0.001, |
| "loss": 1.2776, |
| "step": 308400 |
| }, |
| { |
| "epoch": 99.70911441499676, |
| "grad_norm": 2.1403918266296387, |
| "learning_rate": 0.001, |
| "loss": 1.2827, |
| "step": 308500 |
| }, |
| { |
| "epoch": 99.74143503555268, |
| "grad_norm": 1.5536611080169678, |
| "learning_rate": 0.001, |
| "loss": 1.2794, |
| "step": 308600 |
| }, |
| { |
| "epoch": 99.77375565610859, |
| "grad_norm": 2.0996694564819336, |
| "learning_rate": 0.001, |
| "loss": 1.2843, |
| "step": 308700 |
| }, |
| { |
| "epoch": 99.80607627666451, |
| "grad_norm": 1.7627614736557007, |
| "learning_rate": 0.001, |
| "loss": 1.2726, |
| "step": 308800 |
| }, |
| { |
| "epoch": 99.83839689722042, |
| "grad_norm": 1.360751748085022, |
| "learning_rate": 0.001, |
| "loss": 1.2856, |
| "step": 308900 |
| }, |
| { |
| "epoch": 99.87071751777634, |
| "grad_norm": 1.9599181413650513, |
| "learning_rate": 0.001, |
| "loss": 1.2973, |
| "step": 309000 |
| }, |
| { |
| "epoch": 99.90303813833225, |
| "grad_norm": 1.8387882709503174, |
| "learning_rate": 0.001, |
| "loss": 1.306, |
| "step": 309100 |
| }, |
| { |
| "epoch": 99.93535875888817, |
| "grad_norm": 1.9950312376022339, |
| "learning_rate": 0.001, |
| "loss": 1.3021, |
| "step": 309200 |
| }, |
| { |
| "epoch": 99.96767937944408, |
| "grad_norm": 1.6858352422714233, |
| "learning_rate": 0.001, |
| "loss": 1.2996, |
| "step": 309300 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 2.136234998703003, |
| "learning_rate": 0.001, |
| "loss": 1.2644, |
| "step": 309400 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 309400, |
| "total_flos": 3.2122963901952e+18, |
| "train_loss": 1.7981142194978483, |
| "train_runtime": 30930.6445, |
| "train_samples_per_second": 320.071, |
| "train_steps_per_second": 10.003 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.2122963901952e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|