| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1389, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021598272138228943, | |
| "grad_norm": 2.878943681716919, | |
| "learning_rate": 7.194244604316547e-08, | |
| "loss": 0.7835, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004319654427645789, | |
| "grad_norm": 2.902249813079834, | |
| "learning_rate": 1.4388489208633095e-07, | |
| "loss": 0.7896, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0064794816414686825, | |
| "grad_norm": 2.8640873432159424, | |
| "learning_rate": 2.1582733812949643e-07, | |
| "loss": 0.7588, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008639308855291577, | |
| "grad_norm": 2.825040102005005, | |
| "learning_rate": 2.877697841726619e-07, | |
| "loss": 0.779, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01079913606911447, | |
| "grad_norm": 2.9926884174346924, | |
| "learning_rate": 3.5971223021582736e-07, | |
| "loss": 0.7816, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.012958963282937365, | |
| "grad_norm": 2.8692467212677, | |
| "learning_rate": 4.3165467625899287e-07, | |
| "loss": 0.7695, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01511879049676026, | |
| "grad_norm": 2.79731822013855, | |
| "learning_rate": 5.035971223021583e-07, | |
| "loss": 0.7683, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017278617710583154, | |
| "grad_norm": 2.832988739013672, | |
| "learning_rate": 5.755395683453238e-07, | |
| "loss": 0.7865, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.019438444924406047, | |
| "grad_norm": 2.787931442260742, | |
| "learning_rate": 6.474820143884893e-07, | |
| "loss": 0.7716, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.02159827213822894, | |
| "grad_norm": 2.6542158126831055, | |
| "learning_rate": 7.194244604316547e-07, | |
| "loss": 0.7708, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.023758099352051837, | |
| "grad_norm": 2.5756170749664307, | |
| "learning_rate": 7.913669064748202e-07, | |
| "loss": 0.7548, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02591792656587473, | |
| "grad_norm": 2.2221007347106934, | |
| "learning_rate": 8.633093525179857e-07, | |
| "loss": 0.7544, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.028077753779697623, | |
| "grad_norm": 2.165950298309326, | |
| "learning_rate": 9.352517985611512e-07, | |
| "loss": 0.7345, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03023758099352052, | |
| "grad_norm": 2.1415212154388428, | |
| "learning_rate": 1.0071942446043167e-06, | |
| "loss": 0.7375, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.032397408207343416, | |
| "grad_norm": 2.045217275619507, | |
| "learning_rate": 1.079136690647482e-06, | |
| "loss": 0.7251, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03455723542116631, | |
| "grad_norm": 1.8833245038986206, | |
| "learning_rate": 1.1510791366906476e-06, | |
| "loss": 0.734, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0367170626349892, | |
| "grad_norm": 1.4383106231689453, | |
| "learning_rate": 1.2230215827338131e-06, | |
| "loss": 0.7126, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.038876889848812095, | |
| "grad_norm": 1.3764389753341675, | |
| "learning_rate": 1.2949640287769785e-06, | |
| "loss": 0.69, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04103671706263499, | |
| "grad_norm": 1.3699392080307007, | |
| "learning_rate": 1.366906474820144e-06, | |
| "loss": 0.7071, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.04319654427645788, | |
| "grad_norm": 1.2943273782730103, | |
| "learning_rate": 1.4388489208633094e-06, | |
| "loss": 0.686, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04535637149028078, | |
| "grad_norm": 1.2634108066558838, | |
| "learning_rate": 1.510791366906475e-06, | |
| "loss": 0.6902, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.047516198704103674, | |
| "grad_norm": 1.066751480102539, | |
| "learning_rate": 1.5827338129496403e-06, | |
| "loss": 0.6644, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04967602591792657, | |
| "grad_norm": 1.004930019378662, | |
| "learning_rate": 1.654676258992806e-06, | |
| "loss": 0.6602, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05183585313174946, | |
| "grad_norm": 0.9834485054016113, | |
| "learning_rate": 1.7266187050359715e-06, | |
| "loss": 0.6525, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05399568034557235, | |
| "grad_norm": 0.9758538007736206, | |
| "learning_rate": 1.7985611510791368e-06, | |
| "loss": 0.6452, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.056155507559395246, | |
| "grad_norm": 0.9222759008407593, | |
| "learning_rate": 1.8705035971223024e-06, | |
| "loss": 0.6485, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.058315334773218146, | |
| "grad_norm": 0.8775356411933899, | |
| "learning_rate": 1.942446043165468e-06, | |
| "loss": 0.6388, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06047516198704104, | |
| "grad_norm": 0.8008519411087036, | |
| "learning_rate": 2.0143884892086333e-06, | |
| "loss": 0.6328, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06263498920086392, | |
| "grad_norm": 0.7609057426452637, | |
| "learning_rate": 2.0863309352517987e-06, | |
| "loss": 0.6253, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06479481641468683, | |
| "grad_norm": 0.6197890043258667, | |
| "learning_rate": 2.158273381294964e-06, | |
| "loss": 0.6253, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06695464362850972, | |
| "grad_norm": 0.6675652265548706, | |
| "learning_rate": 2.23021582733813e-06, | |
| "loss": 0.605, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06911447084233262, | |
| "grad_norm": 0.6976248621940613, | |
| "learning_rate": 2.302158273381295e-06, | |
| "loss": 0.6077, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07127429805615551, | |
| "grad_norm": 0.6653661131858826, | |
| "learning_rate": 2.3741007194244605e-06, | |
| "loss": 0.6021, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0734341252699784, | |
| "grad_norm": 0.6243202090263367, | |
| "learning_rate": 2.4460431654676263e-06, | |
| "loss": 0.6147, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0755939524838013, | |
| "grad_norm": 0.5303459167480469, | |
| "learning_rate": 2.5179856115107916e-06, | |
| "loss": 0.6, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07775377969762419, | |
| "grad_norm": 0.48958107829093933, | |
| "learning_rate": 2.589928057553957e-06, | |
| "loss": 0.5829, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07991360691144708, | |
| "grad_norm": 0.4979974031448364, | |
| "learning_rate": 2.6618705035971228e-06, | |
| "loss": 0.5852, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08207343412526998, | |
| "grad_norm": 0.508642852306366, | |
| "learning_rate": 2.733812949640288e-06, | |
| "loss": 0.5827, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08423326133909287, | |
| "grad_norm": 0.5054506063461304, | |
| "learning_rate": 2.805755395683453e-06, | |
| "loss": 0.5627, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08639308855291576, | |
| "grad_norm": 0.42791351675987244, | |
| "learning_rate": 2.877697841726619e-06, | |
| "loss": 0.557, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08855291576673865, | |
| "grad_norm": 0.3770763874053955, | |
| "learning_rate": 2.949640287769784e-06, | |
| "loss": 0.5452, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09071274298056156, | |
| "grad_norm": 0.38157957792282104, | |
| "learning_rate": 3.02158273381295e-06, | |
| "loss": 0.5552, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09287257019438445, | |
| "grad_norm": 0.4018012583255768, | |
| "learning_rate": 3.0935251798561158e-06, | |
| "loss": 0.5559, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09503239740820735, | |
| "grad_norm": 0.3959904611110687, | |
| "learning_rate": 3.1654676258992807e-06, | |
| "loss": 0.5493, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09719222462203024, | |
| "grad_norm": 0.38622933626174927, | |
| "learning_rate": 3.237410071942446e-06, | |
| "loss": 0.5512, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09935205183585313, | |
| "grad_norm": 0.3973333239555359, | |
| "learning_rate": 3.309352517985612e-06, | |
| "loss": 0.5413, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10151187904967603, | |
| "grad_norm": 0.3897247910499573, | |
| "learning_rate": 3.381294964028777e-06, | |
| "loss": 0.5223, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10367170626349892, | |
| "grad_norm": 0.37678107619285583, | |
| "learning_rate": 3.453237410071943e-06, | |
| "loss": 0.5296, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10583153347732181, | |
| "grad_norm": 0.33324435353279114, | |
| "learning_rate": 3.525179856115108e-06, | |
| "loss": 0.5184, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.1079913606911447, | |
| "grad_norm": 0.303320974111557, | |
| "learning_rate": 3.5971223021582737e-06, | |
| "loss": 0.5331, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1101511879049676, | |
| "grad_norm": 0.30076754093170166, | |
| "learning_rate": 3.669064748201439e-06, | |
| "loss": 0.5331, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.11231101511879049, | |
| "grad_norm": 0.2589012086391449, | |
| "learning_rate": 3.741007194244605e-06, | |
| "loss": 0.5109, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11447084233261338, | |
| "grad_norm": 0.2596394121646881, | |
| "learning_rate": 3.81294964028777e-06, | |
| "loss": 0.5227, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11663066954643629, | |
| "grad_norm": 0.255307137966156, | |
| "learning_rate": 3.884892086330936e-06, | |
| "loss": 0.5169, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11879049676025918, | |
| "grad_norm": 0.2433944046497345, | |
| "learning_rate": 3.956834532374101e-06, | |
| "loss": 0.5161, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12095032397408208, | |
| "grad_norm": 0.2333260476589203, | |
| "learning_rate": 4.028776978417267e-06, | |
| "loss": 0.5096, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12311015118790497, | |
| "grad_norm": 0.22751125693321228, | |
| "learning_rate": 4.100719424460432e-06, | |
| "loss": 0.5115, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12526997840172785, | |
| "grad_norm": 0.2149927169084549, | |
| "learning_rate": 4.172661870503597e-06, | |
| "loss": 0.5132, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12742980561555076, | |
| "grad_norm": 0.22358939051628113, | |
| "learning_rate": 4.244604316546763e-06, | |
| "loss": 0.5057, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.12958963282937366, | |
| "grad_norm": 0.19954045116901398, | |
| "learning_rate": 4.316546762589928e-06, | |
| "loss": 0.4994, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13174946004319654, | |
| "grad_norm": 0.1936485469341278, | |
| "learning_rate": 4.388489208633094e-06, | |
| "loss": 0.4954, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13390928725701945, | |
| "grad_norm": 0.1977352499961853, | |
| "learning_rate": 4.46043165467626e-06, | |
| "loss": 0.4919, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13606911447084233, | |
| "grad_norm": 0.19697633385658264, | |
| "learning_rate": 4.5323741007194245e-06, | |
| "loss": 0.4895, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.13822894168466524, | |
| "grad_norm": 0.2068362534046173, | |
| "learning_rate": 4.60431654676259e-06, | |
| "loss": 0.4848, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14038876889848811, | |
| "grad_norm": 0.2056417018175125, | |
| "learning_rate": 4.676258992805755e-06, | |
| "loss": 0.4901, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14254859611231102, | |
| "grad_norm": 0.20445196330547333, | |
| "learning_rate": 4.748201438848921e-06, | |
| "loss": 0.4986, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1447084233261339, | |
| "grad_norm": 0.17678698897361755, | |
| "learning_rate": 4.820143884892087e-06, | |
| "loss": 0.4784, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1468682505399568, | |
| "grad_norm": 0.17606988549232483, | |
| "learning_rate": 4.892086330935253e-06, | |
| "loss": 0.4818, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1490280777537797, | |
| "grad_norm": 0.1764959990978241, | |
| "learning_rate": 4.9640287769784175e-06, | |
| "loss": 0.4832, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.1511879049676026, | |
| "grad_norm": 0.18899278342723846, | |
| "learning_rate": 5.035971223021583e-06, | |
| "loss": 0.4832, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15334773218142547, | |
| "grad_norm": 0.18127930164337158, | |
| "learning_rate": 5.107913669064749e-06, | |
| "loss": 0.4781, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.15550755939524838, | |
| "grad_norm": 0.15677423775196075, | |
| "learning_rate": 5.179856115107914e-06, | |
| "loss": 0.4795, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15766738660907129, | |
| "grad_norm": 0.17852047085762024, | |
| "learning_rate": 5.251798561151079e-06, | |
| "loss": 0.4802, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.15982721382289417, | |
| "grad_norm": 0.16051283478736877, | |
| "learning_rate": 5.3237410071942456e-06, | |
| "loss": 0.4758, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16198704103671707, | |
| "grad_norm": 0.15272092819213867, | |
| "learning_rate": 5.3956834532374105e-06, | |
| "loss": 0.4742, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16414686825053995, | |
| "grad_norm": 0.18069250881671906, | |
| "learning_rate": 5.467625899280576e-06, | |
| "loss": 0.4788, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16630669546436286, | |
| "grad_norm": 0.18495260179042816, | |
| "learning_rate": 5.539568345323741e-06, | |
| "loss": 0.477, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.16846652267818574, | |
| "grad_norm": 0.15244323015213013, | |
| "learning_rate": 5.611510791366906e-06, | |
| "loss": 0.4738, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17062634989200864, | |
| "grad_norm": 0.15029869973659515, | |
| "learning_rate": 5.683453237410073e-06, | |
| "loss": 0.4809, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17278617710583152, | |
| "grad_norm": 0.15908615291118622, | |
| "learning_rate": 5.755395683453238e-06, | |
| "loss": 0.4682, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17494600431965443, | |
| "grad_norm": 0.16395969688892365, | |
| "learning_rate": 5.8273381294964035e-06, | |
| "loss": 0.4786, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1771058315334773, | |
| "grad_norm": 0.15997102856636047, | |
| "learning_rate": 5.899280575539568e-06, | |
| "loss": 0.4728, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17926565874730022, | |
| "grad_norm": 0.15442821383476257, | |
| "learning_rate": 5.971223021582734e-06, | |
| "loss": 0.4693, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18142548596112312, | |
| "grad_norm": 0.17457455396652222, | |
| "learning_rate": 6.0431654676259e-06, | |
| "loss": 0.4535, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.183585313174946, | |
| "grad_norm": 0.17761239409446716, | |
| "learning_rate": 6.115107913669065e-06, | |
| "loss": 0.4615, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1857451403887689, | |
| "grad_norm": 0.15749000012874603, | |
| "learning_rate": 6.1870503597122315e-06, | |
| "loss": 0.4757, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1879049676025918, | |
| "grad_norm": 0.1500880867242813, | |
| "learning_rate": 6.2589928057553964e-06, | |
| "loss": 0.468, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.1900647948164147, | |
| "grad_norm": 0.16475360095500946, | |
| "learning_rate": 6.330935251798561e-06, | |
| "loss": 0.453, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19222462203023757, | |
| "grad_norm": 0.15528172254562378, | |
| "learning_rate": 6.402877697841727e-06, | |
| "loss": 0.4606, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.19438444924406048, | |
| "grad_norm": 0.18330231308937073, | |
| "learning_rate": 6.474820143884892e-06, | |
| "loss": 0.4645, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19654427645788336, | |
| "grad_norm": 0.15349973738193512, | |
| "learning_rate": 6.546762589928059e-06, | |
| "loss": 0.4589, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.19870410367170627, | |
| "grad_norm": 0.17889103293418884, | |
| "learning_rate": 6.618705035971224e-06, | |
| "loss": 0.4698, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20086393088552915, | |
| "grad_norm": 0.16917382180690765, | |
| "learning_rate": 6.6906474820143886e-06, | |
| "loss": 0.45, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.20302375809935205, | |
| "grad_norm": 0.15472815930843353, | |
| "learning_rate": 6.762589928057554e-06, | |
| "loss": 0.4554, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20518358531317496, | |
| "grad_norm": 0.15166456997394562, | |
| "learning_rate": 6.834532374100719e-06, | |
| "loss": 0.4603, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.20734341252699784, | |
| "grad_norm": 0.15480853617191315, | |
| "learning_rate": 6.906474820143886e-06, | |
| "loss": 0.4527, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20950323974082075, | |
| "grad_norm": 0.18076568841934204, | |
| "learning_rate": 6.978417266187051e-06, | |
| "loss": 0.4543, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.21166306695464362, | |
| "grad_norm": 0.14898645877838135, | |
| "learning_rate": 7.050359712230216e-06, | |
| "loss": 0.4645, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.21382289416846653, | |
| "grad_norm": 0.16191677749156952, | |
| "learning_rate": 7.122302158273382e-06, | |
| "loss": 0.4556, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.2159827213822894, | |
| "grad_norm": 0.15693144500255585, | |
| "learning_rate": 7.194244604316547e-06, | |
| "loss": 0.4636, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21814254859611232, | |
| "grad_norm": 0.1577419489622116, | |
| "learning_rate": 7.266187050359713e-06, | |
| "loss": 0.445, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2203023758099352, | |
| "grad_norm": 0.1567850261926651, | |
| "learning_rate": 7.338129496402878e-06, | |
| "loss": 0.4579, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2224622030237581, | |
| "grad_norm": 0.15102896094322205, | |
| "learning_rate": 7.410071942446043e-06, | |
| "loss": 0.4381, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.22462203023758098, | |
| "grad_norm": 0.18107765913009644, | |
| "learning_rate": 7.48201438848921e-06, | |
| "loss": 0.4479, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2267818574514039, | |
| "grad_norm": 0.15492849051952362, | |
| "learning_rate": 7.5539568345323745e-06, | |
| "loss": 0.4466, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.22894168466522677, | |
| "grad_norm": 0.16862063109874725, | |
| "learning_rate": 7.62589928057554e-06, | |
| "loss": 0.4556, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.23110151187904968, | |
| "grad_norm": 0.1701633483171463, | |
| "learning_rate": 7.697841726618706e-06, | |
| "loss": 0.4483, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.23326133909287258, | |
| "grad_norm": 0.18902191519737244, | |
| "learning_rate": 7.769784172661872e-06, | |
| "loss": 0.4383, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.23542116630669546, | |
| "grad_norm": 0.16331182420253754, | |
| "learning_rate": 7.841726618705036e-06, | |
| "loss": 0.4438, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.23758099352051837, | |
| "grad_norm": 0.18327923119068146, | |
| "learning_rate": 7.913669064748202e-06, | |
| "loss": 0.4535, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23974082073434125, | |
| "grad_norm": 0.16586214303970337, | |
| "learning_rate": 7.985611510791367e-06, | |
| "loss": 0.452, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.24190064794816415, | |
| "grad_norm": 0.1756211370229721, | |
| "learning_rate": 8.057553956834533e-06, | |
| "loss": 0.4461, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24406047516198703, | |
| "grad_norm": 0.17397738993167877, | |
| "learning_rate": 8.129496402877699e-06, | |
| "loss": 0.4444, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.24622030237580994, | |
| "grad_norm": 0.1517469584941864, | |
| "learning_rate": 8.201438848920865e-06, | |
| "loss": 0.4423, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24838012958963282, | |
| "grad_norm": 0.15296703577041626, | |
| "learning_rate": 8.273381294964029e-06, | |
| "loss": 0.4434, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2505399568034557, | |
| "grad_norm": 0.17677851021289825, | |
| "learning_rate": 8.345323741007195e-06, | |
| "loss": 0.4352, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2526997840172786, | |
| "grad_norm": 0.1546233892440796, | |
| "learning_rate": 8.41726618705036e-06, | |
| "loss": 0.4416, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2548596112311015, | |
| "grad_norm": 0.17565761506557465, | |
| "learning_rate": 8.489208633093526e-06, | |
| "loss": 0.4484, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2570194384449244, | |
| "grad_norm": 0.1443185657262802, | |
| "learning_rate": 8.561151079136692e-06, | |
| "loss": 0.4291, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.2591792656587473, | |
| "grad_norm": 0.17720922827720642, | |
| "learning_rate": 8.633093525179856e-06, | |
| "loss": 0.4356, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2613390928725702, | |
| "grad_norm": 0.17487414181232452, | |
| "learning_rate": 8.705035971223022e-06, | |
| "loss": 0.4465, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.2634989200863931, | |
| "grad_norm": 0.16723576188087463, | |
| "learning_rate": 8.776978417266188e-06, | |
| "loss": 0.4463, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.265658747300216, | |
| "grad_norm": 0.19939404726028442, | |
| "learning_rate": 8.848920863309353e-06, | |
| "loss": 0.4387, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2678185745140389, | |
| "grad_norm": 0.1569490283727646, | |
| "learning_rate": 8.92086330935252e-06, | |
| "loss": 0.4332, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.26997840172786175, | |
| "grad_norm": 0.17922881245613098, | |
| "learning_rate": 8.992805755395683e-06, | |
| "loss": 0.4404, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.27213822894168466, | |
| "grad_norm": 0.17273768782615662, | |
| "learning_rate": 9.064748201438849e-06, | |
| "loss": 0.4445, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.27429805615550756, | |
| "grad_norm": 0.16782942414283752, | |
| "learning_rate": 9.136690647482015e-06, | |
| "loss": 0.4316, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.27645788336933047, | |
| "grad_norm": 0.17636790871620178, | |
| "learning_rate": 9.20863309352518e-06, | |
| "loss": 0.4361, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2786177105831533, | |
| "grad_norm": 0.18042488396167755, | |
| "learning_rate": 9.280575539568346e-06, | |
| "loss": 0.4316, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.28077753779697623, | |
| "grad_norm": 0.21798282861709595, | |
| "learning_rate": 9.35251798561151e-06, | |
| "loss": 0.4384, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28293736501079914, | |
| "grad_norm": 0.18524324893951416, | |
| "learning_rate": 9.424460431654678e-06, | |
| "loss": 0.4434, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.28509719222462204, | |
| "grad_norm": 0.19849282503128052, | |
| "learning_rate": 9.496402877697842e-06, | |
| "loss": 0.4454, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28725701943844495, | |
| "grad_norm": 0.17093205451965332, | |
| "learning_rate": 9.568345323741008e-06, | |
| "loss": 0.4449, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2894168466522678, | |
| "grad_norm": 0.19003981351852417, | |
| "learning_rate": 9.640287769784174e-06, | |
| "loss": 0.4244, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2915766738660907, | |
| "grad_norm": 0.2193020135164261, | |
| "learning_rate": 9.712230215827338e-06, | |
| "loss": 0.434, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2937365010799136, | |
| "grad_norm": 0.19183115661144257, | |
| "learning_rate": 9.784172661870505e-06, | |
| "loss": 0.4259, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2958963282937365, | |
| "grad_norm": 0.17214708030223846, | |
| "learning_rate": 9.85611510791367e-06, | |
| "loss": 0.4433, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2980561555075594, | |
| "grad_norm": 0.16226549446582794, | |
| "learning_rate": 9.928057553956835e-06, | |
| "loss": 0.4389, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3002159827213823, | |
| "grad_norm": 0.17609405517578125, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4387, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3023758099352052, | |
| "grad_norm": 0.15736715495586395, | |
| "learning_rate": 9.999984208641271e-06, | |
| "loss": 0.4324, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3045356371490281, | |
| "grad_norm": 0.2223547101020813, | |
| "learning_rate": 9.99993683466483e-06, | |
| "loss": 0.4245, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.30669546436285094, | |
| "grad_norm": 0.17344172298908234, | |
| "learning_rate": 9.999857878369917e-06, | |
| "loss": 0.4302, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30885529157667385, | |
| "grad_norm": 0.16877353191375732, | |
| "learning_rate": 9.99974734025526e-06, | |
| "loss": 0.4497, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.31101511879049676, | |
| "grad_norm": 0.1692124605178833, | |
| "learning_rate": 9.999605221019082e-06, | |
| "loss": 0.4414, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.31317494600431967, | |
| "grad_norm": 0.18339934945106506, | |
| "learning_rate": 9.999431521559081e-06, | |
| "loss": 0.4392, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.31533477321814257, | |
| "grad_norm": 0.19719652831554413, | |
| "learning_rate": 9.999226242972445e-06, | |
| "loss": 0.4331, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3174946004319654, | |
| "grad_norm": 0.14894719421863556, | |
| "learning_rate": 9.998989386555815e-06, | |
| "loss": 0.4344, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.31965442764578833, | |
| "grad_norm": 0.20450158417224884, | |
| "learning_rate": 9.998720953805312e-06, | |
| "loss": 0.4397, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.32181425485961124, | |
| "grad_norm": 0.1889685094356537, | |
| "learning_rate": 9.9984209464165e-06, | |
| "loss": 0.4297, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.32397408207343414, | |
| "grad_norm": 0.16375744342803955, | |
| "learning_rate": 9.998089366284392e-06, | |
| "loss": 0.4228, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.326133909287257, | |
| "grad_norm": 0.15281440317630768, | |
| "learning_rate": 9.997726215503422e-06, | |
| "loss": 0.4264, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3282937365010799, | |
| "grad_norm": 0.16808317601680756, | |
| "learning_rate": 9.997331496367455e-06, | |
| "loss": 0.4247, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3304535637149028, | |
| "grad_norm": 0.168230339884758, | |
| "learning_rate": 9.996905211369748e-06, | |
| "loss": 0.4245, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3326133909287257, | |
| "grad_norm": 0.1692979782819748, | |
| "learning_rate": 9.996447363202947e-06, | |
| "loss": 0.4309, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3347732181425486, | |
| "grad_norm": 0.190389946103096, | |
| "learning_rate": 9.995957954759073e-06, | |
| "loss": 0.4239, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3369330453563715, | |
| "grad_norm": 0.18425118923187256, | |
| "learning_rate": 9.995436989129495e-06, | |
| "loss": 0.4316, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3390928725701944, | |
| "grad_norm": 0.1809394657611847, | |
| "learning_rate": 9.994884469604913e-06, | |
| "loss": 0.4276, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3412526997840173, | |
| "grad_norm": 0.20476803183555603, | |
| "learning_rate": 9.994300399675342e-06, | |
| "loss": 0.4375, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3434125269978402, | |
| "grad_norm": 0.16786271333694458, | |
| "learning_rate": 9.99368478303009e-06, | |
| "loss": 0.4352, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.34557235421166305, | |
| "grad_norm": 0.16701987385749817, | |
| "learning_rate": 9.993037623557716e-06, | |
| "loss": 0.4193, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34773218142548595, | |
| "grad_norm": 0.19547881186008453, | |
| "learning_rate": 9.99235892534604e-06, | |
| "loss": 0.4264, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.34989200863930886, | |
| "grad_norm": 0.16596215963363647, | |
| "learning_rate": 9.991648692682083e-06, | |
| "loss": 0.4347, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.35205183585313177, | |
| "grad_norm": 0.1804916262626648, | |
| "learning_rate": 9.990906930052065e-06, | |
| "loss": 0.4168, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3542116630669546, | |
| "grad_norm": 0.16082727909088135, | |
| "learning_rate": 9.990133642141359e-06, | |
| "loss": 0.4281, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3563714902807775, | |
| "grad_norm": 0.180884450674057, | |
| "learning_rate": 9.989328833834472e-06, | |
| "loss": 0.4318, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.35853131749460043, | |
| "grad_norm": 0.16864454746246338, | |
| "learning_rate": 9.988492510215011e-06, | |
| "loss": 0.4306, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.36069114470842334, | |
| "grad_norm": 0.17244219779968262, | |
| "learning_rate": 9.987624676565652e-06, | |
| "loss": 0.4282, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.36285097192224625, | |
| "grad_norm": 0.1679103672504425, | |
| "learning_rate": 9.986725338368103e-06, | |
| "loss": 0.4195, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3650107991360691, | |
| "grad_norm": 0.16607263684272766, | |
| "learning_rate": 9.98579450130307e-06, | |
| "loss": 0.4288, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.367170626349892, | |
| "grad_norm": 0.16679252684116364, | |
| "learning_rate": 9.98483217125023e-06, | |
| "loss": 0.418, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3693304535637149, | |
| "grad_norm": 0.15752755105495453, | |
| "learning_rate": 9.983838354288181e-06, | |
| "loss": 0.438, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.3714902807775378, | |
| "grad_norm": 0.1747094839811325, | |
| "learning_rate": 9.982813056694411e-06, | |
| "loss": 0.4316, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.37365010799136067, | |
| "grad_norm": 0.1854209452867508, | |
| "learning_rate": 9.981756284945256e-06, | |
| "loss": 0.424, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3758099352051836, | |
| "grad_norm": 0.1754128485918045, | |
| "learning_rate": 9.980668045715864e-06, | |
| "loss": 0.4115, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3779697624190065, | |
| "grad_norm": 0.17791932821273804, | |
| "learning_rate": 9.979548345880142e-06, | |
| "loss": 0.4272, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3801295896328294, | |
| "grad_norm": 0.15502074360847473, | |
| "learning_rate": 9.978397192510722e-06, | |
| "loss": 0.4161, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.38228941684665224, | |
| "grad_norm": 0.1928102672100067, | |
| "learning_rate": 9.977214592878917e-06, | |
| "loss": 0.4202, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.38444924406047515, | |
| "grad_norm": 0.1752733737230301, | |
| "learning_rate": 9.976000554454668e-06, | |
| "loss": 0.4251, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.38660907127429806, | |
| "grad_norm": 0.15899012982845306, | |
| "learning_rate": 9.974755084906503e-06, | |
| "loss": 0.4212, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.38876889848812096, | |
| "grad_norm": 0.20840278267860413, | |
| "learning_rate": 9.97347819210148e-06, | |
| "loss": 0.4193, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39092872570194387, | |
| "grad_norm": 0.16049212217330933, | |
| "learning_rate": 9.972169884105155e-06, | |
| "loss": 0.4222, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3930885529157667, | |
| "grad_norm": 0.21681340038776398, | |
| "learning_rate": 9.970830169181504e-06, | |
| "loss": 0.4221, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3952483801295896, | |
| "grad_norm": 0.20257696509361267, | |
| "learning_rate": 9.969459055792903e-06, | |
| "loss": 0.412, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.39740820734341253, | |
| "grad_norm": 0.1784621775150299, | |
| "learning_rate": 9.968056552600043e-06, | |
| "loss": 0.4308, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.39956803455723544, | |
| "grad_norm": 0.21011000871658325, | |
| "learning_rate": 9.966622668461899e-06, | |
| "loss": 0.4196, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4017278617710583, | |
| "grad_norm": 0.17967236042022705, | |
| "learning_rate": 9.965157412435663e-06, | |
| "loss": 0.4171, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4038876889848812, | |
| "grad_norm": 0.21680930256843567, | |
| "learning_rate": 9.963660793776689e-06, | |
| "loss": 0.4188, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.4060475161987041, | |
| "grad_norm": 0.1738550364971161, | |
| "learning_rate": 9.96213282193843e-06, | |
| "loss": 0.4207, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.408207343412527, | |
| "grad_norm": 0.1727888137102127, | |
| "learning_rate": 9.960573506572391e-06, | |
| "loss": 0.4244, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4103671706263499, | |
| "grad_norm": 0.19244728982448578, | |
| "learning_rate": 9.958982857528053e-06, | |
| "loss": 0.4162, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.41252699784017277, | |
| "grad_norm": 0.1902923285961151, | |
| "learning_rate": 9.957360884852819e-06, | |
| "loss": 0.4272, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4146868250539957, | |
| "grad_norm": 0.15449483692646027, | |
| "learning_rate": 9.955707598791952e-06, | |
| "loss": 0.4103, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4168466522678186, | |
| "grad_norm": 0.16577541828155518, | |
| "learning_rate": 9.954023009788505e-06, | |
| "loss": 0.4262, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.4190064794816415, | |
| "grad_norm": 0.17047494649887085, | |
| "learning_rate": 9.952307128483257e-06, | |
| "loss": 0.416, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.42116630669546434, | |
| "grad_norm": 0.16605447232723236, | |
| "learning_rate": 9.950559965714647e-06, | |
| "loss": 0.4118, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.42332613390928725, | |
| "grad_norm": 0.17296399176120758, | |
| "learning_rate": 9.948781532518706e-06, | |
| "loss": 0.415, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.42548596112311016, | |
| "grad_norm": 0.16557732224464417, | |
| "learning_rate": 9.946971840128982e-06, | |
| "loss": 0.399, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.42764578833693306, | |
| "grad_norm": 0.1601681262254715, | |
| "learning_rate": 9.945130899976477e-06, | |
| "loss": 0.4091, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4298056155507559, | |
| "grad_norm": 0.17228373885154724, | |
| "learning_rate": 9.94325872368957e-06, | |
| "loss": 0.4169, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.4319654427645788, | |
| "grad_norm": 0.1871252954006195, | |
| "learning_rate": 9.941355323093944e-06, | |
| "loss": 0.4064, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43412526997840173, | |
| "grad_norm": 0.16557608544826508, | |
| "learning_rate": 9.939420710212511e-06, | |
| "loss": 0.4022, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.43628509719222464, | |
| "grad_norm": 0.19201870262622833, | |
| "learning_rate": 9.937454897265338e-06, | |
| "loss": 0.4106, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43844492440604754, | |
| "grad_norm": 0.20729224383831024, | |
| "learning_rate": 9.935457896669568e-06, | |
| "loss": 0.4231, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.4406047516198704, | |
| "grad_norm": 0.1870211958885193, | |
| "learning_rate": 9.93342972103934e-06, | |
| "loss": 0.4048, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4427645788336933, | |
| "grad_norm": 0.17580384016036987, | |
| "learning_rate": 9.931370383185717e-06, | |
| "loss": 0.4088, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4449244060475162, | |
| "grad_norm": 0.20925194025039673, | |
| "learning_rate": 9.929279896116595e-06, | |
| "loss": 0.4148, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4470842332613391, | |
| "grad_norm": 0.2229665368795395, | |
| "learning_rate": 9.927158273036624e-06, | |
| "loss": 0.4185, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.44924406047516197, | |
| "grad_norm": 0.1665569692850113, | |
| "learning_rate": 9.925005527347132e-06, | |
| "loss": 0.4137, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4514038876889849, | |
| "grad_norm": 0.18300025165081024, | |
| "learning_rate": 9.922821672646028e-06, | |
| "loss": 0.4098, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4535637149028078, | |
| "grad_norm": 0.21622765064239502, | |
| "learning_rate": 9.920606722727726e-06, | |
| "loss": 0.413, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4557235421166307, | |
| "grad_norm": 0.1885174661874771, | |
| "learning_rate": 9.918360691583056e-06, | |
| "loss": 0.4156, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.45788336933045354, | |
| "grad_norm": 0.20590178668498993, | |
| "learning_rate": 9.916083593399167e-06, | |
| "loss": 0.4192, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.46004319654427644, | |
| "grad_norm": 0.19168834388256073, | |
| "learning_rate": 9.913775442559451e-06, | |
| "loss": 0.42, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.46220302375809935, | |
| "grad_norm": 0.2033228576183319, | |
| "learning_rate": 9.911436253643445e-06, | |
| "loss": 0.4294, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.46436285097192226, | |
| "grad_norm": 0.1603459119796753, | |
| "learning_rate": 9.909066041426733e-06, | |
| "loss": 0.4257, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.46652267818574517, | |
| "grad_norm": 0.17825163900852203, | |
| "learning_rate": 9.906664820880869e-06, | |
| "loss": 0.4196, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.468682505399568, | |
| "grad_norm": 0.19199080765247345, | |
| "learning_rate": 9.904232607173262e-06, | |
| "loss": 0.4213, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4708423326133909, | |
| "grad_norm": 0.16068002581596375, | |
| "learning_rate": 9.9017694156671e-06, | |
| "loss": 0.4178, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.47300215982721383, | |
| "grad_norm": 0.18598708510398865, | |
| "learning_rate": 9.899275261921236e-06, | |
| "loss": 0.4119, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.47516198704103674, | |
| "grad_norm": 0.17735686898231506, | |
| "learning_rate": 9.8967501616901e-06, | |
| "loss": 0.4159, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4773218142548596, | |
| "grad_norm": 0.20054501295089722, | |
| "learning_rate": 9.894194130923602e-06, | |
| "loss": 0.4228, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4794816414686825, | |
| "grad_norm": 0.1531924605369568, | |
| "learning_rate": 9.891607185767018e-06, | |
| "loss": 0.4182, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4816414686825054, | |
| "grad_norm": 0.20048613846302032, | |
| "learning_rate": 9.8889893425609e-06, | |
| "loss": 0.4184, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4838012958963283, | |
| "grad_norm": 0.16016018390655518, | |
| "learning_rate": 9.886340617840968e-06, | |
| "loss": 0.4162, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.48596112311015116, | |
| "grad_norm": 0.17939400672912598, | |
| "learning_rate": 9.883661028338009e-06, | |
| "loss": 0.4216, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.48812095032397407, | |
| "grad_norm": 0.17419300973415375, | |
| "learning_rate": 9.880950590977764e-06, | |
| "loss": 0.4165, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.490280777537797, | |
| "grad_norm": 0.18040290474891663, | |
| "learning_rate": 9.87820932288083e-06, | |
| "loss": 0.4148, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4924406047516199, | |
| "grad_norm": 0.2009221911430359, | |
| "learning_rate": 9.875437241362546e-06, | |
| "loss": 0.4088, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4946004319654428, | |
| "grad_norm": 0.16184359788894653, | |
| "learning_rate": 9.872634363932887e-06, | |
| "loss": 0.4246, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.49676025917926564, | |
| "grad_norm": 0.19945880770683289, | |
| "learning_rate": 9.869800708296347e-06, | |
| "loss": 0.415, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.49892008639308855, | |
| "grad_norm": 0.1834121197462082, | |
| "learning_rate": 9.866936292351837e-06, | |
| "loss": 0.413, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5010799136069114, | |
| "grad_norm": 0.19155217707157135, | |
| "learning_rate": 9.864041134192563e-06, | |
| "loss": 0.4145, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5032397408207343, | |
| "grad_norm": 0.16527444124221802, | |
| "learning_rate": 9.861115252105922e-06, | |
| "loss": 0.411, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5053995680345572, | |
| "grad_norm": 0.2018229365348816, | |
| "learning_rate": 9.85815866457337e-06, | |
| "loss": 0.4144, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5075593952483801, | |
| "grad_norm": 0.18045048415660858, | |
| "learning_rate": 9.855171390270325e-06, | |
| "loss": 0.4173, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.509719222462203, | |
| "grad_norm": 0.16335050761699677, | |
| "learning_rate": 9.852153448066031e-06, | |
| "loss": 0.4184, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5118790496760259, | |
| "grad_norm": 0.1876971423625946, | |
| "learning_rate": 9.849104857023455e-06, | |
| "loss": 0.4149, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5140388768898488, | |
| "grad_norm": 0.1632338911294937, | |
| "learning_rate": 9.846025636399152e-06, | |
| "loss": 0.4281, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5161987041036717, | |
| "grad_norm": 0.1685461848974228, | |
| "learning_rate": 9.842915805643156e-06, | |
| "loss": 0.4168, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5183585313174947, | |
| "grad_norm": 0.1753380447626114, | |
| "learning_rate": 9.839775384398846e-06, | |
| "loss": 0.4163, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5205183585313174, | |
| "grad_norm": 0.17196574807167053, | |
| "learning_rate": 9.836604392502829e-06, | |
| "loss": 0.4264, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5226781857451404, | |
| "grad_norm": 0.22572582960128784, | |
| "learning_rate": 9.833402849984815e-06, | |
| "loss": 0.4116, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5248380129589633, | |
| "grad_norm": 0.16782043874263763, | |
| "learning_rate": 9.830170777067486e-06, | |
| "loss": 0.416, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5269978401727862, | |
| "grad_norm": 0.1964120864868164, | |
| "learning_rate": 9.82690819416637e-06, | |
| "loss": 0.4189, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5291576673866091, | |
| "grad_norm": 0.16382494568824768, | |
| "learning_rate": 9.823615121889716e-06, | |
| "loss": 0.4216, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.531317494600432, | |
| "grad_norm": 0.19145262241363525, | |
| "learning_rate": 9.820291581038354e-06, | |
| "loss": 0.4069, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5334773218142549, | |
| "grad_norm": 0.1793445199728012, | |
| "learning_rate": 9.81693759260558e-06, | |
| "loss": 0.4073, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5356371490280778, | |
| "grad_norm": 0.20790617167949677, | |
| "learning_rate": 9.813553177777005e-06, | |
| "loss": 0.4098, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5377969762419006, | |
| "grad_norm": 0.17739000916481018, | |
| "learning_rate": 9.81013835793043e-06, | |
| "loss": 0.416, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5399568034557235, | |
| "grad_norm": 0.1868736743927002, | |
| "learning_rate": 9.806693154635719e-06, | |
| "loss": 0.4192, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5421166306695464, | |
| "grad_norm": 0.2077651023864746, | |
| "learning_rate": 9.803217589654642e-06, | |
| "loss": 0.4001, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5442764578833693, | |
| "grad_norm": 0.17842766642570496, | |
| "learning_rate": 9.79971168494076e-06, | |
| "loss": 0.4122, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5464362850971922, | |
| "grad_norm": 0.20299148559570312, | |
| "learning_rate": 9.796175462639273e-06, | |
| "loss": 0.4164, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.5485961123110151, | |
| "grad_norm": 0.20451399683952332, | |
| "learning_rate": 9.79260894508688e-06, | |
| "loss": 0.4194, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.550755939524838, | |
| "grad_norm": 0.16164958477020264, | |
| "learning_rate": 9.789012154811648e-06, | |
| "loss": 0.4037, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5529157667386609, | |
| "grad_norm": 0.19269876182079315, | |
| "learning_rate": 9.785385114532858e-06, | |
| "loss": 0.4086, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5550755939524838, | |
| "grad_norm": 0.22143694758415222, | |
| "learning_rate": 9.781727847160865e-06, | |
| "loss": 0.4205, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5572354211663066, | |
| "grad_norm": 0.20241893827915192, | |
| "learning_rate": 9.77804037579696e-06, | |
| "loss": 0.4135, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5593952483801296, | |
| "grad_norm": 0.19745588302612305, | |
| "learning_rate": 9.774322723733216e-06, | |
| "loss": 0.4129, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5615550755939525, | |
| "grad_norm": 0.1914190798997879, | |
| "learning_rate": 9.770574914452343e-06, | |
| "loss": 0.4153, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5637149028077754, | |
| "grad_norm": 0.18239063024520874, | |
| "learning_rate": 9.766796971627543e-06, | |
| "loss": 0.4183, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5658747300215983, | |
| "grad_norm": 0.18472230434417725, | |
| "learning_rate": 9.762988919122354e-06, | |
| "loss": 0.4129, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5680345572354212, | |
| "grad_norm": 0.18945036828517914, | |
| "learning_rate": 9.759150780990508e-06, | |
| "loss": 0.4145, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5701943844492441, | |
| "grad_norm": 0.18478325009346008, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.4057, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.572354211663067, | |
| "grad_norm": 0.21743497252464294, | |
| "learning_rate": 9.751384345011787e-06, | |
| "loss": 0.4161, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5745140388768899, | |
| "grad_norm": 0.17114116251468658, | |
| "learning_rate": 9.747456096221946e-06, | |
| "loss": 0.4007, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5766738660907127, | |
| "grad_norm": 0.187269389629364, | |
| "learning_rate": 9.743497859919196e-06, | |
| "loss": 0.4048, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5788336933045356, | |
| "grad_norm": 0.16859321296215057, | |
| "learning_rate": 9.739509661105912e-06, | |
| "loss": 0.4109, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5809935205183585, | |
| "grad_norm": 0.1999719887971878, | |
| "learning_rate": 9.735491524973723e-06, | |
| "loss": 0.3952, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5831533477321814, | |
| "grad_norm": 0.176213800907135, | |
| "learning_rate": 9.73144347690336e-06, | |
| "loss": 0.4177, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5853131749460043, | |
| "grad_norm": 0.1951010376214981, | |
| "learning_rate": 9.727365542464498e-06, | |
| "loss": 0.4164, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5874730021598272, | |
| "grad_norm": 0.17570029199123383, | |
| "learning_rate": 9.723257747415584e-06, | |
| "loss": 0.4094, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5896328293736501, | |
| "grad_norm": 0.179957315325737, | |
| "learning_rate": 9.719120117703688e-06, | |
| "loss": 0.406, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.591792656587473, | |
| "grad_norm": 0.17086850106716156, | |
| "learning_rate": 9.714952679464324e-06, | |
| "loss": 0.403, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.593952483801296, | |
| "grad_norm": 0.17228035628795624, | |
| "learning_rate": 9.710755459021297e-06, | |
| "loss": 0.4109, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5961123110151187, | |
| "grad_norm": 0.19265015423297882, | |
| "learning_rate": 9.706528482886535e-06, | |
| "loss": 0.4209, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5982721382289417, | |
| "grad_norm": 0.16357901692390442, | |
| "learning_rate": 9.702271777759915e-06, | |
| "loss": 0.4061, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6004319654427646, | |
| "grad_norm": 0.17083071172237396, | |
| "learning_rate": 9.697985370529101e-06, | |
| "loss": 0.3996, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6025917926565875, | |
| "grad_norm": 0.1811821013689041, | |
| "learning_rate": 9.693669288269371e-06, | |
| "loss": 0.4182, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6047516198704104, | |
| "grad_norm": 0.1488206833600998, | |
| "learning_rate": 9.689323558243446e-06, | |
| "loss": 0.3981, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6069114470842333, | |
| "grad_norm": 0.185231015086174, | |
| "learning_rate": 9.684948207901315e-06, | |
| "loss": 0.4132, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6090712742980562, | |
| "grad_norm": 0.14964009821414948, | |
| "learning_rate": 9.680543264880075e-06, | |
| "loss": 0.4098, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6112311015118791, | |
| "grad_norm": 0.16769437491893768, | |
| "learning_rate": 9.676108757003735e-06, | |
| "loss": 0.418, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6133909287257019, | |
| "grad_norm": 0.1763710230588913, | |
| "learning_rate": 9.671644712283061e-06, | |
| "loss": 0.4111, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6155507559395248, | |
| "grad_norm": 0.17033055424690247, | |
| "learning_rate": 9.667151158915382e-06, | |
| "loss": 0.4138, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6177105831533477, | |
| "grad_norm": 0.21479171514511108, | |
| "learning_rate": 9.662628125284426e-06, | |
| "loss": 0.4164, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6198704103671706, | |
| "grad_norm": 0.18288952112197876, | |
| "learning_rate": 9.65807563996013e-06, | |
| "loss": 0.416, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6220302375809935, | |
| "grad_norm": 0.20399482548236847, | |
| "learning_rate": 9.653493731698467e-06, | |
| "loss": 0.4145, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6241900647948164, | |
| "grad_norm": 0.19287261366844177, | |
| "learning_rate": 9.648882429441258e-06, | |
| "loss": 0.4131, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6263498920086393, | |
| "grad_norm": 0.17563579976558685, | |
| "learning_rate": 9.644241762315995e-06, | |
| "loss": 0.4097, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6285097192224622, | |
| "grad_norm": 0.18624839186668396, | |
| "learning_rate": 9.639571759635655e-06, | |
| "loss": 0.4176, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6306695464362851, | |
| "grad_norm": 0.18379148840904236, | |
| "learning_rate": 9.634872450898511e-06, | |
| "loss": 0.4035, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6328293736501079, | |
| "grad_norm": 0.1886526644229889, | |
| "learning_rate": 9.630143865787951e-06, | |
| "loss": 0.4068, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6349892008639308, | |
| "grad_norm": 0.16463734209537506, | |
| "learning_rate": 9.62538603417229e-06, | |
| "loss": 0.4163, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6371490280777538, | |
| "grad_norm": 0.1974654197692871, | |
| "learning_rate": 9.620598986104578e-06, | |
| "loss": 0.4039, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6393088552915767, | |
| "grad_norm": 0.1882481724023819, | |
| "learning_rate": 9.615782751822413e-06, | |
| "loss": 0.4115, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6414686825053996, | |
| "grad_norm": 0.15222138166427612, | |
| "learning_rate": 9.610937361747747e-06, | |
| "loss": 0.4045, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6436285097192225, | |
| "grad_norm": 0.17053523659706116, | |
| "learning_rate": 9.606062846486698e-06, | |
| "loss": 0.4119, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6457883369330454, | |
| "grad_norm": 0.15987005829811096, | |
| "learning_rate": 9.601159236829353e-06, | |
| "loss": 0.3964, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6479481641468683, | |
| "grad_norm": 0.16534611582756042, | |
| "learning_rate": 9.596226563749575e-06, | |
| "loss": 0.4115, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6501079913606912, | |
| "grad_norm": 0.1743890643119812, | |
| "learning_rate": 9.591264858404809e-06, | |
| "loss": 0.4241, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.652267818574514, | |
| "grad_norm": 0.14473925530910492, | |
| "learning_rate": 9.586274152135883e-06, | |
| "loss": 0.4011, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6544276457883369, | |
| "grad_norm": 0.1717105656862259, | |
| "learning_rate": 9.58125447646681e-06, | |
| "loss": 0.4128, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6565874730021598, | |
| "grad_norm": 0.16893403232097626, | |
| "learning_rate": 9.576205863104588e-06, | |
| "loss": 0.3984, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6587473002159827, | |
| "grad_norm": 0.19387434422969818, | |
| "learning_rate": 9.571128343939006e-06, | |
| "loss": 0.4086, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6609071274298056, | |
| "grad_norm": 0.1532067507505417, | |
| "learning_rate": 9.566021951042432e-06, | |
| "loss": 0.413, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6630669546436285, | |
| "grad_norm": 0.19082939624786377, | |
| "learning_rate": 9.56088671666962e-06, | |
| "loss": 0.4028, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6652267818574514, | |
| "grad_norm": 0.1660735309123993, | |
| "learning_rate": 9.555722673257502e-06, | |
| "loss": 0.4048, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6673866090712743, | |
| "grad_norm": 0.1646290272474289, | |
| "learning_rate": 9.550529853424979e-06, | |
| "loss": 0.401, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6695464362850972, | |
| "grad_norm": 0.1946524977684021, | |
| "learning_rate": 9.545308289972727e-06, | |
| "loss": 0.3999, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.67170626349892, | |
| "grad_norm": 0.1731284260749817, | |
| "learning_rate": 9.54005801588298e-06, | |
| "loss": 0.4056, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.673866090712743, | |
| "grad_norm": 0.1577766239643097, | |
| "learning_rate": 9.534779064319318e-06, | |
| "loss": 0.3952, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6760259179265659, | |
| "grad_norm": 0.20560896396636963, | |
| "learning_rate": 9.529471468626472e-06, | |
| "loss": 0.4082, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6781857451403888, | |
| "grad_norm": 0.16146545112133026, | |
| "learning_rate": 9.524135262330098e-06, | |
| "loss": 0.4044, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6803455723542117, | |
| "grad_norm": 0.18924373388290405, | |
| "learning_rate": 9.51877047913658e-06, | |
| "loss": 0.3949, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6825053995680346, | |
| "grad_norm": 0.20120824873447418, | |
| "learning_rate": 9.513377152932796e-06, | |
| "loss": 0.4098, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6846652267818575, | |
| "grad_norm": 0.17236529290676117, | |
| "learning_rate": 9.507955317785935e-06, | |
| "loss": 0.4005, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6868250539956804, | |
| "grad_norm": 0.19479617476463318, | |
| "learning_rate": 9.502505007943248e-06, | |
| "loss": 0.4115, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6889848812095032, | |
| "grad_norm": 0.18137769401073456, | |
| "learning_rate": 9.497026257831856e-06, | |
| "loss": 0.4006, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6911447084233261, | |
| "grad_norm": 0.18386590480804443, | |
| "learning_rate": 9.491519102058523e-06, | |
| "loss": 0.4045, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.693304535637149, | |
| "grad_norm": 0.18597717583179474, | |
| "learning_rate": 9.48598357540944e-06, | |
| "loss": 0.3974, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6954643628509719, | |
| "grad_norm": 0.19069334864616394, | |
| "learning_rate": 9.480419712849996e-06, | |
| "loss": 0.4139, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6976241900647948, | |
| "grad_norm": 0.18793267011642456, | |
| "learning_rate": 9.474827549524574e-06, | |
| "loss": 0.4105, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6997840172786177, | |
| "grad_norm": 0.19101367890834808, | |
| "learning_rate": 9.46920712075632e-06, | |
| "loss": 0.3988, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7019438444924406, | |
| "grad_norm": 0.15915150940418243, | |
| "learning_rate": 9.463558462046912e-06, | |
| "loss": 0.4052, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7041036717062635, | |
| "grad_norm": 0.20149967074394226, | |
| "learning_rate": 9.457881609076352e-06, | |
| "loss": 0.4039, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7062634989200864, | |
| "grad_norm": 0.1692509800195694, | |
| "learning_rate": 9.452176597702724e-06, | |
| "loss": 0.4146, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7084233261339092, | |
| "grad_norm": 0.16798816621303558, | |
| "learning_rate": 9.446443463961986e-06, | |
| "loss": 0.3943, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7105831533477321, | |
| "grad_norm": 0.16925224661827087, | |
| "learning_rate": 9.440682244067724e-06, | |
| "loss": 0.3992, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.712742980561555, | |
| "grad_norm": 0.19609062373638153, | |
| "learning_rate": 9.434892974410932e-06, | |
| "loss": 0.4094, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.714902807775378, | |
| "grad_norm": 0.19346529245376587, | |
| "learning_rate": 9.429075691559788e-06, | |
| "loss": 0.4018, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7170626349892009, | |
| "grad_norm": 0.18897579610347748, | |
| "learning_rate": 9.423230432259409e-06, | |
| "loss": 0.4012, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7192224622030238, | |
| "grad_norm": 0.16114945709705353, | |
| "learning_rate": 9.41735723343163e-06, | |
| "loss": 0.3988, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7213822894168467, | |
| "grad_norm": 0.1889643669128418, | |
| "learning_rate": 9.411456132174768e-06, | |
| "loss": 0.3912, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7235421166306696, | |
| "grad_norm": 0.20438078045845032, | |
| "learning_rate": 9.405527165763384e-06, | |
| "loss": 0.4036, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7257019438444925, | |
| "grad_norm": 0.1676449030637741, | |
| "learning_rate": 9.399570371648052e-06, | |
| "loss": 0.4085, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7278617710583153, | |
| "grad_norm": 0.23122479021549225, | |
| "learning_rate": 9.393585787455125e-06, | |
| "loss": 0.4075, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7300215982721382, | |
| "grad_norm": 0.15428000688552856, | |
| "learning_rate": 9.387573450986485e-06, | |
| "loss": 0.3979, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7321814254859611, | |
| "grad_norm": 0.1889045536518097, | |
| "learning_rate": 9.381533400219319e-06, | |
| "loss": 0.4004, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.734341252699784, | |
| "grad_norm": 0.16855069994926453, | |
| "learning_rate": 9.37546567330587e-06, | |
| "loss": 0.4021, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7365010799136069, | |
| "grad_norm": 0.15914303064346313, | |
| "learning_rate": 9.369370308573198e-06, | |
| "loss": 0.4147, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7386609071274298, | |
| "grad_norm": 0.18533971905708313, | |
| "learning_rate": 9.363247344522939e-06, | |
| "loss": 0.4025, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7408207343412527, | |
| "grad_norm": 0.15280672907829285, | |
| "learning_rate": 9.357096819831065e-06, | |
| "loss": 0.4061, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7429805615550756, | |
| "grad_norm": 0.1812913715839386, | |
| "learning_rate": 9.35091877334763e-06, | |
| "loss": 0.4008, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7451403887688985, | |
| "grad_norm": 0.19496847689151764, | |
| "learning_rate": 9.344713244096533e-06, | |
| "loss": 0.4063, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7473002159827213, | |
| "grad_norm": 0.15390554070472717, | |
| "learning_rate": 9.33848027127527e-06, | |
| "loss": 0.3943, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7494600431965442, | |
| "grad_norm": 0.18108762800693512, | |
| "learning_rate": 9.332219894254686e-06, | |
| "loss": 0.4037, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7516198704103672, | |
| "grad_norm": 0.172384575009346, | |
| "learning_rate": 9.325932152578726e-06, | |
| "loss": 0.404, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7537796976241901, | |
| "grad_norm": 0.1718224287033081, | |
| "learning_rate": 9.319617085964177e-06, | |
| "loss": 0.4098, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.755939524838013, | |
| "grad_norm": 0.16733084619045258, | |
| "learning_rate": 9.31327473430044e-06, | |
| "loss": 0.41, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7580993520518359, | |
| "grad_norm": 0.15835174918174744, | |
| "learning_rate": 9.30690513764925e-06, | |
| "loss": 0.4108, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7602591792656588, | |
| "grad_norm": 0.16416366398334503, | |
| "learning_rate": 9.300508336244443e-06, | |
| "loss": 0.4123, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7624190064794817, | |
| "grad_norm": 0.15685053169727325, | |
| "learning_rate": 9.294084370491695e-06, | |
| "loss": 0.4026, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7645788336933045, | |
| "grad_norm": 0.17324267327785492, | |
| "learning_rate": 9.287633280968263e-06, | |
| "loss": 0.4043, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7667386609071274, | |
| "grad_norm": 0.16480839252471924, | |
| "learning_rate": 9.281155108422732e-06, | |
| "loss": 0.3903, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7688984881209503, | |
| "grad_norm": 0.155819833278656, | |
| "learning_rate": 9.274649893774768e-06, | |
| "loss": 0.4163, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7710583153347732, | |
| "grad_norm": 0.1437472552061081, | |
| "learning_rate": 9.268117678114833e-06, | |
| "loss": 0.3983, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7732181425485961, | |
| "grad_norm": 0.1644992083311081, | |
| "learning_rate": 9.26155850270396e-06, | |
| "loss": 0.4143, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.775377969762419, | |
| "grad_norm": 0.15442179143428802, | |
| "learning_rate": 9.25497240897346e-06, | |
| "loss": 0.4186, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7775377969762419, | |
| "grad_norm": 0.16961856186389923, | |
| "learning_rate": 9.248359438524683e-06, | |
| "loss": 0.4056, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7796976241900648, | |
| "grad_norm": 0.14529763162136078, | |
| "learning_rate": 9.241719633128743e-06, | |
| "loss": 0.4081, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7818574514038877, | |
| "grad_norm": 0.17451095581054688, | |
| "learning_rate": 9.235053034726261e-06, | |
| "loss": 0.4011, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7840172786177105, | |
| "grad_norm": 0.16993848979473114, | |
| "learning_rate": 9.228359685427095e-06, | |
| "loss": 0.4126, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7861771058315334, | |
| "grad_norm": 0.1698153018951416, | |
| "learning_rate": 9.221639627510076e-06, | |
| "loss": 0.3983, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7883369330453563, | |
| "grad_norm": 0.15617668628692627, | |
| "learning_rate": 9.214892903422745e-06, | |
| "loss": 0.3894, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7904967602591793, | |
| "grad_norm": 0.1748441755771637, | |
| "learning_rate": 9.208119555781074e-06, | |
| "loss": 0.4042, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7926565874730022, | |
| "grad_norm": 0.18701235949993134, | |
| "learning_rate": 9.201319627369211e-06, | |
| "loss": 0.4166, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7948164146868251, | |
| "grad_norm": 0.15359680354595184, | |
| "learning_rate": 9.1944931611392e-06, | |
| "loss": 0.4025, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.796976241900648, | |
| "grad_norm": 0.17842437326908112, | |
| "learning_rate": 9.18764020021071e-06, | |
| "loss": 0.4157, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7991360691144709, | |
| "grad_norm": 0.16838903725147247, | |
| "learning_rate": 9.180760787870766e-06, | |
| "loss": 0.4058, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8012958963282938, | |
| "grad_norm": 0.17230413854122162, | |
| "learning_rate": 9.173854967573479e-06, | |
| "loss": 0.4063, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8034557235421166, | |
| "grad_norm": 0.17813710868358612, | |
| "learning_rate": 9.166922782939759e-06, | |
| "loss": 0.4122, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8056155507559395, | |
| "grad_norm": 0.19047455489635468, | |
| "learning_rate": 9.159964277757054e-06, | |
| "loss": 0.4026, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8077753779697624, | |
| "grad_norm": 0.15476709604263306, | |
| "learning_rate": 9.152979495979064e-06, | |
| "loss": 0.3872, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8099352051835853, | |
| "grad_norm": 0.15130369365215302, | |
| "learning_rate": 9.145968481725466e-06, | |
| "loss": 0.4018, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8120950323974082, | |
| "grad_norm": 0.1687459796667099, | |
| "learning_rate": 9.13893127928164e-06, | |
| "loss": 0.3983, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8142548596112311, | |
| "grad_norm": 0.1546049863100052, | |
| "learning_rate": 9.131867933098379e-06, | |
| "loss": 0.4109, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.816414686825054, | |
| "grad_norm": 0.1616266667842865, | |
| "learning_rate": 9.124778487791615e-06, | |
| "loss": 0.4039, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8185745140388769, | |
| "grad_norm": 0.1830417811870575, | |
| "learning_rate": 9.117662988142138e-06, | |
| "loss": 0.4053, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8207343412526998, | |
| "grad_norm": 0.15087199211120605, | |
| "learning_rate": 9.110521479095314e-06, | |
| "loss": 0.4111, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8228941684665226, | |
| "grad_norm": 0.15791049599647522, | |
| "learning_rate": 9.10335400576079e-06, | |
| "loss": 0.3882, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8250539956803455, | |
| "grad_norm": 0.16011568903923035, | |
| "learning_rate": 9.096160613412228e-06, | |
| "loss": 0.4101, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8272138228941684, | |
| "grad_norm": 0.1656263768672943, | |
| "learning_rate": 9.088941347487004e-06, | |
| "loss": 0.394, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8293736501079914, | |
| "grad_norm": 0.15749986469745636, | |
| "learning_rate": 9.08169625358592e-06, | |
| "loss": 0.3972, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8315334773218143, | |
| "grad_norm": 0.15940222144126892, | |
| "learning_rate": 9.074425377472932e-06, | |
| "loss": 0.4003, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8336933045356372, | |
| "grad_norm": 0.17559286952018738, | |
| "learning_rate": 9.067128765074842e-06, | |
| "loss": 0.4046, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8358531317494601, | |
| "grad_norm": 0.1646784096956253, | |
| "learning_rate": 9.059806462481022e-06, | |
| "loss": 0.3968, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.838012958963283, | |
| "grad_norm": 0.16697706282138824, | |
| "learning_rate": 9.052458515943112e-06, | |
| "loss": 0.4146, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8401727861771058, | |
| "grad_norm": 0.17301729321479797, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.4037, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8423326133909287, | |
| "grad_norm": 0.1766882836818695, | |
| "learning_rate": 9.037685876851211e-06, | |
| "loss": 0.4019, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8444924406047516, | |
| "grad_norm": 0.16974475979804993, | |
| "learning_rate": 9.030261277609235e-06, | |
| "loss": 0.3978, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8466522678185745, | |
| "grad_norm": 0.17788070440292358, | |
| "learning_rate": 9.022811221046618e-06, | |
| "loss": 0.4062, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8488120950323974, | |
| "grad_norm": 0.16667339205741882, | |
| "learning_rate": 9.015335754221964e-06, | |
| "loss": 0.4167, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8509719222462203, | |
| "grad_norm": 0.15693309903144836, | |
| "learning_rate": 9.007834924354384e-06, | |
| "loss": 0.3988, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8531317494600432, | |
| "grad_norm": 0.16362878680229187, | |
| "learning_rate": 9.000308778823196e-06, | |
| "loss": 0.3995, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8552915766738661, | |
| "grad_norm": 0.14635585248470306, | |
| "learning_rate": 8.992757365167625e-06, | |
| "loss": 0.4028, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.857451403887689, | |
| "grad_norm": 0.16527874767780304, | |
| "learning_rate": 8.985180731086505e-06, | |
| "loss": 0.406, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8596112311015118, | |
| "grad_norm": 0.2163344919681549, | |
| "learning_rate": 8.977578924437976e-06, | |
| "loss": 0.3985, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8617710583153347, | |
| "grad_norm": 0.14798112213611603, | |
| "learning_rate": 8.969951993239177e-06, | |
| "loss": 0.4011, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8639308855291576, | |
| "grad_norm": 0.16196613013744354, | |
| "learning_rate": 8.962299985665955e-06, | |
| "loss": 0.4057, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8660907127429806, | |
| "grad_norm": 0.15940962731838226, | |
| "learning_rate": 8.954622950052543e-06, | |
| "loss": 0.4027, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8682505399568035, | |
| "grad_norm": 0.16603127121925354, | |
| "learning_rate": 8.946920934891274e-06, | |
| "loss": 0.4106, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8704103671706264, | |
| "grad_norm": 0.16625916957855225, | |
| "learning_rate": 8.939193988832261e-06, | |
| "loss": 0.3997, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8725701943844493, | |
| "grad_norm": 0.17211325466632843, | |
| "learning_rate": 8.931442160683094e-06, | |
| "loss": 0.4036, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8747300215982722, | |
| "grad_norm": 0.17657049000263214, | |
| "learning_rate": 8.923665499408535e-06, | |
| "loss": 0.393, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8768898488120951, | |
| "grad_norm": 0.18346846103668213, | |
| "learning_rate": 8.915864054130203e-06, | |
| "loss": 0.3911, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8790496760259179, | |
| "grad_norm": 0.17051193118095398, | |
| "learning_rate": 8.908037874126263e-06, | |
| "loss": 0.3916, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8812095032397408, | |
| "grad_norm": 0.15643054246902466, | |
| "learning_rate": 8.900187008831124e-06, | |
| "loss": 0.3957, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8833693304535637, | |
| "grad_norm": 0.18112455308437347, | |
| "learning_rate": 8.892311507835118e-06, | |
| "loss": 0.4006, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8855291576673866, | |
| "grad_norm": 0.1472531408071518, | |
| "learning_rate": 8.88441142088419e-06, | |
| "loss": 0.3969, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8876889848812095, | |
| "grad_norm": 0.16634514927864075, | |
| "learning_rate": 8.87648679787958e-06, | |
| "loss": 0.4052, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8898488120950324, | |
| "grad_norm": 0.16606342792510986, | |
| "learning_rate": 8.868537688877516e-06, | |
| "loss": 0.3999, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8920086393088553, | |
| "grad_norm": 0.16223309934139252, | |
| "learning_rate": 8.860564144088891e-06, | |
| "loss": 0.4053, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8941684665226782, | |
| "grad_norm": 0.17775796353816986, | |
| "learning_rate": 8.852566213878947e-06, | |
| "loss": 0.3996, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.896328293736501, | |
| "grad_norm": 0.16113241016864777, | |
| "learning_rate": 8.844543948766958e-06, | |
| "loss": 0.3874, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8984881209503239, | |
| "grad_norm": 0.19586795568466187, | |
| "learning_rate": 8.83649739942591e-06, | |
| "loss": 0.4012, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9006479481641468, | |
| "grad_norm": 0.18052950501441956, | |
| "learning_rate": 8.828426616682184e-06, | |
| "loss": 0.3973, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9028077753779697, | |
| "grad_norm": 0.16518956422805786, | |
| "learning_rate": 8.820331651515226e-06, | |
| "loss": 0.3997, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9049676025917927, | |
| "grad_norm": 0.1827470362186432, | |
| "learning_rate": 8.81221255505724e-06, | |
| "loss": 0.4008, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9071274298056156, | |
| "grad_norm": 0.18678082525730133, | |
| "learning_rate": 8.80406937859285e-06, | |
| "loss": 0.3953, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9092872570194385, | |
| "grad_norm": 0.1759604662656784, | |
| "learning_rate": 8.795902173558784e-06, | |
| "loss": 0.4037, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9114470842332614, | |
| "grad_norm": 0.1986621916294098, | |
| "learning_rate": 8.787710991543547e-06, | |
| "loss": 0.4125, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9136069114470843, | |
| "grad_norm": 0.19601307809352875, | |
| "learning_rate": 8.779495884287099e-06, | |
| "loss": 0.4018, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9157667386609071, | |
| "grad_norm": 0.16699747741222382, | |
| "learning_rate": 8.77125690368052e-06, | |
| "loss": 0.4029, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.91792656587473, | |
| "grad_norm": 0.16781239211559296, | |
| "learning_rate": 8.76299410176569e-06, | |
| "loss": 0.3956, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9200863930885529, | |
| "grad_norm": 0.17204856872558594, | |
| "learning_rate": 8.754707530734958e-06, | |
| "loss": 0.4033, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9222462203023758, | |
| "grad_norm": 0.1568082720041275, | |
| "learning_rate": 8.74639724293081e-06, | |
| "loss": 0.3937, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9244060475161987, | |
| "grad_norm": 0.18325375020503998, | |
| "learning_rate": 8.738063290845536e-06, | |
| "loss": 0.4077, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9265658747300216, | |
| "grad_norm": 0.15343928337097168, | |
| "learning_rate": 8.729705727120911e-06, | |
| "loss": 0.3997, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9287257019438445, | |
| "grad_norm": 0.1750892996788025, | |
| "learning_rate": 8.721324604547851e-06, | |
| "loss": 0.4151, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9308855291576674, | |
| "grad_norm": 0.17041905224323273, | |
| "learning_rate": 8.712919976066078e-06, | |
| "loss": 0.4051, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9330453563714903, | |
| "grad_norm": 0.17677395045757294, | |
| "learning_rate": 8.704491894763794e-06, | |
| "loss": 0.4031, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9352051835853131, | |
| "grad_norm": 0.2149730920791626, | |
| "learning_rate": 8.696040413877344e-06, | |
| "loss": 0.4029, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.937365010799136, | |
| "grad_norm": 0.17261390388011932, | |
| "learning_rate": 8.68756558679087e-06, | |
| "loss": 0.3998, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9395248380129589, | |
| "grad_norm": 0.17588981986045837, | |
| "learning_rate": 8.679067467035989e-06, | |
| "loss": 0.4127, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9416846652267818, | |
| "grad_norm": 0.18429699540138245, | |
| "learning_rate": 8.670546108291443e-06, | |
| "loss": 0.3987, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9438444924406048, | |
| "grad_norm": 0.15987183153629303, | |
| "learning_rate": 8.662001564382768e-06, | |
| "loss": 0.3911, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9460043196544277, | |
| "grad_norm": 0.17549017071723938, | |
| "learning_rate": 8.65343388928194e-06, | |
| "loss": 0.4068, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9481641468682506, | |
| "grad_norm": 0.1644325852394104, | |
| "learning_rate": 8.644843137107058e-06, | |
| "loss": 0.3938, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9503239740820735, | |
| "grad_norm": 0.18092772364616394, | |
| "learning_rate": 8.636229362121979e-06, | |
| "loss": 0.4036, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9524838012958964, | |
| "grad_norm": 0.19745442271232605, | |
| "learning_rate": 8.627592618735989e-06, | |
| "loss": 0.4131, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9546436285097192, | |
| "grad_norm": 0.15399040281772614, | |
| "learning_rate": 8.618932961503452e-06, | |
| "loss": 0.3956, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9568034557235421, | |
| "grad_norm": 0.21613968908786774, | |
| "learning_rate": 8.610250445123472e-06, | |
| "loss": 0.3957, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.958963282937365, | |
| "grad_norm": 0.15756168961524963, | |
| "learning_rate": 8.601545124439535e-06, | |
| "loss": 0.401, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9611231101511879, | |
| "grad_norm": 0.16475795209407806, | |
| "learning_rate": 8.592817054439184e-06, | |
| "loss": 0.4091, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9632829373650108, | |
| "grad_norm": 0.17942647635936737, | |
| "learning_rate": 8.584066290253649e-06, | |
| "loss": 0.3818, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9654427645788337, | |
| "grad_norm": 0.1804707795381546, | |
| "learning_rate": 8.575292887157515e-06, | |
| "loss": 0.4036, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9676025917926566, | |
| "grad_norm": 0.1610308587551117, | |
| "learning_rate": 8.566496900568364e-06, | |
| "loss": 0.4046, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9697624190064795, | |
| "grad_norm": 0.17367208003997803, | |
| "learning_rate": 8.557678386046429e-06, | |
| "loss": 0.399, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9719222462203023, | |
| "grad_norm": 0.16975344717502594, | |
| "learning_rate": 8.548837399294235e-06, | |
| "loss": 0.3973, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9740820734341252, | |
| "grad_norm": 0.16336052119731903, | |
| "learning_rate": 8.539973996156265e-06, | |
| "loss": 0.4077, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9762419006479481, | |
| "grad_norm": 0.16016145050525665, | |
| "learning_rate": 8.531088232618587e-06, | |
| "loss": 0.4005, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.978401727861771, | |
| "grad_norm": 0.15805621445178986, | |
| "learning_rate": 8.522180164808515e-06, | |
| "loss": 0.3885, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.980561555075594, | |
| "grad_norm": 0.15626148879528046, | |
| "learning_rate": 8.513249848994248e-06, | |
| "loss": 0.3912, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9827213822894169, | |
| "grad_norm": 0.1786354035139084, | |
| "learning_rate": 8.504297341584509e-06, | |
| "loss": 0.4034, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9848812095032398, | |
| "grad_norm": 0.1438089907169342, | |
| "learning_rate": 8.495322699128206e-06, | |
| "loss": 0.4003, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9870410367170627, | |
| "grad_norm": 0.16011767089366913, | |
| "learning_rate": 8.486325978314054e-06, | |
| "loss": 0.3985, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.9892008639308856, | |
| "grad_norm": 0.18413770198822021, | |
| "learning_rate": 8.477307235970235e-06, | |
| "loss": 0.3855, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9913606911447084, | |
| "grad_norm": 0.15895338356494904, | |
| "learning_rate": 8.468266529064025e-06, | |
| "loss": 0.3918, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9935205183585313, | |
| "grad_norm": 0.172573059797287, | |
| "learning_rate": 8.459203914701444e-06, | |
| "loss": 0.3903, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9956803455723542, | |
| "grad_norm": 0.1525600552558899, | |
| "learning_rate": 8.450119450126889e-06, | |
| "loss": 0.4066, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9978401727861771, | |
| "grad_norm": 0.1875782459974289, | |
| "learning_rate": 8.441013192722774e-06, | |
| "loss": 0.405, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.18118026852607727, | |
| "learning_rate": 8.431885200009172e-06, | |
| "loss": 0.402, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0021598272138228, | |
| "grad_norm": 0.1752985566854477, | |
| "learning_rate": 8.422735529643445e-06, | |
| "loss": 0.3926, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0043196544276458, | |
| "grad_norm": 0.1703169196844101, | |
| "learning_rate": 8.413564239419883e-06, | |
| "loss": 0.3838, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0064794816414686, | |
| "grad_norm": 0.181954026222229, | |
| "learning_rate": 8.404371387269341e-06, | |
| "loss": 0.3863, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0086393088552916, | |
| "grad_norm": 0.16215139627456665, | |
| "learning_rate": 8.39515703125887e-06, | |
| "loss": 0.3849, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0107991360691144, | |
| "grad_norm": 0.23999503254890442, | |
| "learning_rate": 8.385921229591351e-06, | |
| "loss": 0.3917, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0129589632829374, | |
| "grad_norm": 0.1752462089061737, | |
| "learning_rate": 8.376664040605122e-06, | |
| "loss": 0.3812, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0151187904967602, | |
| "grad_norm": 0.17159010469913483, | |
| "learning_rate": 8.367385522773625e-06, | |
| "loss": 0.386, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0172786177105833, | |
| "grad_norm": 0.19381286203861237, | |
| "learning_rate": 8.358085734705021e-06, | |
| "loss": 0.3958, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.019438444924406, | |
| "grad_norm": 0.17137818038463593, | |
| "learning_rate": 8.348764735141823e-06, | |
| "loss": 0.3867, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0215982721382288, | |
| "grad_norm": 0.18011374771595, | |
| "learning_rate": 8.339422582960533e-06, | |
| "loss": 0.3974, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0237580993520519, | |
| "grad_norm": 0.18092289566993713, | |
| "learning_rate": 8.33005933717126e-06, | |
| "loss": 0.3697, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0259179265658747, | |
| "grad_norm": 0.15359187126159668, | |
| "learning_rate": 8.320675056917353e-06, | |
| "loss": 0.3813, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0280777537796977, | |
| "grad_norm": 0.16927658021450043, | |
| "learning_rate": 8.311269801475026e-06, | |
| "loss": 0.3834, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0302375809935205, | |
| "grad_norm": 0.17222736775875092, | |
| "learning_rate": 8.301843630252986e-06, | |
| "loss": 0.3869, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0323974082073435, | |
| "grad_norm": 0.17333416640758514, | |
| "learning_rate": 8.29239660279205e-06, | |
| "loss": 0.3853, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0345572354211663, | |
| "grad_norm": 0.18697193264961243, | |
| "learning_rate": 8.282928778764783e-06, | |
| "loss": 0.3974, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0367170626349893, | |
| "grad_norm": 0.1769992560148239, | |
| "learning_rate": 8.273440217975103e-06, | |
| "loss": 0.39, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.038876889848812, | |
| "grad_norm": 0.1826915144920349, | |
| "learning_rate": 8.26393098035792e-06, | |
| "loss": 0.383, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.041036717062635, | |
| "grad_norm": 0.18807494640350342, | |
| "learning_rate": 8.254401125978744e-06, | |
| "loss": 0.3875, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.043196544276458, | |
| "grad_norm": 0.1729234904050827, | |
| "learning_rate": 8.244850715033316e-06, | |
| "loss": 0.3888, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0453563714902807, | |
| "grad_norm": 0.18379338085651398, | |
| "learning_rate": 8.235279807847223e-06, | |
| "loss": 0.3867, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0475161987041037, | |
| "grad_norm": 0.1450575441122055, | |
| "learning_rate": 8.225688464875514e-06, | |
| "loss": 0.3895, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0496760259179265, | |
| "grad_norm": 0.15889526903629303, | |
| "learning_rate": 8.216076746702327e-06, | |
| "loss": 0.3817, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0518358531317495, | |
| "grad_norm": 0.16916847229003906, | |
| "learning_rate": 8.206444714040496e-06, | |
| "loss": 0.382, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0539956803455723, | |
| "grad_norm": 0.1597558856010437, | |
| "learning_rate": 8.196792427731175e-06, | |
| "loss": 0.3905, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0561555075593954, | |
| "grad_norm": 0.1566566675901413, | |
| "learning_rate": 8.18711994874345e-06, | |
| "loss": 0.3841, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0583153347732182, | |
| "grad_norm": 0.17559486627578735, | |
| "learning_rate": 8.177427338173955e-06, | |
| "loss": 0.3792, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.060475161987041, | |
| "grad_norm": 0.15165618062019348, | |
| "learning_rate": 8.167714657246486e-06, | |
| "loss": 0.3804, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.062634989200864, | |
| "grad_norm": 0.15612950921058655, | |
| "learning_rate": 8.157981967311614e-06, | |
| "loss": 0.382, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0647948164146868, | |
| "grad_norm": 0.16774365305900574, | |
| "learning_rate": 8.1482293298463e-06, | |
| "loss": 0.3905, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.0669546436285098, | |
| "grad_norm": 0.1574973613023758, | |
| "learning_rate": 8.138456806453503e-06, | |
| "loss": 0.3881, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0691144708423326, | |
| "grad_norm": 0.20335029065608978, | |
| "learning_rate": 8.12866445886179e-06, | |
| "loss": 0.3752, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0712742980561556, | |
| "grad_norm": 0.15830448269844055, | |
| "learning_rate": 8.118852348924951e-06, | |
| "loss": 0.3814, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0734341252699784, | |
| "grad_norm": 0.20952075719833374, | |
| "learning_rate": 8.109020538621607e-06, | |
| "loss": 0.3798, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.0755939524838012, | |
| "grad_norm": 0.18261830508708954, | |
| "learning_rate": 8.099169090054812e-06, | |
| "loss": 0.3895, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0777537796976242, | |
| "grad_norm": 0.20644772052764893, | |
| "learning_rate": 8.089298065451673e-06, | |
| "loss": 0.3744, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.079913606911447, | |
| "grad_norm": 0.17039693892002106, | |
| "learning_rate": 8.079407527162944e-06, | |
| "loss": 0.385, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.08207343412527, | |
| "grad_norm": 0.1829117089509964, | |
| "learning_rate": 8.069497537662638e-06, | |
| "loss": 0.3745, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0842332613390928, | |
| "grad_norm": 0.16001296043395996, | |
| "learning_rate": 8.05956815954764e-06, | |
| "loss": 0.3796, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0863930885529158, | |
| "grad_norm": 0.1937176138162613, | |
| "learning_rate": 8.049619455537296e-06, | |
| "loss": 0.3814, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.0885529157667386, | |
| "grad_norm": 0.15796703100204468, | |
| "learning_rate": 8.039651488473028e-06, | |
| "loss": 0.3804, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0907127429805616, | |
| "grad_norm": 0.21041610836982727, | |
| "learning_rate": 8.029664321317932e-06, | |
| "loss": 0.3862, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.0928725701943844, | |
| "grad_norm": 0.18780550360679626, | |
| "learning_rate": 8.019658017156384e-06, | |
| "loss": 0.3807, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0950323974082075, | |
| "grad_norm": 0.1692945808172226, | |
| "learning_rate": 8.009632639193643e-06, | |
| "loss": 0.3845, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.0971922246220303, | |
| "grad_norm": 0.18981167674064636, | |
| "learning_rate": 7.999588250755442e-06, | |
| "loss": 0.3848, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.099352051835853, | |
| "grad_norm": 0.15760387480258942, | |
| "learning_rate": 7.989524915287595e-06, | |
| "loss": 0.3757, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.101511879049676, | |
| "grad_norm": 0.140371173620224, | |
| "learning_rate": 7.979442696355601e-06, | |
| "loss": 0.3825, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1036717062634989, | |
| "grad_norm": 0.15832389891147614, | |
| "learning_rate": 7.969341657644236e-06, | |
| "loss": 0.3863, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.1058315334773219, | |
| "grad_norm": 0.14990824460983276, | |
| "learning_rate": 7.959221862957149e-06, | |
| "loss": 0.3917, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1079913606911447, | |
| "grad_norm": 0.15084333717823029, | |
| "learning_rate": 7.94908337621646e-06, | |
| "loss": 0.3863, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.1101511879049677, | |
| "grad_norm": 0.15440189838409424, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.3785, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1123110151187905, | |
| "grad_norm": 0.16310814023017883, | |
| "learning_rate": 7.928750582852722e-06, | |
| "loss": 0.3796, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1144708423326133, | |
| "grad_norm": 0.15400250256061554, | |
| "learning_rate": 7.918556404662645e-06, | |
| "loss": 0.3913, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1166306695464363, | |
| "grad_norm": 0.16480040550231934, | |
| "learning_rate": 7.908343791284104e-06, | |
| "loss": 0.3817, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.118790496760259, | |
| "grad_norm": 0.14894555509090424, | |
| "learning_rate": 7.898112807225517e-06, | |
| "loss": 0.3797, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1209503239740821, | |
| "grad_norm": 0.16937804222106934, | |
| "learning_rate": 7.887863517111337e-06, | |
| "loss": 0.3832, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.123110151187905, | |
| "grad_norm": 0.1606750190258026, | |
| "learning_rate": 7.877595985681656e-06, | |
| "loss": 0.3735, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.125269978401728, | |
| "grad_norm": 0.1703948825597763, | |
| "learning_rate": 7.867310277791778e-06, | |
| "loss": 0.3754, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.1274298056155507, | |
| "grad_norm": 0.1625399887561798, | |
| "learning_rate": 7.857006458411826e-06, | |
| "loss": 0.3773, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1295896328293737, | |
| "grad_norm": 0.17872779071331024, | |
| "learning_rate": 7.846684592626324e-06, | |
| "loss": 0.3867, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1317494600431965, | |
| "grad_norm": 0.14789296686649323, | |
| "learning_rate": 7.836344745633785e-06, | |
| "loss": 0.3794, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1339092872570196, | |
| "grad_norm": 0.15560902655124664, | |
| "learning_rate": 7.8259869827463e-06, | |
| "loss": 0.3795, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1360691144708424, | |
| "grad_norm": 0.1677931696176529, | |
| "learning_rate": 7.815611369389134e-06, | |
| "loss": 0.3921, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1382289416846652, | |
| "grad_norm": 0.15654879808425903, | |
| "learning_rate": 7.805217971100295e-06, | |
| "loss": 0.3893, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1403887688984882, | |
| "grad_norm": 0.15903332829475403, | |
| "learning_rate": 7.794806853530139e-06, | |
| "loss": 0.3791, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.142548596112311, | |
| "grad_norm": 0.1683822125196457, | |
| "learning_rate": 7.78437808244094e-06, | |
| "loss": 0.3877, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.144708423326134, | |
| "grad_norm": 0.15309610962867737, | |
| "learning_rate": 7.773931723706487e-06, | |
| "loss": 0.3746, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1468682505399568, | |
| "grad_norm": 0.14578138291835785, | |
| "learning_rate": 7.763467843311658e-06, | |
| "loss": 0.3767, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.1490280777537798, | |
| "grad_norm": 0.16950742900371552, | |
| "learning_rate": 7.752986507352009e-06, | |
| "loss": 0.3873, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1511879049676026, | |
| "grad_norm": 0.1471281796693802, | |
| "learning_rate": 7.742487782033352e-06, | |
| "loss": 0.3837, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1533477321814254, | |
| "grad_norm": 0.14385339617729187, | |
| "learning_rate": 7.731971733671347e-06, | |
| "loss": 0.3944, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1555075593952484, | |
| "grad_norm": 0.14128537476062775, | |
| "learning_rate": 7.721438428691065e-06, | |
| "loss": 0.3802, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1576673866090712, | |
| "grad_norm": 0.1677146852016449, | |
| "learning_rate": 7.71088793362659e-06, | |
| "loss": 0.3812, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1598272138228942, | |
| "grad_norm": 0.14564774930477142, | |
| "learning_rate": 7.70032031512058e-06, | |
| "loss": 0.3827, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.161987041036717, | |
| "grad_norm": 0.15598656237125397, | |
| "learning_rate": 7.689735639923857e-06, | |
| "loss": 0.3829, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.16414686825054, | |
| "grad_norm": 0.14980514347553253, | |
| "learning_rate": 7.679133974894984e-06, | |
| "loss": 0.3767, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1663066954643628, | |
| "grad_norm": 0.15688128769397736, | |
| "learning_rate": 7.668515386999837e-06, | |
| "loss": 0.3931, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1684665226781856, | |
| "grad_norm": 0.15419645607471466, | |
| "learning_rate": 7.65787994331119e-06, | |
| "loss": 0.375, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1706263498920086, | |
| "grad_norm": 0.15213316679000854, | |
| "learning_rate": 7.647227711008288e-06, | |
| "loss": 0.3841, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1727861771058314, | |
| "grad_norm": 0.14635787904262543, | |
| "learning_rate": 7.636558757376413e-06, | |
| "loss": 0.379, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.1749460043196545, | |
| "grad_norm": 0.1601177304983139, | |
| "learning_rate": 7.6258731498064796e-06, | |
| "loss": 0.3741, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1771058315334773, | |
| "grad_norm": 0.15203504264354706, | |
| "learning_rate": 7.615170955794592e-06, | |
| "loss": 0.3764, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1792656587473003, | |
| "grad_norm": 0.1715112179517746, | |
| "learning_rate": 7.604452242941622e-06, | |
| "loss": 0.3811, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.181425485961123, | |
| "grad_norm": 0.17397920787334442, | |
| "learning_rate": 7.593717078952788e-06, | |
| "loss": 0.3826, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.183585313174946, | |
| "grad_norm": 0.14259247481822968, | |
| "learning_rate": 7.582965531637221e-06, | |
| "loss": 0.3725, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1857451403887689, | |
| "grad_norm": 0.16911283135414124, | |
| "learning_rate": 7.572197668907533e-06, | |
| "loss": 0.3915, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.187904967602592, | |
| "grad_norm": 0.1575639694929123, | |
| "learning_rate": 7.561413558779401e-06, | |
| "loss": 0.3719, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1900647948164147, | |
| "grad_norm": 0.15729346871376038, | |
| "learning_rate": 7.550613269371124e-06, | |
| "loss": 0.3802, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.1922246220302375, | |
| "grad_norm": 0.16103574633598328, | |
| "learning_rate": 7.5397968689032e-06, | |
| "loss": 0.379, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1943844492440605, | |
| "grad_norm": 0.16614358127117157, | |
| "learning_rate": 7.528964425697895e-06, | |
| "loss": 0.3874, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.1965442764578833, | |
| "grad_norm": 0.14216990768909454, | |
| "learning_rate": 7.518116008178805e-06, | |
| "loss": 0.3791, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1987041036717063, | |
| "grad_norm": 0.15424562990665436, | |
| "learning_rate": 7.507251684870433e-06, | |
| "loss": 0.3855, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.2008639308855291, | |
| "grad_norm": 0.15728497505187988, | |
| "learning_rate": 7.496371524397747e-06, | |
| "loss": 0.3767, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2030237580993521, | |
| "grad_norm": 0.16239339113235474, | |
| "learning_rate": 7.485475595485756e-06, | |
| "loss": 0.39, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.205183585313175, | |
| "grad_norm": 0.18078574538230896, | |
| "learning_rate": 7.474563966959068e-06, | |
| "loss": 0.3805, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2073434125269977, | |
| "grad_norm": 0.1507551670074463, | |
| "learning_rate": 7.463636707741458e-06, | |
| "loss": 0.385, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2095032397408207, | |
| "grad_norm": 0.1794394552707672, | |
| "learning_rate": 7.452693886855438e-06, | |
| "loss": 0.3869, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2116630669546435, | |
| "grad_norm": 0.17479896545410156, | |
| "learning_rate": 7.4417355734218085e-06, | |
| "loss": 0.3763, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2138228941684666, | |
| "grad_norm": 0.15585078299045563, | |
| "learning_rate": 7.430761836659235e-06, | |
| "loss": 0.3893, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2159827213822894, | |
| "grad_norm": 0.17647355794906616, | |
| "learning_rate": 7.4197727458837995e-06, | |
| "loss": 0.3858, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.2181425485961124, | |
| "grad_norm": 0.1657349020242691, | |
| "learning_rate": 7.408768370508577e-06, | |
| "loss": 0.3787, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2203023758099352, | |
| "grad_norm": 0.15990415215492249, | |
| "learning_rate": 7.397748780043179e-06, | |
| "loss": 0.3816, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.2224622030237582, | |
| "grad_norm": 0.16552990674972534, | |
| "learning_rate": 7.386714044093331e-06, | |
| "loss": 0.3818, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.224622030237581, | |
| "grad_norm": 0.17762261629104614, | |
| "learning_rate": 7.375664232360421e-06, | |
| "loss": 0.3823, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.226781857451404, | |
| "grad_norm": 0.17362867295742035, | |
| "learning_rate": 7.364599414641064e-06, | |
| "loss": 0.3796, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2289416846652268, | |
| "grad_norm": 0.15305167436599731, | |
| "learning_rate": 7.353519660826665e-06, | |
| "loss": 0.3816, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2311015118790496, | |
| "grad_norm": 0.1919698268175125, | |
| "learning_rate": 7.342425040902967e-06, | |
| "loss": 0.3927, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2332613390928726, | |
| "grad_norm": 0.15654806792736053, | |
| "learning_rate": 7.331315624949624e-06, | |
| "loss": 0.3844, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2354211663066954, | |
| "grad_norm": 0.1898239254951477, | |
| "learning_rate": 7.320191483139742e-06, | |
| "loss": 0.3935, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2375809935205184, | |
| "grad_norm": 0.15385276079177856, | |
| "learning_rate": 7.309052685739448e-06, | |
| "loss": 0.3731, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2397408207343412, | |
| "grad_norm": 0.15585872530937195, | |
| "learning_rate": 7.297899303107441e-06, | |
| "loss": 0.3802, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2419006479481642, | |
| "grad_norm": 0.14450909197330475, | |
| "learning_rate": 7.286731405694544e-06, | |
| "loss": 0.368, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.244060475161987, | |
| "grad_norm": 0.15306542813777924, | |
| "learning_rate": 7.275549064043269e-06, | |
| "loss": 0.3827, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2462203023758098, | |
| "grad_norm": 0.15712149441242218, | |
| "learning_rate": 7.264352348787364e-06, | |
| "loss": 0.3933, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2483801295896328, | |
| "grad_norm": 0.16853763163089752, | |
| "learning_rate": 7.253141330651367e-06, | |
| "loss": 0.3886, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2505399568034556, | |
| "grad_norm": 0.15141934156417847, | |
| "learning_rate": 7.241916080450163e-06, | |
| "loss": 0.373, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.2526997840172787, | |
| "grad_norm": 0.16748425364494324, | |
| "learning_rate": 7.23067666908853e-06, | |
| "loss": 0.3779, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2548596112311015, | |
| "grad_norm": 0.15394426882266998, | |
| "learning_rate": 7.219423167560701e-06, | |
| "loss": 0.3803, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.2570194384449245, | |
| "grad_norm": 0.15716637670993805, | |
| "learning_rate": 7.208155646949908e-06, | |
| "loss": 0.3903, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2591792656587473, | |
| "grad_norm": 0.17571674287319183, | |
| "learning_rate": 7.196874178427933e-06, | |
| "loss": 0.3693, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.26133909287257, | |
| "grad_norm": 0.16210925579071045, | |
| "learning_rate": 7.185578833254665e-06, | |
| "loss": 0.3806, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.263498920086393, | |
| "grad_norm": 0.17312122881412506, | |
| "learning_rate": 7.1742696827776415e-06, | |
| "loss": 0.3867, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.265658747300216, | |
| "grad_norm": 0.16945572197437286, | |
| "learning_rate": 7.162946798431605e-06, | |
| "loss": 0.3834, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.267818574514039, | |
| "grad_norm": 0.15858979523181915, | |
| "learning_rate": 7.151610251738045e-06, | |
| "loss": 0.3837, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.2699784017278617, | |
| "grad_norm": 0.14600925147533417, | |
| "learning_rate": 7.1402601143047514e-06, | |
| "loss": 0.3797, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2721382289416847, | |
| "grad_norm": 0.15963494777679443, | |
| "learning_rate": 7.128896457825364e-06, | |
| "loss": 0.3904, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2742980561555075, | |
| "grad_norm": 0.1409822553396225, | |
| "learning_rate": 7.11751935407891e-06, | |
| "loss": 0.384, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2764578833693305, | |
| "grad_norm": 0.1461641937494278, | |
| "learning_rate": 7.106128874929364e-06, | |
| "loss": 0.3769, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2786177105831533, | |
| "grad_norm": 0.1487351655960083, | |
| "learning_rate": 7.094725092325177e-06, | |
| "loss": 0.3766, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2807775377969763, | |
| "grad_norm": 0.1428721696138382, | |
| "learning_rate": 7.08330807829884e-06, | |
| "loss": 0.3833, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.2829373650107991, | |
| "grad_norm": 0.14245618879795074, | |
| "learning_rate": 7.071877904966422e-06, | |
| "loss": 0.382, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.285097192224622, | |
| "grad_norm": 0.1549312025308609, | |
| "learning_rate": 7.060434644527105e-06, | |
| "loss": 0.3723, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.287257019438445, | |
| "grad_norm": 0.14332742989063263, | |
| "learning_rate": 7.048978369262747e-06, | |
| "loss": 0.385, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.2894168466522677, | |
| "grad_norm": 0.15278279781341553, | |
| "learning_rate": 7.037509151537404e-06, | |
| "loss": 0.3715, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.2915766738660908, | |
| "grad_norm": 0.14458084106445312, | |
| "learning_rate": 7.026027063796891e-06, | |
| "loss": 0.3708, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2937365010799136, | |
| "grad_norm": 0.15547068417072296, | |
| "learning_rate": 7.014532178568314e-06, | |
| "loss": 0.3784, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.2958963282937366, | |
| "grad_norm": 0.15412218868732452, | |
| "learning_rate": 7.003024568459614e-06, | |
| "loss": 0.3785, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2980561555075594, | |
| "grad_norm": 0.15792393684387207, | |
| "learning_rate": 6.991504306159115e-06, | |
| "loss": 0.3912, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.3002159827213822, | |
| "grad_norm": 0.1512409746646881, | |
| "learning_rate": 6.9799714644350504e-06, | |
| "loss": 0.3822, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3023758099352052, | |
| "grad_norm": 0.15624138712882996, | |
| "learning_rate": 6.968426116135118e-06, | |
| "loss": 0.3786, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.3045356371490282, | |
| "grad_norm": 0.1699935495853424, | |
| "learning_rate": 6.9568683341860135e-06, | |
| "loss": 0.382, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.306695464362851, | |
| "grad_norm": 0.1427888125181198, | |
| "learning_rate": 6.945298191592967e-06, | |
| "loss": 0.3694, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3088552915766738, | |
| "grad_norm": 0.15631450712680817, | |
| "learning_rate": 6.93371576143929e-06, | |
| "loss": 0.3846, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3110151187904968, | |
| "grad_norm": 0.15259280800819397, | |
| "learning_rate": 6.922121116885905e-06, | |
| "loss": 0.378, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3131749460043196, | |
| "grad_norm": 0.13901083171367645, | |
| "learning_rate": 6.910514331170888e-06, | |
| "loss": 0.3852, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3153347732181426, | |
| "grad_norm": 0.15216070413589478, | |
| "learning_rate": 6.898895477609007e-06, | |
| "loss": 0.3852, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3174946004319654, | |
| "grad_norm": 0.13873577117919922, | |
| "learning_rate": 6.887264629591254e-06, | |
| "loss": 0.3677, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3196544276457884, | |
| "grad_norm": 0.15047885477542877, | |
| "learning_rate": 6.875621860584389e-06, | |
| "loss": 0.3811, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3218142548596112, | |
| "grad_norm": 0.13761691749095917, | |
| "learning_rate": 6.863967244130467e-06, | |
| "loss": 0.3766, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.323974082073434, | |
| "grad_norm": 0.14068377017974854, | |
| "learning_rate": 6.852300853846381e-06, | |
| "loss": 0.3768, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.326133909287257, | |
| "grad_norm": 0.14240694046020508, | |
| "learning_rate": 6.840622763423391e-06, | |
| "loss": 0.3804, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3282937365010798, | |
| "grad_norm": 0.14259974658489227, | |
| "learning_rate": 6.8289330466266635e-06, | |
| "loss": 0.3796, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3304535637149029, | |
| "grad_norm": 0.13572795689105988, | |
| "learning_rate": 6.817231777294804e-06, | |
| "loss": 0.3791, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3326133909287257, | |
| "grad_norm": 0.14472903311252594, | |
| "learning_rate": 6.805519029339388e-06, | |
| "loss": 0.3825, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3347732181425487, | |
| "grad_norm": 0.13924075663089752, | |
| "learning_rate": 6.793794876744499e-06, | |
| "loss": 0.3822, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3369330453563715, | |
| "grad_norm": 0.1507209688425064, | |
| "learning_rate": 6.782059393566254e-06, | |
| "loss": 0.3799, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3390928725701943, | |
| "grad_norm": 0.15504410862922668, | |
| "learning_rate": 6.770312653932346e-06, | |
| "loss": 0.396, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3412526997840173, | |
| "grad_norm": 0.14195454120635986, | |
| "learning_rate": 6.758554732041564e-06, | |
| "loss": 0.3797, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3434125269978403, | |
| "grad_norm": 0.158910870552063, | |
| "learning_rate": 6.7467857021633354e-06, | |
| "loss": 0.3923, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.345572354211663, | |
| "grad_norm": 0.1433819979429245, | |
| "learning_rate": 6.7350056386372485e-06, | |
| "loss": 0.3819, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.347732181425486, | |
| "grad_norm": 0.1474255919456482, | |
| "learning_rate": 6.723214615872585e-06, | |
| "loss": 0.3819, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.349892008639309, | |
| "grad_norm": 0.15845805406570435, | |
| "learning_rate": 6.711412708347857e-06, | |
| "loss": 0.39, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3520518358531317, | |
| "grad_norm": 0.12925179302692413, | |
| "learning_rate": 6.699599990610324e-06, | |
| "loss": 0.3779, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3542116630669545, | |
| "grad_norm": 0.1499335616827011, | |
| "learning_rate": 6.68777653727553e-06, | |
| "loss": 0.3804, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3563714902807775, | |
| "grad_norm": 0.15274159610271454, | |
| "learning_rate": 6.675942423026834e-06, | |
| "loss": 0.3783, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3585313174946005, | |
| "grad_norm": 0.13748182356357574, | |
| "learning_rate": 6.664097722614934e-06, | |
| "loss": 0.3735, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3606911447084233, | |
| "grad_norm": 0.14609220623970032, | |
| "learning_rate": 6.652242510857395e-06, | |
| "loss": 0.392, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3628509719222461, | |
| "grad_norm": 0.1698596030473709, | |
| "learning_rate": 6.640376862638176e-06, | |
| "loss": 0.3832, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3650107991360692, | |
| "grad_norm": 0.1435316503047943, | |
| "learning_rate": 6.6285008529071615e-06, | |
| "loss": 0.3819, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.367170626349892, | |
| "grad_norm": 0.13530634343624115, | |
| "learning_rate": 6.616614556679684e-06, | |
| "loss": 0.3809, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.369330453563715, | |
| "grad_norm": 0.18893133103847504, | |
| "learning_rate": 6.604718049036047e-06, | |
| "loss": 0.3828, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3714902807775378, | |
| "grad_norm": 0.15310388803482056, | |
| "learning_rate": 6.592811405121064e-06, | |
| "loss": 0.3831, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.3736501079913608, | |
| "grad_norm": 0.14660876989364624, | |
| "learning_rate": 6.580894700143565e-06, | |
| "loss": 0.3781, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3758099352051836, | |
| "grad_norm": 0.14833448827266693, | |
| "learning_rate": 6.568968009375938e-06, | |
| "loss": 0.3775, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.3779697624190064, | |
| "grad_norm": 0.1682375818490982, | |
| "learning_rate": 6.557031408153642e-06, | |
| "loss": 0.3758, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.3801295896328294, | |
| "grad_norm": 0.1533748358488083, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.3793, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.3822894168466522, | |
| "grad_norm": 0.14140866696834564, | |
| "learning_rate": 6.533128775999411e-06, | |
| "loss": 0.384, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3844492440604752, | |
| "grad_norm": 0.14936432242393494, | |
| "learning_rate": 6.521162896049491e-06, | |
| "loss": 0.3891, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.386609071274298, | |
| "grad_norm": 0.15731281042099, | |
| "learning_rate": 6.509187407607981e-06, | |
| "loss": 0.3841, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.388768898488121, | |
| "grad_norm": 0.15000282227993011, | |
| "learning_rate": 6.497202386318573e-06, | |
| "loss": 0.3851, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.3909287257019438, | |
| "grad_norm": 0.14697906374931335, | |
| "learning_rate": 6.485207907885175e-06, | |
| "loss": 0.3773, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3930885529157666, | |
| "grad_norm": 0.1559685468673706, | |
| "learning_rate": 6.473204048071433e-06, | |
| "loss": 0.3821, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.3952483801295896, | |
| "grad_norm": 0.15039733052253723, | |
| "learning_rate": 6.4611908827002504e-06, | |
| "loss": 0.3847, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3974082073434126, | |
| "grad_norm": 0.16008631885051727, | |
| "learning_rate": 6.449168487653305e-06, | |
| "loss": 0.3802, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.3995680345572354, | |
| "grad_norm": 0.14514020085334778, | |
| "learning_rate": 6.437136938870583e-06, | |
| "loss": 0.3841, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4017278617710582, | |
| "grad_norm": 0.15419939160346985, | |
| "learning_rate": 6.425096312349881e-06, | |
| "loss": 0.3903, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4038876889848813, | |
| "grad_norm": 0.1530395895242691, | |
| "learning_rate": 6.413046684146343e-06, | |
| "loss": 0.3794, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.406047516198704, | |
| "grad_norm": 0.15808750689029694, | |
| "learning_rate": 6.400988130371969e-06, | |
| "loss": 0.3766, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.408207343412527, | |
| "grad_norm": 0.14891669154167175, | |
| "learning_rate": 6.388920727195138e-06, | |
| "loss": 0.3781, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4103671706263499, | |
| "grad_norm": 0.14925065636634827, | |
| "learning_rate": 6.376844550840126e-06, | |
| "loss": 0.3906, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4125269978401729, | |
| "grad_norm": 0.16382241249084473, | |
| "learning_rate": 6.364759677586627e-06, | |
| "loss": 0.3771, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4146868250539957, | |
| "grad_norm": 0.1546749770641327, | |
| "learning_rate": 6.352666183769269e-06, | |
| "loss": 0.3863, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4168466522678185, | |
| "grad_norm": 0.14334626495838165, | |
| "learning_rate": 6.340564145777131e-06, | |
| "loss": 0.3742, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4190064794816415, | |
| "grad_norm": 0.15877507627010345, | |
| "learning_rate": 6.328453640053264e-06, | |
| "loss": 0.3779, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4211663066954643, | |
| "grad_norm": 0.1556321382522583, | |
| "learning_rate": 6.316334743094201e-06, | |
| "loss": 0.3739, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4233261339092873, | |
| "grad_norm": 0.14519956707954407, | |
| "learning_rate": 6.304207531449486e-06, | |
| "loss": 0.3786, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.42548596112311, | |
| "grad_norm": 0.14613112807273865, | |
| "learning_rate": 6.292072081721173e-06, | |
| "loss": 0.381, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4276457883369331, | |
| "grad_norm": 0.15813447535037994, | |
| "learning_rate": 6.279928470563365e-06, | |
| "loss": 0.3866, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.429805615550756, | |
| "grad_norm": 0.15421751141548157, | |
| "learning_rate": 6.267776774681703e-06, | |
| "loss": 0.3796, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4319654427645787, | |
| "grad_norm": 0.15891966223716736, | |
| "learning_rate": 6.255617070832908e-06, | |
| "loss": 0.3717, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4341252699784017, | |
| "grad_norm": 0.15779848396778107, | |
| "learning_rate": 6.243449435824276e-06, | |
| "loss": 0.3701, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4362850971922247, | |
| "grad_norm": 0.14361926913261414, | |
| "learning_rate": 6.231273946513201e-06, | |
| "loss": 0.3698, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4384449244060475, | |
| "grad_norm": 0.1553213894367218, | |
| "learning_rate": 6.219090679806694e-06, | |
| "loss": 0.381, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4406047516198703, | |
| "grad_norm": 0.14260952174663544, | |
| "learning_rate": 6.206899712660887e-06, | |
| "loss": 0.3734, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4427645788336934, | |
| "grad_norm": 0.147933229804039, | |
| "learning_rate": 6.1947011220805535e-06, | |
| "loss": 0.3799, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4449244060475162, | |
| "grad_norm": 0.15127696096897125, | |
| "learning_rate": 6.182494985118625e-06, | |
| "loss": 0.3792, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.4470842332613392, | |
| "grad_norm": 0.14316388964653015, | |
| "learning_rate": 6.170281378875692e-06, | |
| "loss": 0.3727, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.449244060475162, | |
| "grad_norm": 0.14327897131443024, | |
| "learning_rate": 6.158060380499533e-06, | |
| "loss": 0.3823, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.451403887688985, | |
| "grad_norm": 0.1345098912715912, | |
| "learning_rate": 6.145832067184614e-06, | |
| "loss": 0.3924, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4535637149028078, | |
| "grad_norm": 0.13999171555042267, | |
| "learning_rate": 6.133596516171609e-06, | |
| "loss": 0.3809, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.4557235421166306, | |
| "grad_norm": 0.12059102207422256, | |
| "learning_rate": 6.121353804746907e-06, | |
| "loss": 0.3788, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4578833693304536, | |
| "grad_norm": 0.14187489449977875, | |
| "learning_rate": 6.109104010242127e-06, | |
| "loss": 0.3845, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4600431965442764, | |
| "grad_norm": 0.14432717859745026, | |
| "learning_rate": 6.09684721003363e-06, | |
| "loss": 0.3801, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4622030237580994, | |
| "grad_norm": 0.1373838186264038, | |
| "learning_rate": 6.084583481542028e-06, | |
| "loss": 0.3731, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4643628509719222, | |
| "grad_norm": 0.15109464526176453, | |
| "learning_rate": 6.072312902231692e-06, | |
| "loss": 0.3895, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4665226781857452, | |
| "grad_norm": 0.15525732934474945, | |
| "learning_rate": 6.060035549610275e-06, | |
| "loss": 0.3785, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.468682505399568, | |
| "grad_norm": 0.14632560312747955, | |
| "learning_rate": 6.047751501228203e-06, | |
| "loss": 0.3793, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4708423326133908, | |
| "grad_norm": 0.1495695561170578, | |
| "learning_rate": 6.0354608346782075e-06, | |
| "loss": 0.3817, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.4730021598272138, | |
| "grad_norm": 0.1651640236377716, | |
| "learning_rate": 6.023163627594813e-06, | |
| "loss": 0.386, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4751619870410368, | |
| "grad_norm": 0.14565086364746094, | |
| "learning_rate": 6.010859957653869e-06, | |
| "loss": 0.3749, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.4773218142548596, | |
| "grad_norm": 0.1538180410861969, | |
| "learning_rate": 5.9985499025720354e-06, | |
| "loss": 0.3769, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4794816414686824, | |
| "grad_norm": 0.12949156761169434, | |
| "learning_rate": 5.986233540106315e-06, | |
| "loss": 0.3721, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.4816414686825055, | |
| "grad_norm": 0.14916200935840607, | |
| "learning_rate": 5.973910948053545e-06, | |
| "loss": 0.386, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4838012958963283, | |
| "grad_norm": 0.1727481335401535, | |
| "learning_rate": 5.961582204249915e-06, | |
| "loss": 0.3769, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.485961123110151, | |
| "grad_norm": 0.1516205221414566, | |
| "learning_rate": 5.949247386570471e-06, | |
| "loss": 0.3865, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.488120950323974, | |
| "grad_norm": 0.164317786693573, | |
| "learning_rate": 5.936906572928625e-06, | |
| "loss": 0.3803, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.490280777537797, | |
| "grad_norm": 0.15949031710624695, | |
| "learning_rate": 5.924559841275661e-06, | |
| "loss": 0.3819, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4924406047516199, | |
| "grad_norm": 0.13948768377304077, | |
| "learning_rate": 5.912207269600252e-06, | |
| "loss": 0.381, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.4946004319654427, | |
| "grad_norm": 0.17031709849834442, | |
| "learning_rate": 5.89984893592795e-06, | |
| "loss": 0.3837, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4967602591792657, | |
| "grad_norm": 0.13423483073711395, | |
| "learning_rate": 5.887484918320708e-06, | |
| "loss": 0.3824, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.4989200863930885, | |
| "grad_norm": 0.15468288958072662, | |
| "learning_rate": 5.8751152948763815e-06, | |
| "loss": 0.372, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5010799136069113, | |
| "grad_norm": 0.16139718890190125, | |
| "learning_rate": 5.8627401437282334e-06, | |
| "loss": 0.3775, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5032397408207343, | |
| "grad_norm": 0.15016594529151917, | |
| "learning_rate": 5.850359543044446e-06, | |
| "loss": 0.3781, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5053995680345573, | |
| "grad_norm": 0.1405385285615921, | |
| "learning_rate": 5.837973571027621e-06, | |
| "loss": 0.3789, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5075593952483801, | |
| "grad_norm": 0.15115734934806824, | |
| "learning_rate": 5.82558230591429e-06, | |
| "loss": 0.384, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.509719222462203, | |
| "grad_norm": 0.14200249314308167, | |
| "learning_rate": 5.813185825974419e-06, | |
| "loss": 0.3846, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.511879049676026, | |
| "grad_norm": 0.147098109126091, | |
| "learning_rate": 5.80078420951091e-06, | |
| "loss": 0.3839, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.514038876889849, | |
| "grad_norm": 0.1505637764930725, | |
| "learning_rate": 5.7883775348591146e-06, | |
| "loss": 0.3795, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5161987041036717, | |
| "grad_norm": 0.14681459963321686, | |
| "learning_rate": 5.77596588038633e-06, | |
| "loss": 0.3879, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5183585313174945, | |
| "grad_norm": 0.1480962336063385, | |
| "learning_rate": 5.763549324491317e-06, | |
| "loss": 0.3851, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5205183585313176, | |
| "grad_norm": 0.1447979360818863, | |
| "learning_rate": 5.751127945603786e-06, | |
| "loss": 0.379, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5226781857451404, | |
| "grad_norm": 0.14420117437839508, | |
| "learning_rate": 5.7387018221839195e-06, | |
| "loss": 0.3844, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5248380129589632, | |
| "grad_norm": 0.1400950700044632, | |
| "learning_rate": 5.726271032721864e-06, | |
| "loss": 0.3854, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5269978401727862, | |
| "grad_norm": 0.14526066184043884, | |
| "learning_rate": 5.7138356557372444e-06, | |
| "loss": 0.3815, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5291576673866092, | |
| "grad_norm": 0.15576431155204773, | |
| "learning_rate": 5.70139576977866e-06, | |
| "loss": 0.3866, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.531317494600432, | |
| "grad_norm": 0.1484983265399933, | |
| "learning_rate": 5.68895145342319e-06, | |
| "loss": 0.3695, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5334773218142548, | |
| "grad_norm": 0.14368119835853577, | |
| "learning_rate": 5.6765027852759015e-06, | |
| "loss": 0.3751, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5356371490280778, | |
| "grad_norm": 0.14985284209251404, | |
| "learning_rate": 5.664049843969348e-06, | |
| "loss": 0.3759, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5377969762419006, | |
| "grad_norm": 0.14019222557544708, | |
| "learning_rate": 5.651592708163074e-06, | |
| "loss": 0.3768, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5399568034557234, | |
| "grad_norm": 0.15098613500595093, | |
| "learning_rate": 5.639131456543119e-06, | |
| "loss": 0.3755, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5421166306695464, | |
| "grad_norm": 0.13311173021793365, | |
| "learning_rate": 5.626666167821522e-06, | |
| "loss": 0.3727, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5442764578833694, | |
| "grad_norm": 0.1466924548149109, | |
| "learning_rate": 5.614196920735822e-06, | |
| "loss": 0.3816, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.5464362850971922, | |
| "grad_norm": 0.14080245792865753, | |
| "learning_rate": 5.601723794048558e-06, | |
| "loss": 0.3808, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.548596112311015, | |
| "grad_norm": 0.13415026664733887, | |
| "learning_rate": 5.58924686654678e-06, | |
| "loss": 0.3846, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.550755939524838, | |
| "grad_norm": 0.14663489162921906, | |
| "learning_rate": 5.576766217041541e-06, | |
| "loss": 0.3728, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.552915766738661, | |
| "grad_norm": 0.14009855687618256, | |
| "learning_rate": 5.5642819243674085e-06, | |
| "loss": 0.3661, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.5550755939524838, | |
| "grad_norm": 0.12949179112911224, | |
| "learning_rate": 5.551794067381959e-06, | |
| "loss": 0.3766, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5572354211663066, | |
| "grad_norm": 0.1387072652578354, | |
| "learning_rate": 5.5393027249652844e-06, | |
| "loss": 0.3863, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5593952483801297, | |
| "grad_norm": 0.13852353394031525, | |
| "learning_rate": 5.526807976019492e-06, | |
| "loss": 0.3777, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5615550755939525, | |
| "grad_norm": 0.144193634390831, | |
| "learning_rate": 5.514309899468209e-06, | |
| "loss": 0.3708, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.5637149028077753, | |
| "grad_norm": 0.13979433476924896, | |
| "learning_rate": 5.5018085742560745e-06, | |
| "loss": 0.3827, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5658747300215983, | |
| "grad_norm": 0.1412208378314972, | |
| "learning_rate": 5.489304079348259e-06, | |
| "loss": 0.3819, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.5680345572354213, | |
| "grad_norm": 0.14092062413692474, | |
| "learning_rate": 5.476796493729943e-06, | |
| "loss": 0.38, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.570194384449244, | |
| "grad_norm": 0.15495967864990234, | |
| "learning_rate": 5.46428589640584e-06, | |
| "loss": 0.3941, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.5723542116630669, | |
| "grad_norm": 0.14512132108211517, | |
| "learning_rate": 5.451772366399678e-06, | |
| "loss": 0.3912, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.57451403887689, | |
| "grad_norm": 0.15392383933067322, | |
| "learning_rate": 5.439255982753717e-06, | |
| "loss": 0.3751, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.5766738660907127, | |
| "grad_norm": 0.14311961829662323, | |
| "learning_rate": 5.426736824528236e-06, | |
| "loss": 0.379, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5788336933045355, | |
| "grad_norm": 0.14618806540966034, | |
| "learning_rate": 5.414214970801041e-06, | |
| "loss": 0.3794, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.5809935205183585, | |
| "grad_norm": 0.13308942317962646, | |
| "learning_rate": 5.401690500666972e-06, | |
| "loss": 0.3823, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5831533477321815, | |
| "grad_norm": 0.14792852103710175, | |
| "learning_rate": 5.389163493237382e-06, | |
| "loss": 0.379, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.5853131749460043, | |
| "grad_norm": 0.15516813099384308, | |
| "learning_rate": 5.376634027639664e-06, | |
| "loss": 0.381, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5874730021598271, | |
| "grad_norm": 0.13429994881153107, | |
| "learning_rate": 5.36410218301673e-06, | |
| "loss": 0.3848, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.5896328293736501, | |
| "grad_norm": 0.13838984072208405, | |
| "learning_rate": 5.35156803852652e-06, | |
| "loss": 0.3802, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5917926565874732, | |
| "grad_norm": 0.14528213441371918, | |
| "learning_rate": 5.339031673341505e-06, | |
| "loss": 0.3677, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.593952483801296, | |
| "grad_norm": 0.14330457150936127, | |
| "learning_rate": 5.326493166648179e-06, | |
| "loss": 0.3754, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5961123110151187, | |
| "grad_norm": 0.150346577167511, | |
| "learning_rate": 5.3139525976465675e-06, | |
| "loss": 0.3867, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.5982721382289418, | |
| "grad_norm": 0.13384312391281128, | |
| "learning_rate": 5.301410045549719e-06, | |
| "loss": 0.3807, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6004319654427646, | |
| "grad_norm": 0.13676618039608002, | |
| "learning_rate": 5.2888655895832075e-06, | |
| "loss": 0.3776, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6025917926565874, | |
| "grad_norm": 0.1279810667037964, | |
| "learning_rate": 5.276319308984637e-06, | |
| "loss": 0.3701, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6047516198704104, | |
| "grad_norm": 0.14258207380771637, | |
| "learning_rate": 5.263771283003133e-06, | |
| "loss": 0.3724, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6069114470842334, | |
| "grad_norm": 0.14498306810855865, | |
| "learning_rate": 5.251221590898848e-06, | |
| "loss": 0.3716, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6090712742980562, | |
| "grad_norm": 0.14295688271522522, | |
| "learning_rate": 5.238670311942459e-06, | |
| "loss": 0.3877, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.611231101511879, | |
| "grad_norm": 0.12707604467868805, | |
| "learning_rate": 5.226117525414663e-06, | |
| "loss": 0.3724, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.613390928725702, | |
| "grad_norm": 0.14804835617542267, | |
| "learning_rate": 5.213563310605686e-06, | |
| "loss": 0.3827, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6155507559395248, | |
| "grad_norm": 0.12879379093647003, | |
| "learning_rate": 5.201007746814767e-06, | |
| "loss": 0.3706, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6177105831533476, | |
| "grad_norm": 0.14704205095767975, | |
| "learning_rate": 5.188450913349674e-06, | |
| "loss": 0.3869, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6198704103671706, | |
| "grad_norm": 0.1534765362739563, | |
| "learning_rate": 5.175892889526189e-06, | |
| "loss": 0.3736, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6220302375809936, | |
| "grad_norm": 0.1547222137451172, | |
| "learning_rate": 5.16333375466762e-06, | |
| "loss": 0.3796, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6241900647948164, | |
| "grad_norm": 0.13741885125637054, | |
| "learning_rate": 5.150773588104284e-06, | |
| "loss": 0.3817, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6263498920086392, | |
| "grad_norm": 0.13468679785728455, | |
| "learning_rate": 5.138212469173022e-06, | |
| "loss": 0.3781, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6285097192224622, | |
| "grad_norm": 0.1447237730026245, | |
| "learning_rate": 5.1256504772166885e-06, | |
| "loss": 0.3609, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6306695464362853, | |
| "grad_norm": 0.14089703559875488, | |
| "learning_rate": 5.1130876915836495e-06, | |
| "loss": 0.3609, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6328293736501078, | |
| "grad_norm": 0.13828714191913605, | |
| "learning_rate": 5.100524191627289e-06, | |
| "loss": 0.377, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6349892008639308, | |
| "grad_norm": 0.14428219199180603, | |
| "learning_rate": 5.087960056705499e-06, | |
| "loss": 0.3702, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.6371490280777539, | |
| "grad_norm": 0.15890643000602722, | |
| "learning_rate": 5.075395366180186e-06, | |
| "loss": 0.3838, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6393088552915767, | |
| "grad_norm": 0.1514156460762024, | |
| "learning_rate": 5.062830199416764e-06, | |
| "loss": 0.3852, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.6414686825053995, | |
| "grad_norm": 0.13208477199077606, | |
| "learning_rate": 5.050264635783654e-06, | |
| "loss": 0.3925, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6436285097192225, | |
| "grad_norm": 0.13601286709308624, | |
| "learning_rate": 5.037698754651786e-06, | |
| "loss": 0.3847, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6457883369330455, | |
| "grad_norm": 0.1418139487504959, | |
| "learning_rate": 5.025132635394095e-06, | |
| "loss": 0.3744, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6479481641468683, | |
| "grad_norm": 0.14959508180618286, | |
| "learning_rate": 5.0125663573850204e-06, | |
| "loss": 0.3712, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.650107991360691, | |
| "grad_norm": 0.12993188202381134, | |
| "learning_rate": 5e-06, | |
| "loss": 0.38, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.652267818574514, | |
| "grad_norm": 0.14041666686534882, | |
| "learning_rate": 4.987433642614981e-06, | |
| "loss": 0.3751, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.654427645788337, | |
| "grad_norm": 0.1548304408788681, | |
| "learning_rate": 4.974867364605906e-06, | |
| "loss": 0.3588, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6565874730021597, | |
| "grad_norm": 0.12369633466005325, | |
| "learning_rate": 4.962301245348215e-06, | |
| "loss": 0.3822, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6587473002159827, | |
| "grad_norm": 0.13229462504386902, | |
| "learning_rate": 4.949735364216348e-06, | |
| "loss": 0.3631, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6609071274298057, | |
| "grad_norm": 0.13191936910152435, | |
| "learning_rate": 4.937169800583237e-06, | |
| "loss": 0.3783, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6630669546436285, | |
| "grad_norm": 0.14189469814300537, | |
| "learning_rate": 4.924604633819815e-06, | |
| "loss": 0.3724, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6652267818574513, | |
| "grad_norm": 0.1306021511554718, | |
| "learning_rate": 4.912039943294502e-06, | |
| "loss": 0.3736, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6673866090712743, | |
| "grad_norm": 0.1423332244157791, | |
| "learning_rate": 4.899475808372714e-06, | |
| "loss": 0.3735, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6695464362850974, | |
| "grad_norm": 0.13784444332122803, | |
| "learning_rate": 4.886912308416353e-06, | |
| "loss": 0.3737, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.67170626349892, | |
| "grad_norm": 0.13520213961601257, | |
| "learning_rate": 4.874349522783313e-06, | |
| "loss": 0.3678, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.673866090712743, | |
| "grad_norm": 0.15318076312541962, | |
| "learning_rate": 4.861787530826979e-06, | |
| "loss": 0.3716, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.676025917926566, | |
| "grad_norm": 0.12309125065803528, | |
| "learning_rate": 4.8492264118957165e-06, | |
| "loss": 0.386, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6781857451403888, | |
| "grad_norm": 0.1470710188150406, | |
| "learning_rate": 4.8366662453323826e-06, | |
| "loss": 0.3848, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.6803455723542116, | |
| "grad_norm": 0.12907086312770844, | |
| "learning_rate": 4.8241071104738115e-06, | |
| "loss": 0.3689, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6825053995680346, | |
| "grad_norm": 0.13970084488391876, | |
| "learning_rate": 4.811549086650327e-06, | |
| "loss": 0.3814, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.6846652267818576, | |
| "grad_norm": 0.13439306616783142, | |
| "learning_rate": 4.798992253185233e-06, | |
| "loss": 0.3717, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6868250539956804, | |
| "grad_norm": 0.13519345223903656, | |
| "learning_rate": 4.786436689394317e-06, | |
| "loss": 0.3765, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.6889848812095032, | |
| "grad_norm": 0.13258984684944153, | |
| "learning_rate": 4.773882474585338e-06, | |
| "loss": 0.3809, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6911447084233262, | |
| "grad_norm": 0.12966322898864746, | |
| "learning_rate": 4.761329688057543e-06, | |
| "loss": 0.3782, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.693304535637149, | |
| "grad_norm": 0.13643068075180054, | |
| "learning_rate": 4.748778409101153e-06, | |
| "loss": 0.3796, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6954643628509718, | |
| "grad_norm": 0.1507895290851593, | |
| "learning_rate": 4.736228716996868e-06, | |
| "loss": 0.3789, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.6976241900647948, | |
| "grad_norm": 0.14031574130058289, | |
| "learning_rate": 4.723680691015366e-06, | |
| "loss": 0.3816, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6997840172786178, | |
| "grad_norm": 0.13055071234703064, | |
| "learning_rate": 4.711134410416794e-06, | |
| "loss": 0.3643, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.7019438444924406, | |
| "grad_norm": 0.15579389035701752, | |
| "learning_rate": 4.6985899544502835e-06, | |
| "loss": 0.3797, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7041036717062634, | |
| "grad_norm": 0.1301419883966446, | |
| "learning_rate": 4.686047402353433e-06, | |
| "loss": 0.3793, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.7062634989200864, | |
| "grad_norm": 0.13526466488838196, | |
| "learning_rate": 4.673506833351821e-06, | |
| "loss": 0.3911, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7084233261339092, | |
| "grad_norm": 0.1373325139284134, | |
| "learning_rate": 4.660968326658497e-06, | |
| "loss": 0.3774, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.710583153347732, | |
| "grad_norm": 0.1474619358778, | |
| "learning_rate": 4.648431961473482e-06, | |
| "loss": 0.368, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.712742980561555, | |
| "grad_norm": 0.14143545925617218, | |
| "learning_rate": 4.635897816983272e-06, | |
| "loss": 0.3779, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.714902807775378, | |
| "grad_norm": 0.14204931259155273, | |
| "learning_rate": 4.6233659723603374e-06, | |
| "loss": 0.3667, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7170626349892009, | |
| "grad_norm": 0.13979306817054749, | |
| "learning_rate": 4.610836506762618e-06, | |
| "loss": 0.3782, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7192224622030237, | |
| "grad_norm": 0.14510124921798706, | |
| "learning_rate": 4.59830949933303e-06, | |
| "loss": 0.3705, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7213822894168467, | |
| "grad_norm": 0.14503952860832214, | |
| "learning_rate": 4.5857850291989596e-06, | |
| "loss": 0.3804, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7235421166306697, | |
| "grad_norm": 0.13347502052783966, | |
| "learning_rate": 4.573263175471766e-06, | |
| "loss": 0.3706, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7257019438444925, | |
| "grad_norm": 0.12476824969053268, | |
| "learning_rate": 4.560744017246284e-06, | |
| "loss": 0.3756, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7278617710583153, | |
| "grad_norm": 0.13821221888065338, | |
| "learning_rate": 4.548227633600322e-06, | |
| "loss": 0.3802, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7300215982721383, | |
| "grad_norm": 0.13669945299625397, | |
| "learning_rate": 4.535714103594162e-06, | |
| "loss": 0.3818, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.732181425485961, | |
| "grad_norm": 0.1308770775794983, | |
| "learning_rate": 4.523203506270058e-06, | |
| "loss": 0.3836, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.734341252699784, | |
| "grad_norm": 0.1351071000099182, | |
| "learning_rate": 4.510695920651742e-06, | |
| "loss": 0.3757, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.736501079913607, | |
| "grad_norm": 0.1277286410331726, | |
| "learning_rate": 4.4981914257439254e-06, | |
| "loss": 0.387, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.73866090712743, | |
| "grad_norm": 0.1272444725036621, | |
| "learning_rate": 4.485690100531793e-06, | |
| "loss": 0.3829, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.7408207343412527, | |
| "grad_norm": 0.14064733684062958, | |
| "learning_rate": 4.473192023980509e-06, | |
| "loss": 0.3822, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7429805615550755, | |
| "grad_norm": 0.13635459542274475, | |
| "learning_rate": 4.460697275034717e-06, | |
| "loss": 0.38, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7451403887688985, | |
| "grad_norm": 0.136144757270813, | |
| "learning_rate": 4.448205932618042e-06, | |
| "loss": 0.3794, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7473002159827213, | |
| "grad_norm": 0.14044472575187683, | |
| "learning_rate": 4.4357180756325915e-06, | |
| "loss": 0.3741, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7494600431965441, | |
| "grad_norm": 0.13555637001991272, | |
| "learning_rate": 4.423233782958459e-06, | |
| "loss": 0.369, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7516198704103672, | |
| "grad_norm": 0.1326342225074768, | |
| "learning_rate": 4.410753133453222e-06, | |
| "loss": 0.3784, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7537796976241902, | |
| "grad_norm": 0.13601535558700562, | |
| "learning_rate": 4.398276205951443e-06, | |
| "loss": 0.3821, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.755939524838013, | |
| "grad_norm": 0.13336274027824402, | |
| "learning_rate": 4.38580307926418e-06, | |
| "loss": 0.3713, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7580993520518358, | |
| "grad_norm": 0.14658118784427643, | |
| "learning_rate": 4.373333832178478e-06, | |
| "loss": 0.3825, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7602591792656588, | |
| "grad_norm": 0.14051300287246704, | |
| "learning_rate": 4.360868543456883e-06, | |
| "loss": 0.3685, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.7624190064794818, | |
| "grad_norm": 0.1231897696852684, | |
| "learning_rate": 4.348407291836928e-06, | |
| "loss": 0.37, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7645788336933044, | |
| "grad_norm": 0.13528205454349518, | |
| "learning_rate": 4.335950156030653e-06, | |
| "loss": 0.3855, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.7667386609071274, | |
| "grad_norm": 0.14070774614810944, | |
| "learning_rate": 4.323497214724099e-06, | |
| "loss": 0.3752, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7688984881209504, | |
| "grad_norm": 0.1284414529800415, | |
| "learning_rate": 4.31104854657681e-06, | |
| "loss": 0.3659, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.7710583153347732, | |
| "grad_norm": 0.13247400522232056, | |
| "learning_rate": 4.298604230221341e-06, | |
| "loss": 0.3727, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.773218142548596, | |
| "grad_norm": 0.12880460917949677, | |
| "learning_rate": 4.286164344262756e-06, | |
| "loss": 0.3867, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.775377969762419, | |
| "grad_norm": 0.12950289249420166, | |
| "learning_rate": 4.273728967278137e-06, | |
| "loss": 0.3685, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.777537796976242, | |
| "grad_norm": 0.1209382489323616, | |
| "learning_rate": 4.261298177816082e-06, | |
| "loss": 0.3658, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.7796976241900648, | |
| "grad_norm": 0.1271076798439026, | |
| "learning_rate": 4.248872054396215e-06, | |
| "loss": 0.3801, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7818574514038876, | |
| "grad_norm": 0.1265021562576294, | |
| "learning_rate": 4.2364506755086856e-06, | |
| "loss": 0.3719, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.7840172786177106, | |
| "grad_norm": 0.12241175025701523, | |
| "learning_rate": 4.224034119613671e-06, | |
| "loss": 0.3744, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7861771058315334, | |
| "grad_norm": 0.12162206321954727, | |
| "learning_rate": 4.211622465140887e-06, | |
| "loss": 0.3797, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.7883369330453562, | |
| "grad_norm": 0.12623707950115204, | |
| "learning_rate": 4.199215790489091e-06, | |
| "loss": 0.3859, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7904967602591793, | |
| "grad_norm": 0.13991111516952515, | |
| "learning_rate": 4.186814174025582e-06, | |
| "loss": 0.3736, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.7926565874730023, | |
| "grad_norm": 0.12621738016605377, | |
| "learning_rate": 4.174417694085711e-06, | |
| "loss": 0.3743, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.794816414686825, | |
| "grad_norm": 0.12811291217803955, | |
| "learning_rate": 4.16202642897238e-06, | |
| "loss": 0.3782, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.7969762419006479, | |
| "grad_norm": 0.12236473709344864, | |
| "learning_rate": 4.149640456955555e-06, | |
| "loss": 0.3764, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.7991360691144709, | |
| "grad_norm": 0.142435684800148, | |
| "learning_rate": 4.137259856271767e-06, | |
| "loss": 0.3719, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.801295896328294, | |
| "grad_norm": 0.12946586310863495, | |
| "learning_rate": 4.124884705123619e-06, | |
| "loss": 0.3852, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8034557235421165, | |
| "grad_norm": 0.1189626008272171, | |
| "learning_rate": 4.112515081679295e-06, | |
| "loss": 0.3751, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8056155507559395, | |
| "grad_norm": 0.13230590522289276, | |
| "learning_rate": 4.1001510640720525e-06, | |
| "loss": 0.3688, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8077753779697625, | |
| "grad_norm": 0.13355307281017303, | |
| "learning_rate": 4.087792730399749e-06, | |
| "loss": 0.3885, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8099352051835853, | |
| "grad_norm": 0.13115477561950684, | |
| "learning_rate": 4.075440158724339e-06, | |
| "loss": 0.3807, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.812095032397408, | |
| "grad_norm": 0.11709022521972656, | |
| "learning_rate": 4.063093427071376e-06, | |
| "loss": 0.3715, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8142548596112311, | |
| "grad_norm": 0.13939060270786285, | |
| "learning_rate": 4.0507526134295314e-06, | |
| "loss": 0.3718, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8164146868250541, | |
| "grad_norm": 0.13002587854862213, | |
| "learning_rate": 4.038417795750086e-06, | |
| "loss": 0.378, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.818574514038877, | |
| "grad_norm": 0.13568507134914398, | |
| "learning_rate": 4.0260890519464565e-06, | |
| "loss": 0.3715, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8207343412526997, | |
| "grad_norm": 0.13115161657333374, | |
| "learning_rate": 4.013766459893686e-06, | |
| "loss": 0.374, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8228941684665227, | |
| "grad_norm": 0.1360725313425064, | |
| "learning_rate": 4.001450097427965e-06, | |
| "loss": 0.3915, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8250539956803455, | |
| "grad_norm": 0.14375773072242737, | |
| "learning_rate": 3.989140042346134e-06, | |
| "loss": 0.3823, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8272138228941683, | |
| "grad_norm": 0.14056192338466644, | |
| "learning_rate": 3.9768363724051875e-06, | |
| "loss": 0.3797, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8293736501079914, | |
| "grad_norm": 0.13145223259925842, | |
| "learning_rate": 3.964539165321795e-06, | |
| "loss": 0.3651, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8315334773218144, | |
| "grad_norm": 0.1401747614145279, | |
| "learning_rate": 3.952248498771797e-06, | |
| "loss": 0.3803, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8336933045356372, | |
| "grad_norm": 0.1457161009311676, | |
| "learning_rate": 3.939964450389728e-06, | |
| "loss": 0.3875, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.83585313174946, | |
| "grad_norm": 0.1399625837802887, | |
| "learning_rate": 3.927687097768309e-06, | |
| "loss": 0.3855, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.838012958963283, | |
| "grad_norm": 0.12442053109407425, | |
| "learning_rate": 3.915416518457974e-06, | |
| "loss": 0.3885, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.8401727861771058, | |
| "grad_norm": 0.12682035565376282, | |
| "learning_rate": 3.9031527899663705e-06, | |
| "loss": 0.3708, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8423326133909286, | |
| "grad_norm": 0.12829378247261047, | |
| "learning_rate": 3.890895989757874e-06, | |
| "loss": 0.376, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8444924406047516, | |
| "grad_norm": 0.14053881168365479, | |
| "learning_rate": 3.8786461952530955e-06, | |
| "loss": 0.373, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8466522678185746, | |
| "grad_norm": 0.1281130015850067, | |
| "learning_rate": 3.866403483828392e-06, | |
| "loss": 0.3773, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.8488120950323974, | |
| "grad_norm": 0.12932966649532318, | |
| "learning_rate": 3.854167932815387e-06, | |
| "loss": 0.383, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8509719222462202, | |
| "grad_norm": 0.1419646143913269, | |
| "learning_rate": 3.841939619500468e-06, | |
| "loss": 0.3674, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8531317494600432, | |
| "grad_norm": 0.12675082683563232, | |
| "learning_rate": 3.8297186211243085e-06, | |
| "loss": 0.3814, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8552915766738662, | |
| "grad_norm": 0.11979357898235321, | |
| "learning_rate": 3.817505014881378e-06, | |
| "loss": 0.38, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.857451403887689, | |
| "grad_norm": 0.13714280724525452, | |
| "learning_rate": 3.8052988779194478e-06, | |
| "loss": 0.3823, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8596112311015118, | |
| "grad_norm": 0.13954536616802216, | |
| "learning_rate": 3.7931002873391156e-06, | |
| "loss": 0.3796, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8617710583153348, | |
| "grad_norm": 0.11842264980077744, | |
| "learning_rate": 3.7809093201933078e-06, | |
| "loss": 0.3761, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8639308855291576, | |
| "grad_norm": 0.12878850102424622, | |
| "learning_rate": 3.7687260534868e-06, | |
| "loss": 0.3821, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.8660907127429804, | |
| "grad_norm": 0.14155155420303345, | |
| "learning_rate": 3.756550564175727e-06, | |
| "loss": 0.3748, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8682505399568035, | |
| "grad_norm": 0.12996132671833038, | |
| "learning_rate": 3.744382929167094e-06, | |
| "loss": 0.3741, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.8704103671706265, | |
| "grad_norm": 0.12898430228233337, | |
| "learning_rate": 3.7322232253182984e-06, | |
| "loss": 0.3763, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8725701943844493, | |
| "grad_norm": 0.12044209241867065, | |
| "learning_rate": 3.7200715294366376e-06, | |
| "loss": 0.3747, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.874730021598272, | |
| "grad_norm": 0.12121162563562393, | |
| "learning_rate": 3.7079279182788263e-06, | |
| "loss": 0.381, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.876889848812095, | |
| "grad_norm": 0.12106562405824661, | |
| "learning_rate": 3.695792468550517e-06, | |
| "loss": 0.3767, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.8790496760259179, | |
| "grad_norm": 0.118615061044693, | |
| "learning_rate": 3.6836652569057994e-06, | |
| "loss": 0.3708, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8812095032397407, | |
| "grad_norm": 0.12058837711811066, | |
| "learning_rate": 3.6715463599467372e-06, | |
| "loss": 0.3778, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.8833693304535637, | |
| "grad_norm": 0.12485583126544952, | |
| "learning_rate": 3.659435854222869e-06, | |
| "loss": 0.3679, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8855291576673867, | |
| "grad_norm": 0.11957580596208572, | |
| "learning_rate": 3.6473338162307314e-06, | |
| "loss": 0.3709, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.8876889848812095, | |
| "grad_norm": 0.12649306654930115, | |
| "learning_rate": 3.635240322413375e-06, | |
| "loss": 0.3803, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8898488120950323, | |
| "grad_norm": 0.12188448011875153, | |
| "learning_rate": 3.6231554491598766e-06, | |
| "loss": 0.3753, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.8920086393088553, | |
| "grad_norm": 0.12264645844697952, | |
| "learning_rate": 3.6110792728048636e-06, | |
| "loss": 0.3736, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8941684665226783, | |
| "grad_norm": 0.12633198499679565, | |
| "learning_rate": 3.599011869628033e-06, | |
| "loss": 0.3734, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.896328293736501, | |
| "grad_norm": 0.12245716154575348, | |
| "learning_rate": 3.5869533158536583e-06, | |
| "loss": 0.3661, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.898488120950324, | |
| "grad_norm": 0.11652278900146484, | |
| "learning_rate": 3.5749036876501196e-06, | |
| "loss": 0.3775, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.900647948164147, | |
| "grad_norm": 0.1323150098323822, | |
| "learning_rate": 3.562863061129419e-06, | |
| "loss": 0.3736, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9028077753779697, | |
| "grad_norm": 0.1193445473909378, | |
| "learning_rate": 3.550831512346695e-06, | |
| "loss": 0.3756, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9049676025917925, | |
| "grad_norm": 0.11626636981964111, | |
| "learning_rate": 3.538809117299751e-06, | |
| "loss": 0.3771, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9071274298056156, | |
| "grad_norm": 0.13352340459823608, | |
| "learning_rate": 3.526795951928569e-06, | |
| "loss": 0.3828, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9092872570194386, | |
| "grad_norm": 0.13351115584373474, | |
| "learning_rate": 3.5147920921148267e-06, | |
| "loss": 0.3645, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9114470842332614, | |
| "grad_norm": 0.11943700909614563, | |
| "learning_rate": 3.502797613681429e-06, | |
| "loss": 0.386, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9136069114470842, | |
| "grad_norm": 0.1416894644498825, | |
| "learning_rate": 3.4908125923920204e-06, | |
| "loss": 0.3771, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9157667386609072, | |
| "grad_norm": 0.12883397936820984, | |
| "learning_rate": 3.478837103950509e-06, | |
| "loss": 0.38, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.91792656587473, | |
| "grad_norm": 0.12375036627054214, | |
| "learning_rate": 3.4668712240005912e-06, | |
| "loss": 0.3771, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9200863930885528, | |
| "grad_norm": 0.13057532906532288, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.3867, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9222462203023758, | |
| "grad_norm": 0.13063670694828033, | |
| "learning_rate": 3.442968591846359e-06, | |
| "loss": 0.3746, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9244060475161988, | |
| "grad_norm": 0.11987043917179108, | |
| "learning_rate": 3.431031990624063e-06, | |
| "loss": 0.3733, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9265658747300216, | |
| "grad_norm": 0.13609716296195984, | |
| "learning_rate": 3.4191052998564344e-06, | |
| "loss": 0.3766, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9287257019438444, | |
| "grad_norm": 0.1286943554878235, | |
| "learning_rate": 3.407188594878938e-06, | |
| "loss": 0.3777, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9308855291576674, | |
| "grad_norm": 0.15128813683986664, | |
| "learning_rate": 3.3952819509639534e-06, | |
| "loss": 0.3729, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9330453563714904, | |
| "grad_norm": 0.12446796149015427, | |
| "learning_rate": 3.3833854433203185e-06, | |
| "loss": 0.3773, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.935205183585313, | |
| "grad_norm": 0.1316557675600052, | |
| "learning_rate": 3.3714991470928393e-06, | |
| "loss": 0.3843, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.937365010799136, | |
| "grad_norm": 0.12782582640647888, | |
| "learning_rate": 3.359623137361825e-06, | |
| "loss": 0.3787, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.939524838012959, | |
| "grad_norm": 0.13275963068008423, | |
| "learning_rate": 3.347757489142608e-06, | |
| "loss": 0.3809, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9416846652267818, | |
| "grad_norm": 0.1355566680431366, | |
| "learning_rate": 3.3359022773850673e-06, | |
| "loss": 0.3798, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9438444924406046, | |
| "grad_norm": 0.12694980204105377, | |
| "learning_rate": 3.3240575769731662e-06, | |
| "loss": 0.3825, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9460043196544277, | |
| "grad_norm": 0.13038381934165955, | |
| "learning_rate": 3.312223462724472e-06, | |
| "loss": 0.3861, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.9481641468682507, | |
| "grad_norm": 0.1373014897108078, | |
| "learning_rate": 3.300400009389678e-06, | |
| "loss": 0.3828, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9503239740820735, | |
| "grad_norm": 0.1347927749156952, | |
| "learning_rate": 3.2885872916521445e-06, | |
| "loss": 0.3701, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9524838012958963, | |
| "grad_norm": 0.12417130172252655, | |
| "learning_rate": 3.2767853841274154e-06, | |
| "loss": 0.3823, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9546436285097193, | |
| "grad_norm": 0.1381063610315323, | |
| "learning_rate": 3.264994361362753e-06, | |
| "loss": 0.3768, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.956803455723542, | |
| "grad_norm": 0.12305869907140732, | |
| "learning_rate": 3.2532142978366654e-06, | |
| "loss": 0.3803, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9589632829373649, | |
| "grad_norm": 0.12444626539945602, | |
| "learning_rate": 3.241445267958438e-06, | |
| "loss": 0.3717, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.961123110151188, | |
| "grad_norm": 0.12498262524604797, | |
| "learning_rate": 3.2296873460676557e-06, | |
| "loss": 0.3739, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.963282937365011, | |
| "grad_norm": 0.11730080097913742, | |
| "learning_rate": 3.217940606433747e-06, | |
| "loss": 0.379, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.9654427645788337, | |
| "grad_norm": 0.13885721564292908, | |
| "learning_rate": 3.2062051232555024e-06, | |
| "loss": 0.3693, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9676025917926565, | |
| "grad_norm": 0.12217065691947937, | |
| "learning_rate": 3.1944809706606123e-06, | |
| "loss": 0.3739, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.9697624190064795, | |
| "grad_norm": 0.1336861401796341, | |
| "learning_rate": 3.182768222705198e-06, | |
| "loss": 0.3747, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9719222462203023, | |
| "grad_norm": 0.12711098790168762, | |
| "learning_rate": 3.171066953373338e-06, | |
| "loss": 0.3821, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.9740820734341251, | |
| "grad_norm": 0.1329392045736313, | |
| "learning_rate": 3.1593772365766107e-06, | |
| "loss": 0.376, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9762419006479481, | |
| "grad_norm": 0.12040119618177414, | |
| "learning_rate": 3.147699146153621e-06, | |
| "loss": 0.3738, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.9784017278617712, | |
| "grad_norm": 0.23164218664169312, | |
| "learning_rate": 3.1360327558695336e-06, | |
| "loss": 0.3802, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.980561555075594, | |
| "grad_norm": 0.11707053333520889, | |
| "learning_rate": 3.1243781394156138e-06, | |
| "loss": 0.3813, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.9827213822894167, | |
| "grad_norm": 0.1273183971643448, | |
| "learning_rate": 3.1127353704087477e-06, | |
| "loss": 0.3779, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9848812095032398, | |
| "grad_norm": 0.1234814003109932, | |
| "learning_rate": 3.1011045223909954e-06, | |
| "loss": 0.3804, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.9870410367170628, | |
| "grad_norm": 0.1258484572172165, | |
| "learning_rate": 3.089485668829113e-06, | |
| "loss": 0.3811, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9892008639308856, | |
| "grad_norm": 0.1231926903128624, | |
| "learning_rate": 3.077878883114096e-06, | |
| "loss": 0.3831, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.9913606911447084, | |
| "grad_norm": 0.12332677841186523, | |
| "learning_rate": 3.066284238560713e-06, | |
| "loss": 0.3698, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9935205183585314, | |
| "grad_norm": 0.12334899604320526, | |
| "learning_rate": 3.0547018084070344e-06, | |
| "loss": 0.3768, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.9956803455723542, | |
| "grad_norm": 0.11958076804876328, | |
| "learning_rate": 3.043131665813988e-06, | |
| "loss": 0.3684, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.997840172786177, | |
| "grad_norm": 0.12707985937595367, | |
| "learning_rate": 3.031573883864882e-06, | |
| "loss": 0.382, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.14072422683238983, | |
| "learning_rate": 3.0200285355649504e-06, | |
| "loss": 0.3729, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.002159827213823, | |
| "grad_norm": 0.1437283754348755, | |
| "learning_rate": 3.0084956938408873e-06, | |
| "loss": 0.3623, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0043196544276456, | |
| "grad_norm": 0.12962134182453156, | |
| "learning_rate": 2.9969754315403865e-06, | |
| "loss": 0.3649, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0064794816414686, | |
| "grad_norm": 0.13019190728664398, | |
| "learning_rate": 2.9854678214316875e-06, | |
| "loss": 0.3626, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0086393088552916, | |
| "grad_norm": 0.12730036675930023, | |
| "learning_rate": 2.97397293620311e-06, | |
| "loss": 0.3572, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0107991360691146, | |
| "grad_norm": 0.13056515157222748, | |
| "learning_rate": 2.962490848462596e-06, | |
| "loss": 0.3474, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.012958963282937, | |
| "grad_norm": 0.13034315407276154, | |
| "learning_rate": 2.951021630737255e-06, | |
| "loss": 0.3679, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.0151187904967602, | |
| "grad_norm": 0.14478375017642975, | |
| "learning_rate": 2.9395653554728955e-06, | |
| "loss": 0.3579, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.0172786177105833, | |
| "grad_norm": 0.1417471021413803, | |
| "learning_rate": 2.92812209503358e-06, | |
| "loss": 0.365, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.019438444924406, | |
| "grad_norm": 0.13237528502941132, | |
| "learning_rate": 2.91669192170116e-06, | |
| "loss": 0.3658, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.021598272138229, | |
| "grad_norm": 0.1314917653799057, | |
| "learning_rate": 2.9052749076748266e-06, | |
| "loss": 0.3687, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.023758099352052, | |
| "grad_norm": 0.11867891997098923, | |
| "learning_rate": 2.8938711250706397e-06, | |
| "loss": 0.3643, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.025917926565875, | |
| "grad_norm": 0.13976238667964935, | |
| "learning_rate": 2.8824806459210907e-06, | |
| "loss": 0.3678, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0280777537796975, | |
| "grad_norm": 0.1445903778076172, | |
| "learning_rate": 2.871103542174637e-06, | |
| "loss": 0.3574, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.0302375809935205, | |
| "grad_norm": 0.11427946388721466, | |
| "learning_rate": 2.8597398856952473e-06, | |
| "loss": 0.3569, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0323974082073435, | |
| "grad_norm": 0.13496039807796478, | |
| "learning_rate": 2.8483897482619566e-06, | |
| "loss": 0.3717, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.0345572354211665, | |
| "grad_norm": 0.13540537655353546, | |
| "learning_rate": 2.837053201568396e-06, | |
| "loss": 0.3667, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.036717062634989, | |
| "grad_norm": 0.12281273305416107, | |
| "learning_rate": 2.825730317222358e-06, | |
| "loss": 0.3541, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.038876889848812, | |
| "grad_norm": 0.12640658020973206, | |
| "learning_rate": 2.814421166745337e-06, | |
| "loss": 0.3641, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.041036717062635, | |
| "grad_norm": 0.12110286951065063, | |
| "learning_rate": 2.803125821572068e-06, | |
| "loss": 0.3597, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0431965442764577, | |
| "grad_norm": 0.13443802297115326, | |
| "learning_rate": 2.791844353050094e-06, | |
| "loss": 0.3709, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0453563714902807, | |
| "grad_norm": 0.11386435478925705, | |
| "learning_rate": 2.7805768324393017e-06, | |
| "loss": 0.3681, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0475161987041037, | |
| "grad_norm": 0.13114500045776367, | |
| "learning_rate": 2.769323330911472e-06, | |
| "loss": 0.3602, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0496760259179267, | |
| "grad_norm": 0.13121016323566437, | |
| "learning_rate": 2.7580839195498397e-06, | |
| "loss": 0.3567, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.0518358531317493, | |
| "grad_norm": 0.11939337104558945, | |
| "learning_rate": 2.746858669348634e-06, | |
| "loss": 0.3611, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0539956803455723, | |
| "grad_norm": 0.11663561314344406, | |
| "learning_rate": 2.7356476512126386e-06, | |
| "loss": 0.3557, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.0561555075593954, | |
| "grad_norm": 0.11576730012893677, | |
| "learning_rate": 2.724450935956733e-06, | |
| "loss": 0.3723, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.058315334773218, | |
| "grad_norm": 0.1251356601715088, | |
| "learning_rate": 2.713268594305458e-06, | |
| "loss": 0.3637, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.060475161987041, | |
| "grad_norm": 0.12093979120254517, | |
| "learning_rate": 2.7021006968925613e-06, | |
| "loss": 0.3752, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.062634989200864, | |
| "grad_norm": 0.12214156985282898, | |
| "learning_rate": 2.6909473142605522e-06, | |
| "loss": 0.3638, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.064794816414687, | |
| "grad_norm": 0.12628315389156342, | |
| "learning_rate": 2.6798085168602595e-06, | |
| "loss": 0.3667, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.0669546436285096, | |
| "grad_norm": 0.12237667292356491, | |
| "learning_rate": 2.668684375050378e-06, | |
| "loss": 0.3653, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.0691144708423326, | |
| "grad_norm": 0.1107870489358902, | |
| "learning_rate": 2.6575749590970336e-06, | |
| "loss": 0.3558, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0712742980561556, | |
| "grad_norm": 0.1208115965127945, | |
| "learning_rate": 2.646480339173337e-06, | |
| "loss": 0.3733, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.0734341252699786, | |
| "grad_norm": 0.12692323327064514, | |
| "learning_rate": 2.635400585358937e-06, | |
| "loss": 0.3663, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.075593952483801, | |
| "grad_norm": 0.11760963499546051, | |
| "learning_rate": 2.624335767639582e-06, | |
| "loss": 0.3638, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.077753779697624, | |
| "grad_norm": 0.12751303613185883, | |
| "learning_rate": 2.6132859559066704e-06, | |
| "loss": 0.3547, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.079913606911447, | |
| "grad_norm": 0.12997639179229736, | |
| "learning_rate": 2.6022512199568205e-06, | |
| "loss": 0.3558, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.08207343412527, | |
| "grad_norm": 0.12291760742664337, | |
| "learning_rate": 2.5912316294914232e-06, | |
| "loss": 0.3506, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.084233261339093, | |
| "grad_norm": 0.11633151024580002, | |
| "learning_rate": 2.580227254116199e-06, | |
| "loss": 0.3648, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.086393088552916, | |
| "grad_norm": 0.12379375100135803, | |
| "learning_rate": 2.5692381633407672e-06, | |
| "loss": 0.3652, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.088552915766739, | |
| "grad_norm": 0.12270376831293106, | |
| "learning_rate": 2.558264426578192e-06, | |
| "loss": 0.3625, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.0907127429805614, | |
| "grad_norm": 0.12057667225599289, | |
| "learning_rate": 2.547306113144564e-06, | |
| "loss": 0.3712, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0928725701943844, | |
| "grad_norm": 0.1182745024561882, | |
| "learning_rate": 2.536363292258543e-06, | |
| "loss": 0.3686, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.0950323974082075, | |
| "grad_norm": 0.12089554965496063, | |
| "learning_rate": 2.5254360330409343e-06, | |
| "loss": 0.3603, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.09719222462203, | |
| "grad_norm": 0.12302310764789581, | |
| "learning_rate": 2.514524404514248e-06, | |
| "loss": 0.3599, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.099352051835853, | |
| "grad_norm": 0.1283075213432312, | |
| "learning_rate": 2.503628475602256e-06, | |
| "loss": 0.3685, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.101511879049676, | |
| "grad_norm": 0.11500417441129684, | |
| "learning_rate": 2.49274831512957e-06, | |
| "loss": 0.3657, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.103671706263499, | |
| "grad_norm": 0.11335953325033188, | |
| "learning_rate": 2.4818839918211963e-06, | |
| "loss": 0.3689, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.1058315334773217, | |
| "grad_norm": 0.12606894969940186, | |
| "learning_rate": 2.4710355743021077e-06, | |
| "loss": 0.359, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1079913606911447, | |
| "grad_norm": 0.11400944739580154, | |
| "learning_rate": 2.4602031310968013e-06, | |
| "loss": 0.3661, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1101511879049677, | |
| "grad_norm": 0.11969246715307236, | |
| "learning_rate": 2.4493867306288772e-06, | |
| "loss": 0.3618, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.1123110151187907, | |
| "grad_norm": 0.11956711113452911, | |
| "learning_rate": 2.4385864412206e-06, | |
| "loss": 0.3516, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.1144708423326133, | |
| "grad_norm": 0.11470730602741241, | |
| "learning_rate": 2.4278023310924676e-06, | |
| "loss": 0.3651, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.1166306695464363, | |
| "grad_norm": 0.12043334543704987, | |
| "learning_rate": 2.417034468362782e-06, | |
| "loss": 0.3702, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1187904967602593, | |
| "grad_norm": 0.11915960907936096, | |
| "learning_rate": 2.406282921047213e-06, | |
| "loss": 0.3609, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.120950323974082, | |
| "grad_norm": 0.1116413027048111, | |
| "learning_rate": 2.395547757058379e-06, | |
| "loss": 0.3576, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.123110151187905, | |
| "grad_norm": 0.11029747128486633, | |
| "learning_rate": 2.3848290442054096e-06, | |
| "loss": 0.3618, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.125269978401728, | |
| "grad_norm": 0.12164044380187988, | |
| "learning_rate": 2.3741268501935212e-06, | |
| "loss": 0.3557, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.127429805615551, | |
| "grad_norm": 0.11805900186300278, | |
| "learning_rate": 2.3634412426235886e-06, | |
| "loss": 0.3665, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1295896328293735, | |
| "grad_norm": 0.12578925490379333, | |
| "learning_rate": 2.3527722889917147e-06, | |
| "loss": 0.3617, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1317494600431965, | |
| "grad_norm": 0.11140415817499161, | |
| "learning_rate": 2.3421200566888096e-06, | |
| "loss": 0.3529, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.1339092872570196, | |
| "grad_norm": 0.12330644577741623, | |
| "learning_rate": 2.3314846130001622e-06, | |
| "loss": 0.3512, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.136069114470842, | |
| "grad_norm": 0.11442252993583679, | |
| "learning_rate": 2.320866025105016e-06, | |
| "loss": 0.3527, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.138228941684665, | |
| "grad_norm": 0.11933194845914841, | |
| "learning_rate": 2.3102643600761445e-06, | |
| "loss": 0.3481, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.140388768898488, | |
| "grad_norm": 0.11264543980360031, | |
| "learning_rate": 2.299679684879421e-06, | |
| "loss": 0.3583, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.142548596112311, | |
| "grad_norm": 0.11280512809753418, | |
| "learning_rate": 2.289112066373411e-06, | |
| "loss": 0.3629, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.1447084233261338, | |
| "grad_norm": 0.11324049532413483, | |
| "learning_rate": 2.2785615713089363e-06, | |
| "loss": 0.3609, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.146868250539957, | |
| "grad_norm": 0.10841232538223267, | |
| "learning_rate": 2.268028266328655e-06, | |
| "loss": 0.3613, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.14902807775378, | |
| "grad_norm": 0.1152244582772255, | |
| "learning_rate": 2.25751221796665e-06, | |
| "loss": 0.3571, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.1511879049676024, | |
| "grad_norm": 0.11110089719295502, | |
| "learning_rate": 2.247013492647994e-06, | |
| "loss": 0.3548, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1533477321814254, | |
| "grad_norm": 0.11328666657209396, | |
| "learning_rate": 2.2365321566883437e-06, | |
| "loss": 0.3586, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.1555075593952484, | |
| "grad_norm": 0.11004538089036942, | |
| "learning_rate": 2.2260682762935137e-06, | |
| "loss": 0.3565, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.1576673866090714, | |
| "grad_norm": 0.11562500894069672, | |
| "learning_rate": 2.2156219175590623e-06, | |
| "loss": 0.3619, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.159827213822894, | |
| "grad_norm": 0.11296035349369049, | |
| "learning_rate": 2.2051931464698636e-06, | |
| "loss": 0.3656, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.161987041036717, | |
| "grad_norm": 0.11270337551832199, | |
| "learning_rate": 2.1947820288997067e-06, | |
| "loss": 0.3496, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.16414686825054, | |
| "grad_norm": 0.11527759581804276, | |
| "learning_rate": 2.1843886306108686e-06, | |
| "loss": 0.3695, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.166306695464363, | |
| "grad_norm": 0.11408324539661407, | |
| "learning_rate": 2.174013017253701e-06, | |
| "loss": 0.3651, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.1684665226781856, | |
| "grad_norm": 0.10843408107757568, | |
| "learning_rate": 2.1636552543662187e-06, | |
| "loss": 0.3692, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1706263498920086, | |
| "grad_norm": 0.11223003268241882, | |
| "learning_rate": 2.153315407373679e-06, | |
| "loss": 0.3545, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.1727861771058317, | |
| "grad_norm": 0.11480898410081863, | |
| "learning_rate": 2.1429935415881753e-06, | |
| "loss": 0.3609, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1749460043196542, | |
| "grad_norm": 0.1133100613951683, | |
| "learning_rate": 2.132689722208223e-06, | |
| "loss": 0.361, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.1771058315334773, | |
| "grad_norm": 0.11355537176132202, | |
| "learning_rate": 2.1224040143183444e-06, | |
| "loss": 0.3681, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.1792656587473003, | |
| "grad_norm": 0.11831656098365784, | |
| "learning_rate": 2.112136482888663e-06, | |
| "loss": 0.3555, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.1814254859611233, | |
| "grad_norm": 0.11772197484970093, | |
| "learning_rate": 2.1018871927744844e-06, | |
| "loss": 0.3604, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.183585313174946, | |
| "grad_norm": 0.10822444409132004, | |
| "learning_rate": 2.0916562087158964e-06, | |
| "loss": 0.3583, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.185745140388769, | |
| "grad_norm": 0.21270522475242615, | |
| "learning_rate": 2.0814435953373554e-06, | |
| "loss": 0.3651, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.187904967602592, | |
| "grad_norm": 0.11271930485963821, | |
| "learning_rate": 2.0712494171472776e-06, | |
| "loss": 0.367, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.190064794816415, | |
| "grad_norm": 0.1191214919090271, | |
| "learning_rate": 2.061073738537635e-06, | |
| "loss": 0.3566, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1922246220302375, | |
| "grad_norm": 0.1228100061416626, | |
| "learning_rate": 2.0509166237835398e-06, | |
| "loss": 0.3553, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.1943844492440605, | |
| "grad_norm": 0.11371961981058121, | |
| "learning_rate": 2.040778137042852e-06, | |
| "loss": 0.3621, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.1965442764578835, | |
| "grad_norm": 0.10948773473501205, | |
| "learning_rate": 2.030658342355765e-06, | |
| "loss": 0.3612, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.198704103671706, | |
| "grad_norm": 0.10944036394357681, | |
| "learning_rate": 2.0205573036443994e-06, | |
| "loss": 0.3619, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.200863930885529, | |
| "grad_norm": 0.11753126233816147, | |
| "learning_rate": 2.0104750847124075e-06, | |
| "loss": 0.3636, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.203023758099352, | |
| "grad_norm": 0.12510347366333008, | |
| "learning_rate": 2.0004117492445614e-06, | |
| "loss": 0.3789, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.205183585313175, | |
| "grad_norm": 0.1162487342953682, | |
| "learning_rate": 1.990367360806359e-06, | |
| "loss": 0.3595, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2073434125269977, | |
| "grad_norm": 0.12260331958532333, | |
| "learning_rate": 1.980341982843616e-06, | |
| "loss": 0.3659, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.2095032397408207, | |
| "grad_norm": 0.11793196201324463, | |
| "learning_rate": 1.9703356786820687e-06, | |
| "loss": 0.3644, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.2116630669546438, | |
| "grad_norm": 0.11070533841848373, | |
| "learning_rate": 1.9603485115269743e-06, | |
| "loss": 0.3587, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.2138228941684663, | |
| "grad_norm": 0.10772062093019485, | |
| "learning_rate": 1.9503805444627054e-06, | |
| "loss": 0.358, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.2159827213822894, | |
| "grad_norm": 0.11722833663225174, | |
| "learning_rate": 1.9404318404523605e-06, | |
| "loss": 0.3529, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.2181425485961124, | |
| "grad_norm": 0.11525849252939224, | |
| "learning_rate": 1.930502462337362e-06, | |
| "loss": 0.3526, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.2203023758099354, | |
| "grad_norm": 0.12186475098133087, | |
| "learning_rate": 1.920592472837057e-06, | |
| "loss": 0.3642, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.222462203023758, | |
| "grad_norm": 0.11602187156677246, | |
| "learning_rate": 1.910701934548329e-06, | |
| "loss": 0.3741, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.224622030237581, | |
| "grad_norm": 0.12122868001461029, | |
| "learning_rate": 1.900830909945189e-06, | |
| "loss": 0.3658, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.226781857451404, | |
| "grad_norm": 0.11481517553329468, | |
| "learning_rate": 1.8909794613783943e-06, | |
| "loss": 0.3586, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.2289416846652266, | |
| "grad_norm": 0.10677429288625717, | |
| "learning_rate": 1.8811476510750486e-06, | |
| "loss": 0.367, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2311015118790496, | |
| "grad_norm": 0.11565054953098297, | |
| "learning_rate": 1.8713355411382117e-06, | |
| "loss": 0.3629, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.2332613390928726, | |
| "grad_norm": 0.11869722604751587, | |
| "learning_rate": 1.8615431935464984e-06, | |
| "loss": 0.3455, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2354211663066956, | |
| "grad_norm": 0.12298930436372757, | |
| "learning_rate": 1.8517706701536998e-06, | |
| "loss": 0.377, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.237580993520518, | |
| "grad_norm": 0.11223292350769043, | |
| "learning_rate": 1.8420180326883857e-06, | |
| "loss": 0.3611, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.239740820734341, | |
| "grad_norm": 0.10755477845668793, | |
| "learning_rate": 1.8322853427535148e-06, | |
| "loss": 0.3636, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.2419006479481642, | |
| "grad_norm": 0.11490552872419357, | |
| "learning_rate": 1.822572661826047e-06, | |
| "loss": 0.3606, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2440604751619873, | |
| "grad_norm": 0.11000396311283112, | |
| "learning_rate": 1.8128800512565514e-06, | |
| "loss": 0.3643, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.24622030237581, | |
| "grad_norm": 0.10895387083292007, | |
| "learning_rate": 1.803207572268826e-06, | |
| "loss": 0.3623, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.248380129589633, | |
| "grad_norm": 0.11881309747695923, | |
| "learning_rate": 1.7935552859595058e-06, | |
| "loss": 0.3598, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.250539956803456, | |
| "grad_norm": 0.11568914353847504, | |
| "learning_rate": 1.7839232532976746e-06, | |
| "loss": 0.3652, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2526997840172784, | |
| "grad_norm": 0.10827185958623886, | |
| "learning_rate": 1.7743115351244883e-06, | |
| "loss": 0.3616, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.2548596112311015, | |
| "grad_norm": 0.12083268910646439, | |
| "learning_rate": 1.7647201921527802e-06, | |
| "loss": 0.3696, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2570194384449245, | |
| "grad_norm": 0.11744555830955505, | |
| "learning_rate": 1.7551492849666857e-06, | |
| "loss": 0.3547, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.2591792656587475, | |
| "grad_norm": 0.11333145946264267, | |
| "learning_rate": 1.7455988740212576e-06, | |
| "loss": 0.3648, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.26133909287257, | |
| "grad_norm": 0.1083984524011612, | |
| "learning_rate": 1.7360690196420816e-06, | |
| "loss": 0.3609, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.263498920086393, | |
| "grad_norm": 0.12069600075483322, | |
| "learning_rate": 1.7265597820248987e-06, | |
| "loss": 0.3617, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.265658747300216, | |
| "grad_norm": 0.11563380807638168, | |
| "learning_rate": 1.7170712212352187e-06, | |
| "loss": 0.3554, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.267818574514039, | |
| "grad_norm": 0.11244919896125793, | |
| "learning_rate": 1.7076033972079503e-06, | |
| "loss": 0.3526, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.2699784017278617, | |
| "grad_norm": 0.11943572014570236, | |
| "learning_rate": 1.698156369747016e-06, | |
| "loss": 0.3639, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.2721382289416847, | |
| "grad_norm": 0.11513727903366089, | |
| "learning_rate": 1.6887301985249754e-06, | |
| "loss": 0.3622, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2742980561555077, | |
| "grad_norm": 0.11251917481422424, | |
| "learning_rate": 1.6793249430826502e-06, | |
| "loss": 0.3606, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.2764578833693303, | |
| "grad_norm": 0.11887813359498978, | |
| "learning_rate": 1.6699406628287423e-06, | |
| "loss": 0.3602, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.2786177105831533, | |
| "grad_norm": 0.1043018326163292, | |
| "learning_rate": 1.6605774170394683e-06, | |
| "loss": 0.3597, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.2807775377969763, | |
| "grad_norm": 0.11690463870763779, | |
| "learning_rate": 1.651235264858177e-06, | |
| "loss": 0.3706, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.282937365010799, | |
| "grad_norm": 0.11119679361581802, | |
| "learning_rate": 1.6419142652949793e-06, | |
| "loss": 0.3755, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.285097192224622, | |
| "grad_norm": 0.12275518476963043, | |
| "learning_rate": 1.6326144772263752e-06, | |
| "loss": 0.3617, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.287257019438445, | |
| "grad_norm": 0.11455702781677246, | |
| "learning_rate": 1.6233359593948777e-06, | |
| "loss": 0.3561, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.289416846652268, | |
| "grad_norm": 0.1072060838341713, | |
| "learning_rate": 1.6140787704086502e-06, | |
| "loss": 0.3595, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.2915766738660905, | |
| "grad_norm": 0.11446718126535416, | |
| "learning_rate": 1.6048429687411294e-06, | |
| "loss": 0.3579, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.2937365010799136, | |
| "grad_norm": 0.1233833059668541, | |
| "learning_rate": 1.5956286127306591e-06, | |
| "loss": 0.3571, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2958963282937366, | |
| "grad_norm": 0.11054225265979767, | |
| "learning_rate": 1.586435760580118e-06, | |
| "loss": 0.3592, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.2980561555075596, | |
| "grad_norm": 0.11470706015825272, | |
| "learning_rate": 1.5772644703565564e-06, | |
| "loss": 0.3602, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.300215982721382, | |
| "grad_norm": 0.1131376326084137, | |
| "learning_rate": 1.5681147999908308e-06, | |
| "loss": 0.3579, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.302375809935205, | |
| "grad_norm": 0.1124383881688118, | |
| "learning_rate": 1.5589868072772279e-06, | |
| "loss": 0.3592, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.304535637149028, | |
| "grad_norm": 0.13568998873233795, | |
| "learning_rate": 1.5498805498731146e-06, | |
| "loss": 0.3687, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.306695464362851, | |
| "grad_norm": 0.11868295818567276, | |
| "learning_rate": 1.5407960852985582e-06, | |
| "loss": 0.3741, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.308855291576674, | |
| "grad_norm": 0.11386443674564362, | |
| "learning_rate": 1.531733470935976e-06, | |
| "loss": 0.3702, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.311015118790497, | |
| "grad_norm": 0.11155420541763306, | |
| "learning_rate": 1.5226927640297663e-06, | |
| "loss": 0.3543, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.31317494600432, | |
| "grad_norm": 0.11469469219446182, | |
| "learning_rate": 1.5136740216859464e-06, | |
| "loss": 0.3718, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.3153347732181424, | |
| "grad_norm": 0.10761052370071411, | |
| "learning_rate": 1.5046773008717968e-06, | |
| "loss": 0.3728, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3174946004319654, | |
| "grad_norm": 0.10855443775653839, | |
| "learning_rate": 1.4957026584154926e-06, | |
| "loss": 0.3612, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.3196544276457884, | |
| "grad_norm": 0.11300813406705856, | |
| "learning_rate": 1.4867501510057548e-06, | |
| "loss": 0.3629, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3218142548596115, | |
| "grad_norm": 0.1190650686621666, | |
| "learning_rate": 1.4778198351914853e-06, | |
| "loss": 0.358, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.323974082073434, | |
| "grad_norm": 0.11081087589263916, | |
| "learning_rate": 1.4689117673814135e-06, | |
| "loss": 0.3579, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.326133909287257, | |
| "grad_norm": 0.10845163464546204, | |
| "learning_rate": 1.4600260038437376e-06, | |
| "loss": 0.3547, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.32829373650108, | |
| "grad_norm": 0.10712606459856033, | |
| "learning_rate": 1.4511626007057667e-06, | |
| "loss": 0.3702, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3304535637149026, | |
| "grad_norm": 0.10121244937181473, | |
| "learning_rate": 1.4423216139535735e-06, | |
| "loss": 0.3701, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.3326133909287257, | |
| "grad_norm": 0.10943249613046646, | |
| "learning_rate": 1.4335030994316357e-06, | |
| "loss": 0.3673, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3347732181425487, | |
| "grad_norm": 0.11610903590917587, | |
| "learning_rate": 1.4247071128424838e-06, | |
| "loss": 0.3603, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.3369330453563713, | |
| "grad_norm": 0.11263252794742584, | |
| "learning_rate": 1.4159337097463515e-06, | |
| "loss": 0.3646, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.3390928725701943, | |
| "grad_norm": 0.11808553338050842, | |
| "learning_rate": 1.407182945560817e-06, | |
| "loss": 0.3551, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.3412526997840173, | |
| "grad_norm": 0.11071130633354187, | |
| "learning_rate": 1.3984548755604655e-06, | |
| "loss": 0.3591, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3434125269978403, | |
| "grad_norm": 0.10774732381105423, | |
| "learning_rate": 1.38974955487653e-06, | |
| "loss": 0.3701, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.345572354211663, | |
| "grad_norm": 0.10596179217100143, | |
| "learning_rate": 1.3810670384965469e-06, | |
| "loss": 0.3619, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.347732181425486, | |
| "grad_norm": 0.10586302727460861, | |
| "learning_rate": 1.372407381264011e-06, | |
| "loss": 0.3671, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.349892008639309, | |
| "grad_norm": 0.11271238327026367, | |
| "learning_rate": 1.3637706378780209e-06, | |
| "loss": 0.369, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.352051835853132, | |
| "grad_norm": 0.11300753057003021, | |
| "learning_rate": 1.3551568628929434e-06, | |
| "loss": 0.366, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.3542116630669545, | |
| "grad_norm": 0.10634942352771759, | |
| "learning_rate": 1.346566110718061e-06, | |
| "loss": 0.3608, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3563714902807775, | |
| "grad_norm": 0.11670755594968796, | |
| "learning_rate": 1.337998435617235e-06, | |
| "loss": 0.3649, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.3585313174946005, | |
| "grad_norm": 0.11227838695049286, | |
| "learning_rate": 1.3294538917085586e-06, | |
| "loss": 0.3496, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.360691144708423, | |
| "grad_norm": 0.11369525641202927, | |
| "learning_rate": 1.3209325329640126e-06, | |
| "loss": 0.367, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.362850971922246, | |
| "grad_norm": 0.10753148049116135, | |
| "learning_rate": 1.312434413209131e-06, | |
| "loss": 0.3654, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.365010799136069, | |
| "grad_norm": 0.11079417914152145, | |
| "learning_rate": 1.3039595861226579e-06, | |
| "loss": 0.3535, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.367170626349892, | |
| "grad_norm": 0.10849615931510925, | |
| "learning_rate": 1.2955081052362072e-06, | |
| "loss": 0.3584, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3693304535637147, | |
| "grad_norm": 0.10960622876882553, | |
| "learning_rate": 1.2870800239339237e-06, | |
| "loss": 0.3578, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.3714902807775378, | |
| "grad_norm": 0.11225436627864838, | |
| "learning_rate": 1.2786753954521508e-06, | |
| "loss": 0.3645, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.373650107991361, | |
| "grad_norm": 0.11186996102333069, | |
| "learning_rate": 1.2702942728790897e-06, | |
| "loss": 0.3564, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.375809935205184, | |
| "grad_norm": 0.10800560563802719, | |
| "learning_rate": 1.2619367091544654e-06, | |
| "loss": 0.3595, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3779697624190064, | |
| "grad_norm": 0.11503534764051437, | |
| "learning_rate": 1.2536027570691938e-06, | |
| "loss": 0.363, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.3801295896328294, | |
| "grad_norm": 0.1053680032491684, | |
| "learning_rate": 1.2452924692650443e-06, | |
| "loss": 0.3668, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.3822894168466524, | |
| "grad_norm": 0.10837449133396149, | |
| "learning_rate": 1.2370058982343109e-06, | |
| "loss": 0.3646, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.384449244060475, | |
| "grad_norm": 0.10401103645563126, | |
| "learning_rate": 1.2287430963194807e-06, | |
| "loss": 0.3523, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.386609071274298, | |
| "grad_norm": 0.1130133643746376, | |
| "learning_rate": 1.2205041157129017e-06, | |
| "loss": 0.3522, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.388768898488121, | |
| "grad_norm": 0.11143437772989273, | |
| "learning_rate": 1.2122890084564542e-06, | |
| "loss": 0.3622, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.390928725701944, | |
| "grad_norm": 0.1088298037648201, | |
| "learning_rate": 1.204097826441218e-06, | |
| "loss": 0.3524, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.3930885529157666, | |
| "grad_norm": 0.11658685654401779, | |
| "learning_rate": 1.1959306214071508e-06, | |
| "loss": 0.3649, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3952483801295896, | |
| "grad_norm": 0.10530900955200195, | |
| "learning_rate": 1.18778744494276e-06, | |
| "loss": 0.3732, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.3974082073434126, | |
| "grad_norm": 0.10576412826776505, | |
| "learning_rate": 1.1796683484847731e-06, | |
| "loss": 0.3528, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3995680345572357, | |
| "grad_norm": 0.10664583742618561, | |
| "learning_rate": 1.1715733833178178e-06, | |
| "loss": 0.3638, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.4017278617710582, | |
| "grad_norm": 0.11170324683189392, | |
| "learning_rate": 1.1635026005740902e-06, | |
| "loss": 0.3632, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.4038876889848813, | |
| "grad_norm": 0.10899297147989273, | |
| "learning_rate": 1.1554560512330437e-06, | |
| "loss": 0.3717, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.4060475161987043, | |
| "grad_norm": 0.10355883091688156, | |
| "learning_rate": 1.1474337861210543e-06, | |
| "loss": 0.3669, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.408207343412527, | |
| "grad_norm": 0.11601343005895615, | |
| "learning_rate": 1.1394358559111101e-06, | |
| "loss": 0.3675, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.41036717062635, | |
| "grad_norm": 0.10625651478767395, | |
| "learning_rate": 1.1314623111224865e-06, | |
| "loss": 0.3696, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.412526997840173, | |
| "grad_norm": 0.1087704598903656, | |
| "learning_rate": 1.1235132021204226e-06, | |
| "loss": 0.3678, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.4146868250539955, | |
| "grad_norm": 0.1125335842370987, | |
| "learning_rate": 1.1155885791158128e-06, | |
| "loss": 0.3676, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.4168466522678185, | |
| "grad_norm": 0.10977572947740555, | |
| "learning_rate": 1.1076884921648834e-06, | |
| "loss": 0.3597, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.4190064794816415, | |
| "grad_norm": 0.11624909937381744, | |
| "learning_rate": 1.0998129911688766e-06, | |
| "loss": 0.3645, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4211663066954645, | |
| "grad_norm": 0.11193333566188812, | |
| "learning_rate": 1.0919621258737384e-06, | |
| "loss": 0.3679, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.423326133909287, | |
| "grad_norm": 0.10702624171972275, | |
| "learning_rate": 1.0841359458697986e-06, | |
| "loss": 0.3675, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.42548596112311, | |
| "grad_norm": 0.11081477999687195, | |
| "learning_rate": 1.0763345005914649e-06, | |
| "loss": 0.3733, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.427645788336933, | |
| "grad_norm": 0.11152873933315277, | |
| "learning_rate": 1.0685578393169054e-06, | |
| "loss": 0.3634, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.429805615550756, | |
| "grad_norm": 0.11278684437274933, | |
| "learning_rate": 1.0608060111677409e-06, | |
| "loss": 0.3646, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.4319654427645787, | |
| "grad_norm": 0.10329707711935043, | |
| "learning_rate": 1.053079065108728e-06, | |
| "loss": 0.3616, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4341252699784017, | |
| "grad_norm": 0.11579885333776474, | |
| "learning_rate": 1.0453770499474585e-06, | |
| "loss": 0.3642, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.4362850971922247, | |
| "grad_norm": 0.11287212371826172, | |
| "learning_rate": 1.037700014334047e-06, | |
| "loss": 0.3588, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.4384449244060473, | |
| "grad_norm": 0.10435645282268524, | |
| "learning_rate": 1.0300480067608232e-06, | |
| "loss": 0.3621, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.4406047516198703, | |
| "grad_norm": 0.1117047443985939, | |
| "learning_rate": 1.0224210755620257e-06, | |
| "loss": 0.3665, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4427645788336934, | |
| "grad_norm": 0.11821126937866211, | |
| "learning_rate": 1.014819268913495e-06, | |
| "loss": 0.3659, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.4449244060475164, | |
| "grad_norm": 0.11257217824459076, | |
| "learning_rate": 1.0072426348323754e-06, | |
| "loss": 0.3629, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.447084233261339, | |
| "grad_norm": 0.10960426181554794, | |
| "learning_rate": 9.99691221176805e-07, | |
| "loss": 0.3702, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.449244060475162, | |
| "grad_norm": 0.11274091899394989, | |
| "learning_rate": 9.921650756456164e-07, | |
| "loss": 0.3552, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.451403887688985, | |
| "grad_norm": 0.11033818125724792, | |
| "learning_rate": 9.84664245778037e-07, | |
| "loss": 0.3622, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.453563714902808, | |
| "grad_norm": 0.11202115565538406, | |
| "learning_rate": 9.771887789533818e-07, | |
| "loss": 0.3641, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.4557235421166306, | |
| "grad_norm": 0.10436037182807922, | |
| "learning_rate": 9.69738722390765e-07, | |
| "loss": 0.3722, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.4578833693304536, | |
| "grad_norm": 0.11260079592466354, | |
| "learning_rate": 9.623141231487904e-07, | |
| "loss": 0.3664, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4600431965442766, | |
| "grad_norm": 0.10981511324644089, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.3729, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.462203023758099, | |
| "grad_norm": 0.11340730637311935, | |
| "learning_rate": 9.475414840568903e-07, | |
| "loss": 0.3614, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.464362850971922, | |
| "grad_norm": 0.10501902550458908, | |
| "learning_rate": 9.401935375189802e-07, | |
| "loss": 0.3601, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.466522678185745, | |
| "grad_norm": 0.11369086056947708, | |
| "learning_rate": 9.32871234925159e-07, | |
| "loss": 0.3669, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.468682505399568, | |
| "grad_norm": 0.10842647403478622, | |
| "learning_rate": 9.255746225270689e-07, | |
| "loss": 0.3582, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.470842332613391, | |
| "grad_norm": 0.1089843288064003, | |
| "learning_rate": 9.183037464140804e-07, | |
| "loss": 0.3532, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.473002159827214, | |
| "grad_norm": 0.1023058295249939, | |
| "learning_rate": 9.110586525129988e-07, | |
| "loss": 0.3473, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.475161987041037, | |
| "grad_norm": 0.1110844761133194, | |
| "learning_rate": 9.038393865877725e-07, | |
| "loss": 0.3629, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4773218142548594, | |
| "grad_norm": 0.10468819737434387, | |
| "learning_rate": 8.966459942392108e-07, | |
| "loss": 0.3631, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.4794816414686824, | |
| "grad_norm": 0.11002985388040543, | |
| "learning_rate": 8.894785209046886e-07, | |
| "loss": 0.3584, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4816414686825055, | |
| "grad_norm": 0.10573374480009079, | |
| "learning_rate": 8.823370118578628e-07, | |
| "loss": 0.3681, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.4838012958963285, | |
| "grad_norm": 0.11796517670154572, | |
| "learning_rate": 8.752215122083874e-07, | |
| "loss": 0.3617, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.485961123110151, | |
| "grad_norm": 0.1184302419424057, | |
| "learning_rate": 8.68132066901623e-07, | |
| "loss": 0.3672, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.488120950323974, | |
| "grad_norm": 0.13177676498889923, | |
| "learning_rate": 8.610687207183604e-07, | |
| "loss": 0.3573, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.490280777537797, | |
| "grad_norm": 0.11671025305986404, | |
| "learning_rate": 8.540315182745329e-07, | |
| "loss": 0.3569, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.4924406047516197, | |
| "grad_norm": 0.10741881281137466, | |
| "learning_rate": 8.470205040209362e-07, | |
| "loss": 0.3558, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.4946004319654427, | |
| "grad_norm": 0.12825675308704376, | |
| "learning_rate": 8.400357222429473e-07, | |
| "loss": 0.3575, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.4967602591792657, | |
| "grad_norm": 0.10776403546333313, | |
| "learning_rate": 8.330772170602424e-07, | |
| "loss": 0.3589, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4989200863930887, | |
| "grad_norm": 0.11745335906744003, | |
| "learning_rate": 8.261450324265225e-07, | |
| "loss": 0.3617, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.5010799136069113, | |
| "grad_norm": 0.10803595185279846, | |
| "learning_rate": 8.192392121292336e-07, | |
| "loss": 0.3636, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5032397408207343, | |
| "grad_norm": 0.11620043963193893, | |
| "learning_rate": 8.123597997892918e-07, | |
| "loss": 0.3688, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.5053995680345573, | |
| "grad_norm": 0.11771270632743835, | |
| "learning_rate": 8.055068388608011e-07, | |
| "loss": 0.3633, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.5075593952483803, | |
| "grad_norm": 0.11002473533153534, | |
| "learning_rate": 7.986803726307901e-07, | |
| "loss": 0.3649, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.509719222462203, | |
| "grad_norm": 0.11476074159145355, | |
| "learning_rate": 7.918804442189271e-07, | |
| "loss": 0.3482, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.511879049676026, | |
| "grad_norm": 0.10824240744113922, | |
| "learning_rate": 7.851070965772572e-07, | |
| "loss": 0.3502, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.514038876889849, | |
| "grad_norm": 0.11206220835447311, | |
| "learning_rate": 7.783603724899258e-07, | |
| "loss": 0.3668, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5161987041036715, | |
| "grad_norm": 0.11207690834999084, | |
| "learning_rate": 7.716403145729073e-07, | |
| "loss": 0.3585, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.5183585313174945, | |
| "grad_norm": 0.10834087431430817, | |
| "learning_rate": 7.649469652737407e-07, | |
| "loss": 0.3557, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.5205183585313176, | |
| "grad_norm": 0.11165751516819, | |
| "learning_rate": 7.582803668712579e-07, | |
| "loss": 0.3654, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.52267818574514, | |
| "grad_norm": 0.10847879201173782, | |
| "learning_rate": 7.51640561475318e-07, | |
| "loss": 0.362, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.524838012958963, | |
| "grad_norm": 0.11347544938325882, | |
| "learning_rate": 7.450275910265415e-07, | |
| "loss": 0.3631, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.526997840172786, | |
| "grad_norm": 0.11547064036130905, | |
| "learning_rate": 7.384414972960419e-07, | |
| "loss": 0.3613, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.529157667386609, | |
| "grad_norm": 0.11166190356016159, | |
| "learning_rate": 7.318823218851668e-07, | |
| "loss": 0.3664, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.531317494600432, | |
| "grad_norm": 0.11519124358892441, | |
| "learning_rate": 7.253501062252338e-07, | |
| "loss": 0.3715, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.533477321814255, | |
| "grad_norm": 0.12818704545497894, | |
| "learning_rate": 7.188448915772673e-07, | |
| "loss": 0.3568, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.535637149028078, | |
| "grad_norm": 0.11333166062831879, | |
| "learning_rate": 7.123667190317396e-07, | |
| "loss": 0.366, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.537796976241901, | |
| "grad_norm": 0.11098440736532211, | |
| "learning_rate": 7.059156295083064e-07, | |
| "loss": 0.3651, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.5399568034557234, | |
| "grad_norm": 0.11005040258169174, | |
| "learning_rate": 6.994916637555571e-07, | |
| "loss": 0.3658, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.5421166306695464, | |
| "grad_norm": 0.10551054775714874, | |
| "learning_rate": 6.930948623507505e-07, | |
| "loss": 0.3654, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.5442764578833694, | |
| "grad_norm": 0.10704758763313293, | |
| "learning_rate": 6.86725265699561e-07, | |
| "loss": 0.3562, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.546436285097192, | |
| "grad_norm": 0.1092720702290535, | |
| "learning_rate": 6.803829140358237e-07, | |
| "loss": 0.3619, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.548596112311015, | |
| "grad_norm": 0.10640691220760345, | |
| "learning_rate": 6.74067847421277e-07, | |
| "loss": 0.3674, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.550755939524838, | |
| "grad_norm": 0.10517946630716324, | |
| "learning_rate": 6.677801057453143e-07, | |
| "loss": 0.3556, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.552915766738661, | |
| "grad_norm": 0.10489367693662643, | |
| "learning_rate": 6.615197287247299e-07, | |
| "loss": 0.3766, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.555075593952484, | |
| "grad_norm": 0.11467967927455902, | |
| "learning_rate": 6.552867559034687e-07, | |
| "loss": 0.3569, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.5572354211663066, | |
| "grad_norm": 0.11009713262319565, | |
| "learning_rate": 6.490812266523716e-07, | |
| "loss": 0.3654, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.5593952483801297, | |
| "grad_norm": 0.10729658603668213, | |
| "learning_rate": 6.429031801689362e-07, | |
| "loss": 0.3564, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.5615550755939527, | |
| "grad_norm": 0.1073872372508049, | |
| "learning_rate": 6.36752655477062e-07, | |
| "loss": 0.3606, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5637149028077753, | |
| "grad_norm": 0.10580222308635712, | |
| "learning_rate": 6.30629691426804e-07, | |
| "loss": 0.371, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.5658747300215983, | |
| "grad_norm": 0.11771810799837112, | |
| "learning_rate": 6.245343266941328e-07, | |
| "loss": 0.3597, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.5680345572354213, | |
| "grad_norm": 0.11992885917425156, | |
| "learning_rate": 6.184665997806832e-07, | |
| "loss": 0.3559, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.570194384449244, | |
| "grad_norm": 0.11079053580760956, | |
| "learning_rate": 6.124265490135161e-07, | |
| "loss": 0.3635, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.572354211663067, | |
| "grad_norm": 0.10871004313230515, | |
| "learning_rate": 6.064142125448763e-07, | |
| "loss": 0.3625, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.57451403887689, | |
| "grad_norm": 0.11944089829921722, | |
| "learning_rate": 6.004296283519478e-07, | |
| "loss": 0.3531, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.5766738660907125, | |
| "grad_norm": 0.11835870891809464, | |
| "learning_rate": 5.944728342366179e-07, | |
| "loss": 0.3596, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.5788336933045355, | |
| "grad_norm": 0.10851329565048218, | |
| "learning_rate": 5.885438678252342e-07, | |
| "loss": 0.3692, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.5809935205183585, | |
| "grad_norm": 0.10725897550582886, | |
| "learning_rate": 5.826427665683715e-07, | |
| "loss": 0.3621, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.5831533477321815, | |
| "grad_norm": 0.10977955162525177, | |
| "learning_rate": 5.767695677405921e-07, | |
| "loss": 0.3536, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5853131749460045, | |
| "grad_norm": 0.11643577367067337, | |
| "learning_rate": 5.709243084402128e-07, | |
| "loss": 0.3624, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.587473002159827, | |
| "grad_norm": 0.11957161873579025, | |
| "learning_rate": 5.651070255890689e-07, | |
| "loss": 0.3567, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.58963282937365, | |
| "grad_norm": 0.11547524482011795, | |
| "learning_rate": 5.593177559322776e-07, | |
| "loss": 0.3526, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.591792656587473, | |
| "grad_norm": 0.10810908675193787, | |
| "learning_rate": 5.535565360380146e-07, | |
| "loss": 0.3627, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5939524838012957, | |
| "grad_norm": 0.10978656262159348, | |
| "learning_rate": 5.478234022972756e-07, | |
| "loss": 0.3689, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.5961123110151187, | |
| "grad_norm": 0.11710033565759659, | |
| "learning_rate": 5.421183909236494e-07, | |
| "loss": 0.354, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.5982721382289418, | |
| "grad_norm": 0.10731150209903717, | |
| "learning_rate": 5.364415379530891e-07, | |
| "loss": 0.3672, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.6004319654427643, | |
| "grad_norm": 0.10609705001115799, | |
| "learning_rate": 5.307928792436812e-07, | |
| "loss": 0.3541, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6025917926565874, | |
| "grad_norm": 0.11076472699642181, | |
| "learning_rate": 5.251724504754258e-07, | |
| "loss": 0.3651, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.6047516198704104, | |
| "grad_norm": 0.11111797392368317, | |
| "learning_rate": 5.19580287150005e-07, | |
| "loss": 0.3557, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6069114470842334, | |
| "grad_norm": 0.10651623457670212, | |
| "learning_rate": 5.140164245905633e-07, | |
| "loss": 0.3537, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.6090712742980564, | |
| "grad_norm": 0.11073900759220123, | |
| "learning_rate": 5.084808979414779e-07, | |
| "loss": 0.3623, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.611231101511879, | |
| "grad_norm": 0.11509796231985092, | |
| "learning_rate": 5.029737421681446e-07, | |
| "loss": 0.3669, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.613390928725702, | |
| "grad_norm": 0.11190790683031082, | |
| "learning_rate": 4.97494992056754e-07, | |
| "loss": 0.3662, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.615550755939525, | |
| "grad_norm": 0.11598829925060272, | |
| "learning_rate": 4.920446822140673e-07, | |
| "loss": 0.3617, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6177105831533476, | |
| "grad_norm": 0.11533954739570618, | |
| "learning_rate": 4.866228470672041e-07, | |
| "loss": 0.3589, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6198704103671706, | |
| "grad_norm": 0.10564534366130829, | |
| "learning_rate": 4.812295208634238e-07, | |
| "loss": 0.3626, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.6220302375809936, | |
| "grad_norm": 0.11170712113380432, | |
| "learning_rate": 4.758647376699033e-07, | |
| "loss": 0.3672, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.624190064794816, | |
| "grad_norm": 0.11519314348697662, | |
| "learning_rate": 4.705285313735297e-07, | |
| "loss": 0.3666, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.626349892008639, | |
| "grad_norm": 0.11605649441480637, | |
| "learning_rate": 4.6522093568068307e-07, | |
| "loss": 0.3484, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.6285097192224622, | |
| "grad_norm": 0.11404189467430115, | |
| "learning_rate": 4.599419841170216e-07, | |
| "loss": 0.3555, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6306695464362853, | |
| "grad_norm": 0.11835578829050064, | |
| "learning_rate": 4.546917100272735e-07, | |
| "loss": 0.3552, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.632829373650108, | |
| "grad_norm": 0.11513664573431015, | |
| "learning_rate": 4.494701465750217e-07, | |
| "loss": 0.3522, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.634989200863931, | |
| "grad_norm": 0.11740648001432419, | |
| "learning_rate": 4.4427732674250045e-07, | |
| "loss": 0.3625, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.637149028077754, | |
| "grad_norm": 0.12071909755468369, | |
| "learning_rate": 4.391132833303807e-07, | |
| "loss": 0.3684, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.639308855291577, | |
| "grad_norm": 0.1136975884437561, | |
| "learning_rate": 4.3397804895756957e-07, | |
| "loss": 0.3684, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.6414686825053995, | |
| "grad_norm": 0.11149821430444717, | |
| "learning_rate": 4.2887165606099513e-07, | |
| "loss": 0.3603, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.6436285097192225, | |
| "grad_norm": 0.12100395560264587, | |
| "learning_rate": 4.237941368954124e-07, | |
| "loss": 0.3624, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.6457883369330455, | |
| "grad_norm": 0.1222655400633812, | |
| "learning_rate": 4.1874552353319107e-07, | |
| "loss": 0.3526, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.647948164146868, | |
| "grad_norm": 0.11921314895153046, | |
| "learning_rate": 4.137258478641176e-07, | |
| "loss": 0.3647, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.650107991360691, | |
| "grad_norm": 0.11398887634277344, | |
| "learning_rate": 4.087351415951918e-07, | |
| "loss": 0.3593, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.652267818574514, | |
| "grad_norm": 0.11155658215284348, | |
| "learning_rate": 4.0377343625042587e-07, | |
| "loss": 0.37, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.6544276457883367, | |
| "grad_norm": 0.1191490963101387, | |
| "learning_rate": 3.9884076317064813e-07, | |
| "loss": 0.3588, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.6565874730021597, | |
| "grad_norm": 0.12826910614967346, | |
| "learning_rate": 3.9393715351330243e-07, | |
| "loss": 0.3566, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.6587473002159827, | |
| "grad_norm": 0.11224586516618729, | |
| "learning_rate": 3.890626382522539e-07, | |
| "loss": 0.3604, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.6609071274298057, | |
| "grad_norm": 0.11304951459169388, | |
| "learning_rate": 3.8421724817758745e-07, | |
| "loss": 0.3719, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6630669546436287, | |
| "grad_norm": 0.10955885052680969, | |
| "learning_rate": 3.794010138954213e-07, | |
| "loss": 0.3611, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.6652267818574513, | |
| "grad_norm": 0.11885318905115128, | |
| "learning_rate": 3.7461396582771035e-07, | |
| "loss": 0.3732, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.6673866090712743, | |
| "grad_norm": 0.11816181242465973, | |
| "learning_rate": 3.698561342120499e-07, | |
| "loss": 0.3577, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.6695464362850974, | |
| "grad_norm": 0.11143229156732559, | |
| "learning_rate": 3.651275491014905e-07, | |
| "loss": 0.3561, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.67170626349892, | |
| "grad_norm": 0.113620825111866, | |
| "learning_rate": 3.604282403643472e-07, | |
| "loss": 0.3659, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.673866090712743, | |
| "grad_norm": 0.11192460358142853, | |
| "learning_rate": 3.557582376840063e-07, | |
| "loss": 0.3627, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.676025917926566, | |
| "grad_norm": 0.11559736728668213, | |
| "learning_rate": 3.511175705587433e-07, | |
| "loss": 0.3632, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.6781857451403885, | |
| "grad_norm": 0.11298345029354095, | |
| "learning_rate": 3.465062683015341e-07, | |
| "loss": 0.3617, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.6803455723542116, | |
| "grad_norm": 0.1136719286441803, | |
| "learning_rate": 3.419243600398703e-07, | |
| "loss": 0.3534, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.6825053995680346, | |
| "grad_norm": 0.11135457456111908, | |
| "learning_rate": 3.373718747155752e-07, | |
| "loss": 0.3723, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6846652267818576, | |
| "grad_norm": 0.10721197724342346, | |
| "learning_rate": 3.328488410846187e-07, | |
| "loss": 0.3551, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.6868250539956806, | |
| "grad_norm": 0.11308667808771133, | |
| "learning_rate": 3.283552877169399e-07, | |
| "loss": 0.3667, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.688984881209503, | |
| "grad_norm": 0.10848429799079895, | |
| "learning_rate": 3.2389124299626483e-07, | |
| "loss": 0.3643, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.691144708423326, | |
| "grad_norm": 0.11723221838474274, | |
| "learning_rate": 3.194567351199257e-07, | |
| "loss": 0.3717, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.693304535637149, | |
| "grad_norm": 0.12472040206193924, | |
| "learning_rate": 3.150517920986851e-07, | |
| "loss": 0.3608, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.695464362850972, | |
| "grad_norm": 0.11016938090324402, | |
| "learning_rate": 3.106764417565561e-07, | |
| "loss": 0.3588, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.697624190064795, | |
| "grad_norm": 0.11815854161977768, | |
| "learning_rate": 3.0633071173062966e-07, | |
| "loss": 0.3617, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.699784017278618, | |
| "grad_norm": 0.1177084818482399, | |
| "learning_rate": 3.0201462947089865e-07, | |
| "loss": 0.3576, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.7019438444924404, | |
| "grad_norm": 0.11179111897945404, | |
| "learning_rate": 2.9772822224008515e-07, | |
| "loss": 0.3667, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.7041036717062634, | |
| "grad_norm": 0.11454194784164429, | |
| "learning_rate": 2.9347151711346556e-07, | |
| "loss": 0.3707, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7062634989200864, | |
| "grad_norm": 0.10757472366094589, | |
| "learning_rate": 2.892445409787037e-07, | |
| "loss": 0.3628, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.708423326133909, | |
| "grad_norm": 0.11914849281311035, | |
| "learning_rate": 2.850473205356774e-07, | |
| "loss": 0.3468, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.710583153347732, | |
| "grad_norm": 0.1173713430762291, | |
| "learning_rate": 2.8087988229631325e-07, | |
| "loss": 0.3668, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.712742980561555, | |
| "grad_norm": 0.11365855485200882, | |
| "learning_rate": 2.76742252584416e-07, | |
| "loss": 0.359, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.714902807775378, | |
| "grad_norm": 0.11546127498149872, | |
| "learning_rate": 2.7263445753550275e-07, | |
| "loss": 0.364, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.717062634989201, | |
| "grad_norm": 0.11186777800321579, | |
| "learning_rate": 2.685565230966408e-07, | |
| "loss": 0.3526, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7192224622030237, | |
| "grad_norm": 0.10442403703927994, | |
| "learning_rate": 2.6450847502627883e-07, | |
| "loss": 0.3551, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.7213822894168467, | |
| "grad_norm": 0.12204797565937042, | |
| "learning_rate": 2.604903388940899e-07, | |
| "loss": 0.3587, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7235421166306697, | |
| "grad_norm": 0.11084363609552383, | |
| "learning_rate": 2.5650214008080544e-07, | |
| "loss": 0.3679, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.7257019438444923, | |
| "grad_norm": 0.10979737341403961, | |
| "learning_rate": 2.525439037780558e-07, | |
| "loss": 0.3717, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.7278617710583153, | |
| "grad_norm": 0.11145438998937607, | |
| "learning_rate": 2.486156549882135e-07, | |
| "loss": 0.3613, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.7300215982721383, | |
| "grad_norm": 0.11015837639570236, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.3652, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.732181425485961, | |
| "grad_norm": 0.1096833273768425, | |
| "learning_rate": 2.40849219009493e-07, | |
| "loss": 0.3531, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.734341252699784, | |
| "grad_norm": 0.109636589884758, | |
| "learning_rate": 2.3701108087764657e-07, | |
| "loss": 0.3596, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.736501079913607, | |
| "grad_norm": 0.11428305506706238, | |
| "learning_rate": 2.3320302837245846e-07, | |
| "loss": 0.3659, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.73866090712743, | |
| "grad_norm": 0.11387787014245987, | |
| "learning_rate": 2.2942508554765764e-07, | |
| "loss": 0.3726, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.740820734341253, | |
| "grad_norm": 0.10690239071846008, | |
| "learning_rate": 2.2567727626678527e-07, | |
| "loss": 0.3651, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.7429805615550755, | |
| "grad_norm": 0.10845934599637985, | |
| "learning_rate": 2.2195962420304083e-07, | |
| "loss": 0.3608, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.7451403887688985, | |
| "grad_norm": 0.11751694232225418, | |
| "learning_rate": 2.1827215283913683e-07, | |
| "loss": 0.3659, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.7473002159827216, | |
| "grad_norm": 0.10652041435241699, | |
| "learning_rate": 2.1461488546714425e-07, | |
| "loss": 0.3634, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.749460043196544, | |
| "grad_norm": 0.10296986997127533, | |
| "learning_rate": 2.1098784518835292e-07, | |
| "loss": 0.3632, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.751619870410367, | |
| "grad_norm": 0.10827996581792831, | |
| "learning_rate": 2.0739105491312028e-07, | |
| "loss": 0.3624, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.75377969762419, | |
| "grad_norm": 0.11208463460206985, | |
| "learning_rate": 2.0382453736072838e-07, | |
| "loss": 0.3552, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.7559395248380127, | |
| "grad_norm": 0.11274047195911407, | |
| "learning_rate": 2.0028831505924162e-07, | |
| "loss": 0.3613, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.7580993520518358, | |
| "grad_norm": 0.10478544235229492, | |
| "learning_rate": 1.967824103453597e-07, | |
| "loss": 0.3592, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.760259179265659, | |
| "grad_norm": 0.10351528972387314, | |
| "learning_rate": 1.9330684536428335e-07, | |
| "loss": 0.3693, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.762419006479482, | |
| "grad_norm": 0.11334282159805298, | |
| "learning_rate": 1.8986164206957037e-07, | |
| "loss": 0.3615, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.7645788336933044, | |
| "grad_norm": 0.10871846228837967, | |
| "learning_rate": 1.8644682222299703e-07, | |
| "loss": 0.3644, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7667386609071274, | |
| "grad_norm": 0.10826321691274643, | |
| "learning_rate": 1.8306240739442094e-07, | |
| "loss": 0.3599, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.7688984881209504, | |
| "grad_norm": 0.1105961948633194, | |
| "learning_rate": 1.7970841896164658e-07, | |
| "loss": 0.3652, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.7710583153347734, | |
| "grad_norm": 0.10997821390628815, | |
| "learning_rate": 1.7638487811028616e-07, | |
| "loss": 0.3675, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.773218142548596, | |
| "grad_norm": 0.1074373796582222, | |
| "learning_rate": 1.7309180583363062e-07, | |
| "loss": 0.3542, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.775377969762419, | |
| "grad_norm": 0.10459216684103012, | |
| "learning_rate": 1.6982922293251548e-07, | |
| "loss": 0.3538, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.777537796976242, | |
| "grad_norm": 0.10451044887304306, | |
| "learning_rate": 1.6659715001518583e-07, | |
| "loss": 0.367, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.7796976241900646, | |
| "grad_norm": 0.10947411507368088, | |
| "learning_rate": 1.6339560749717154e-07, | |
| "loss": 0.3515, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.7818574514038876, | |
| "grad_norm": 0.11110340058803558, | |
| "learning_rate": 1.6022461560115498e-07, | |
| "loss": 0.3603, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.7840172786177106, | |
| "grad_norm": 0.10515395551919937, | |
| "learning_rate": 1.5708419435684463e-07, | |
| "loss": 0.3547, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.786177105831533, | |
| "grad_norm": 0.10683929920196533, | |
| "learning_rate": 1.5397436360084784e-07, | |
| "loss": 0.3617, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.7883369330453562, | |
| "grad_norm": 0.10624652355909348, | |
| "learning_rate": 1.5089514297654594e-07, | |
| "loss": 0.3553, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.7904967602591793, | |
| "grad_norm": 0.11002147197723389, | |
| "learning_rate": 1.4784655193396947e-07, | |
| "loss": 0.3557, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7926565874730023, | |
| "grad_norm": 0.1125330999493599, | |
| "learning_rate": 1.448286097296764e-07, | |
| "loss": 0.3544, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.7948164146868253, | |
| "grad_norm": 0.11160624772310257, | |
| "learning_rate": 1.4184133542663014e-07, | |
| "loss": 0.3694, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.796976241900648, | |
| "grad_norm": 0.10507107526063919, | |
| "learning_rate": 1.388847478940797e-07, | |
| "loss": 0.3713, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.799136069114471, | |
| "grad_norm": 0.107913538813591, | |
| "learning_rate": 1.3595886580743677e-07, | |
| "loss": 0.3698, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.801295896328294, | |
| "grad_norm": 0.11146403104066849, | |
| "learning_rate": 1.330637076481639e-07, | |
| "loss": 0.36, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.8034557235421165, | |
| "grad_norm": 0.10874520242214203, | |
| "learning_rate": 1.3019929170365376e-07, | |
| "loss": 0.3639, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.8056155507559395, | |
| "grad_norm": 0.11767850816249847, | |
| "learning_rate": 1.2736563606711384e-07, | |
| "loss": 0.3618, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.8077753779697625, | |
| "grad_norm": 0.10746905952692032, | |
| "learning_rate": 1.2456275863745426e-07, | |
| "loss": 0.3624, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.809935205183585, | |
| "grad_norm": 0.10965242981910706, | |
| "learning_rate": 1.2179067711917015e-07, | |
| "loss": 0.3732, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.812095032397408, | |
| "grad_norm": 0.10720682889223099, | |
| "learning_rate": 1.1904940902223661e-07, | |
| "loss": 0.3661, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.814254859611231, | |
| "grad_norm": 0.11190472543239594, | |
| "learning_rate": 1.1633897166199227e-07, | |
| "loss": 0.3572, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.816414686825054, | |
| "grad_norm": 0.10630635917186737, | |
| "learning_rate": 1.136593821590326e-07, | |
| "loss": 0.3587, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.818574514038877, | |
| "grad_norm": 0.10910697281360626, | |
| "learning_rate": 1.1101065743910122e-07, | |
| "loss": 0.3666, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.8207343412526997, | |
| "grad_norm": 0.11752592027187347, | |
| "learning_rate": 1.0839281423298375e-07, | |
| "loss": 0.3638, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8228941684665227, | |
| "grad_norm": 0.11391156911849976, | |
| "learning_rate": 1.0580586907639912e-07, | |
| "loss": 0.3605, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.8250539956803458, | |
| "grad_norm": 0.11459757387638092, | |
| "learning_rate": 1.032498383099001e-07, | |
| "loss": 0.365, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.8272138228941683, | |
| "grad_norm": 0.10249683260917664, | |
| "learning_rate": 1.007247380787657e-07, | |
| "loss": 0.3609, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.8293736501079914, | |
| "grad_norm": 0.11776190996170044, | |
| "learning_rate": 9.823058433290178e-08, | |
| "loss": 0.3667, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.8315334773218144, | |
| "grad_norm": 0.10746931284666061, | |
| "learning_rate": 9.576739282673886e-08, | |
| "loss": 0.3598, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.833693304535637, | |
| "grad_norm": 0.1106642559170723, | |
| "learning_rate": 9.333517911913281e-08, | |
| "loss": 0.3627, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.83585313174946, | |
| "grad_norm": 0.1114298552274704, | |
| "learning_rate": 9.093395857326714e-08, | |
| "loss": 0.3521, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.838012958963283, | |
| "grad_norm": 0.11040709167718887, | |
| "learning_rate": 8.856374635655696e-08, | |
| "loss": 0.3618, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.8401727861771056, | |
| "grad_norm": 0.10548478364944458, | |
| "learning_rate": 8.622455744054958e-08, | |
| "loss": 0.3574, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.8423326133909286, | |
| "grad_norm": 0.1121056005358696, | |
| "learning_rate": 8.391640660083411e-08, | |
| "loss": 0.3693, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.8444924406047516, | |
| "grad_norm": 0.11348962038755417, | |
| "learning_rate": 8.163930841694589e-08, | |
| "loss": 0.3569, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.8466522678185746, | |
| "grad_norm": 0.10726695507764816, | |
| "learning_rate": 7.939327727227441e-08, | |
| "loss": 0.3667, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8488120950323976, | |
| "grad_norm": 0.10446982830762863, | |
| "learning_rate": 7.717832735397335e-08, | |
| "loss": 0.3685, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.85097192224622, | |
| "grad_norm": 0.11472396552562714, | |
| "learning_rate": 7.499447265286952e-08, | |
| "loss": 0.364, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.853131749460043, | |
| "grad_norm": 0.10750308632850647, | |
| "learning_rate": 7.284172696337688e-08, | |
| "loss": 0.3626, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.8552915766738662, | |
| "grad_norm": 0.11191460490226746, | |
| "learning_rate": 7.072010388340656e-08, | |
| "loss": 0.3623, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.857451403887689, | |
| "grad_norm": 0.10993245989084244, | |
| "learning_rate": 6.862961681428304e-08, | |
| "loss": 0.3549, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.859611231101512, | |
| "grad_norm": 0.11314646899700165, | |
| "learning_rate": 6.657027896065982e-08, | |
| "loss": 0.3542, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.861771058315335, | |
| "grad_norm": 0.1285964399576187, | |
| "learning_rate": 6.454210333043275e-08, | |
| "loss": 0.3572, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.8639308855291574, | |
| "grad_norm": 0.10818547010421753, | |
| "learning_rate": 6.254510273466186e-08, | |
| "loss": 0.3676, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.8660907127429804, | |
| "grad_norm": 0.10412049293518066, | |
| "learning_rate": 6.057928978748906e-08, | |
| "loss": 0.3685, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.8682505399568035, | |
| "grad_norm": 0.10978944599628448, | |
| "learning_rate": 5.864467690605613e-08, | |
| "loss": 0.3671, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8704103671706265, | |
| "grad_norm": 0.1174926683306694, | |
| "learning_rate": 5.674127631043025e-08, | |
| "loss": 0.3658, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.8725701943844495, | |
| "grad_norm": 0.11143560707569122, | |
| "learning_rate": 5.4869100023523526e-08, | |
| "loss": 0.3624, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.874730021598272, | |
| "grad_norm": 0.10805241763591766, | |
| "learning_rate": 5.302815987101917e-08, | |
| "loss": 0.3636, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.876889848812095, | |
| "grad_norm": 0.11456768959760666, | |
| "learning_rate": 5.121846748129544e-08, | |
| "loss": 0.3537, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.879049676025918, | |
| "grad_norm": 0.1143973246216774, | |
| "learning_rate": 4.944003428535349e-08, | |
| "loss": 0.361, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.8812095032397407, | |
| "grad_norm": 0.11492909491062164, | |
| "learning_rate": 4.769287151674407e-08, | |
| "loss": 0.3529, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.8833693304535637, | |
| "grad_norm": 0.11312732100486755, | |
| "learning_rate": 4.597699021149649e-08, | |
| "loss": 0.3604, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.8855291576673867, | |
| "grad_norm": 0.11396172642707825, | |
| "learning_rate": 4.429240120804923e-08, | |
| "loss": 0.3601, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8876889848812093, | |
| "grad_norm": 0.10564181953668594, | |
| "learning_rate": 4.263911514718222e-08, | |
| "loss": 0.365, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.8898488120950323, | |
| "grad_norm": 0.11512638628482819, | |
| "learning_rate": 4.10171424719491e-08, | |
| "loss": 0.3658, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.8920086393088553, | |
| "grad_norm": 0.11602869629859924, | |
| "learning_rate": 3.9426493427611177e-08, | |
| "loss": 0.3611, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.8941684665226783, | |
| "grad_norm": 0.11228124052286148, | |
| "learning_rate": 3.786717806157136e-08, | |
| "loss": 0.3615, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.896328293736501, | |
| "grad_norm": 0.1036510244011879, | |
| "learning_rate": 3.633920622331311e-08, | |
| "loss": 0.3621, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.898488120950324, | |
| "grad_norm": 0.11727307736873627, | |
| "learning_rate": 3.4842587564337674e-08, | |
| "loss": 0.3569, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.900647948164147, | |
| "grad_norm": 0.10487374663352966, | |
| "learning_rate": 3.337733153810141e-08, | |
| "loss": 0.362, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.90280777537797, | |
| "grad_norm": 0.10877780616283417, | |
| "learning_rate": 3.194344739995803e-08, | |
| "loss": 0.349, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.9049676025917925, | |
| "grad_norm": 0.11223684251308441, | |
| "learning_rate": 3.054094420709863e-08, | |
| "loss": 0.365, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.9071274298056156, | |
| "grad_norm": 0.103155717253685, | |
| "learning_rate": 2.9169830818496226e-08, | |
| "loss": 0.3592, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.9092872570194386, | |
| "grad_norm": 0.11470197141170502, | |
| "learning_rate": 2.783011589484741e-08, | |
| "loss": 0.3578, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.911447084233261, | |
| "grad_norm": 0.12167331576347351, | |
| "learning_rate": 2.6521807898520214e-08, | |
| "loss": 0.353, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.913606911447084, | |
| "grad_norm": 0.1081666648387909, | |
| "learning_rate": 2.5244915093499134e-08, | |
| "loss": 0.3703, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.915766738660907, | |
| "grad_norm": 0.11052247881889343, | |
| "learning_rate": 2.3999445545332955e-08, | |
| "loss": 0.3593, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9179265658747298, | |
| "grad_norm": 0.10058227181434631, | |
| "learning_rate": 2.2785407121084236e-08, | |
| "loss": 0.371, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.920086393088553, | |
| "grad_norm": 0.11320126056671143, | |
| "learning_rate": 2.1602807489279344e-08, | |
| "loss": 0.3549, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.922246220302376, | |
| "grad_norm": 0.11561363190412521, | |
| "learning_rate": 2.0451654119860164e-08, | |
| "loss": 0.3578, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.924406047516199, | |
| "grad_norm": 0.10961954295635223, | |
| "learning_rate": 1.9331954284137476e-08, | |
| "loss": 0.3676, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.926565874730022, | |
| "grad_norm": 0.10924555361270905, | |
| "learning_rate": 1.8243715054744315e-08, | |
| "loss": 0.3726, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.9287257019438444, | |
| "grad_norm": 0.10880248993635178, | |
| "learning_rate": 1.71869433055899e-08, | |
| "loss": 0.3547, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.9308855291576674, | |
| "grad_norm": 0.10798300057649612, | |
| "learning_rate": 1.6161645711819664e-08, | |
| "loss": 0.3569, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.9330453563714904, | |
| "grad_norm": 0.11236506700515747, | |
| "learning_rate": 1.5167828749770853e-08, | |
| "loss": 0.3681, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.935205183585313, | |
| "grad_norm": 0.10835976153612137, | |
| "learning_rate": 1.4205498696930332e-08, | |
| "loss": 0.3613, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.937365010799136, | |
| "grad_norm": 0.12566576898097992, | |
| "learning_rate": 1.3274661631899055e-08, | |
| "loss": 0.3637, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.939524838012959, | |
| "grad_norm": 0.10957465320825577, | |
| "learning_rate": 1.2375323434348773e-08, | |
| "loss": 0.3504, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.9416846652267816, | |
| "grad_norm": 0.10701923072338104, | |
| "learning_rate": 1.1507489784989278e-08, | |
| "loss": 0.3607, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.9438444924406046, | |
| "grad_norm": 0.10923349112272263, | |
| "learning_rate": 1.067116616552899e-08, | |
| "loss": 0.3706, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.9460043196544277, | |
| "grad_norm": 0.11076433211565018, | |
| "learning_rate": 9.866357858642206e-09, | |
| "loss": 0.3746, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9481641468682507, | |
| "grad_norm": 0.10817807167768478, | |
| "learning_rate": 9.09306994793635e-09, | |
| "loss": 0.3648, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.9503239740820737, | |
| "grad_norm": 0.10961637645959854, | |
| "learning_rate": 8.351307317917002e-09, | |
| "loss": 0.3571, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.9524838012958963, | |
| "grad_norm": 0.10122332721948624, | |
| "learning_rate": 7.641074653961244e-09, | |
| "loss": 0.3681, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.9546436285097193, | |
| "grad_norm": 0.10331834852695465, | |
| "learning_rate": 6.962376442284368e-09, | |
| "loss": 0.3566, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.9568034557235423, | |
| "grad_norm": 0.10392733663320541, | |
| "learning_rate": 6.315216969912663e-09, | |
| "loss": 0.3422, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.958963282937365, | |
| "grad_norm": 0.1083427146077156, | |
| "learning_rate": 5.699600324657328e-09, | |
| "loss": 0.3711, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.961123110151188, | |
| "grad_norm": 0.1106184870004654, | |
| "learning_rate": 5.115530395087276e-09, | |
| "loss": 0.3639, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.963282937365011, | |
| "grad_norm": 0.10812865942716599, | |
| "learning_rate": 4.5630108705063684e-09, | |
| "loss": 0.3647, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.9654427645788335, | |
| "grad_norm": 0.11043433845043182, | |
| "learning_rate": 4.042045240927883e-09, | |
| "loss": 0.3706, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 2.9676025917926565, | |
| "grad_norm": 0.1146334782242775, | |
| "learning_rate": 3.5526367970539765e-09, | |
| "loss": 0.3564, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9697624190064795, | |
| "grad_norm": 0.11209335923194885, | |
| "learning_rate": 3.094788630254031e-09, | |
| "loss": 0.369, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.971922246220302, | |
| "grad_norm": 0.10334824025630951, | |
| "learning_rate": 2.6685036325457826e-09, | |
| "loss": 0.3614, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.974082073434125, | |
| "grad_norm": 0.12000252306461334, | |
| "learning_rate": 2.2737844965775578e-09, | |
| "loss": 0.3677, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 2.976241900647948, | |
| "grad_norm": 0.10969026386737823, | |
| "learning_rate": 1.9106337156099553e-09, | |
| "loss": 0.3506, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.978401727861771, | |
| "grad_norm": 0.11796250939369202, | |
| "learning_rate": 1.5790535835003006e-09, | |
| "loss": 0.3698, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 2.980561555075594, | |
| "grad_norm": 0.11176804453134537, | |
| "learning_rate": 1.2790461946887712e-09, | |
| "loss": 0.3574, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9827213822894167, | |
| "grad_norm": 0.10805931687355042, | |
| "learning_rate": 1.0106134441850712e-09, | |
| "loss": 0.3732, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 2.9848812095032398, | |
| "grad_norm": 0.11747419834136963, | |
| "learning_rate": 7.737570275573314e-10, | |
| "loss": 0.3544, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.987041036717063, | |
| "grad_norm": 0.11195072531700134, | |
| "learning_rate": 5.684784409182298e-10, | |
| "loss": 0.3743, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 2.9892008639308854, | |
| "grad_norm": 0.11737102270126343, | |
| "learning_rate": 3.9477898091944135e-10, | |
| "loss": 0.3672, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9913606911447084, | |
| "grad_norm": 0.1034155786037445, | |
| "learning_rate": 2.5265974474109054e-10, | |
| "loss": 0.3586, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.9935205183585314, | |
| "grad_norm": 0.11616694182157516, | |
| "learning_rate": 1.4212163008509028e-10, | |
| "loss": 0.36, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.995680345572354, | |
| "grad_norm": 0.11564143747091293, | |
| "learning_rate": 6.316533517125578e-11, | |
| "loss": 0.3624, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 2.997840172786177, | |
| "grad_norm": 0.11294636130332947, | |
| "learning_rate": 1.57913587295333e-11, | |
| "loss": 0.3607, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.10668095201253891, | |
| "learning_rate": 0.0, | |
| "loss": 0.3576, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1389, | |
| "total_flos": 2.853679693771571e+16, | |
| "train_loss": 0.023199697249737295, | |
| "train_runtime": 5911.0235, | |
| "train_samples_per_second": 90.117, | |
| "train_steps_per_second": 0.235 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1389, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.853679693771571e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |