| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 66.85236768802228, | |
| "eval_steps": 500, | |
| "global_step": 24000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.055710306406685235, | |
| "grad_norm": 14.402867878604889, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 10.1886, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11142061281337047, | |
| "grad_norm": 13.635298426580595, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 8.826, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1671309192200557, | |
| "grad_norm": 4.640229834066508, | |
| "learning_rate": 7.999996773810157e-05, | |
| "loss": 7.8841, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.22284122562674094, | |
| "grad_norm": 4.771787721259069, | |
| "learning_rate": 7.999970964324714e-05, | |
| "loss": 5.9641, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2785515320334262, | |
| "grad_norm": 2.673493851023645, | |
| "learning_rate": 7.999919345531461e-05, | |
| "loss": 4.8468, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3342618384401114, | |
| "grad_norm": 4.303381236511131, | |
| "learning_rate": 7.999841917785668e-05, | |
| "loss": 4.5736, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.38997214484679665, | |
| "grad_norm": 3.0474327568183464, | |
| "learning_rate": 7.999738681620232e-05, | |
| "loss": 4.492, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4456824512534819, | |
| "grad_norm": 4.693843207680094, | |
| "learning_rate": 7.999609637745683e-05, | |
| "loss": 4.2847, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5013927576601671, | |
| "grad_norm": 2.9247230468930265, | |
| "learning_rate": 7.999454787050167e-05, | |
| "loss": 3.8923, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5571030640668524, | |
| "grad_norm": 1.489196861741293, | |
| "learning_rate": 7.999274130599451e-05, | |
| "loss": 3.7348, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6128133704735376, | |
| "grad_norm": 1.61920958013195, | |
| "learning_rate": 7.999067669636909e-05, | |
| "loss": 3.6525, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6685236768802229, | |
| "grad_norm": 1.7425863171418448, | |
| "learning_rate": 7.998835405583514e-05, | |
| "loss": 3.606, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.724233983286908, | |
| "grad_norm": 1.5257744205454054, | |
| "learning_rate": 7.998577340037835e-05, | |
| "loss": 3.5769, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7799442896935933, | |
| "grad_norm": 2.667757094800373, | |
| "learning_rate": 7.998293474776016e-05, | |
| "loss": 3.5703, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8356545961002786, | |
| "grad_norm": 1.7404959255578403, | |
| "learning_rate": 7.997983811751768e-05, | |
| "loss": 3.5621, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8913649025069638, | |
| "grad_norm": 1.634375074224456, | |
| "learning_rate": 7.99764835309636e-05, | |
| "loss": 3.5179, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.947075208913649, | |
| "grad_norm": 1.7342700062168488, | |
| "learning_rate": 7.997287101118597e-05, | |
| "loss": 3.4854, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.0027855153203342, | |
| "grad_norm": 1.3788139455421626, | |
| "learning_rate": 7.996900058304807e-05, | |
| "loss": 3.4837, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0584958217270195, | |
| "grad_norm": 2.0400921187647367, | |
| "learning_rate": 7.996487227318829e-05, | |
| "loss": 3.4779, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.1142061281337048, | |
| "grad_norm": 1.3557958841205344, | |
| "learning_rate": 7.996048611001985e-05, | |
| "loss": 3.4484, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.16991643454039, | |
| "grad_norm": 1.2968269160609383, | |
| "learning_rate": 7.995584212373067e-05, | |
| "loss": 3.4364, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.2256267409470751, | |
| "grad_norm": 1.552015046378834, | |
| "learning_rate": 7.995094034628315e-05, | |
| "loss": 3.428, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2813370473537604, | |
| "grad_norm": 1.5830966652724554, | |
| "learning_rate": 7.994578081141396e-05, | |
| "loss": 3.4002, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.3370473537604457, | |
| "grad_norm": 1.1969791506799554, | |
| "learning_rate": 7.994036355463378e-05, | |
| "loss": 3.3879, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.392757660167131, | |
| "grad_norm": 1.615715341656064, | |
| "learning_rate": 7.993468861322705e-05, | |
| "loss": 3.3804, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.448467966573816, | |
| "grad_norm": 1.7044068053200199, | |
| "learning_rate": 7.992875602625179e-05, | |
| "loss": 3.3872, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.5041782729805013, | |
| "grad_norm": 1.376009869207643, | |
| "learning_rate": 7.99225658345392e-05, | |
| "loss": 3.3729, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5598885793871866, | |
| "grad_norm": 1.3796323292210666, | |
| "learning_rate": 7.991611808069354e-05, | |
| "loss": 3.3832, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.615598885793872, | |
| "grad_norm": 1.5448889484720325, | |
| "learning_rate": 7.990941280909165e-05, | |
| "loss": 3.372, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6713091922005572, | |
| "grad_norm": 1.492544519723889, | |
| "learning_rate": 7.990245006588282e-05, | |
| "loss": 3.3374, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.7270194986072425, | |
| "grad_norm": 1.9983170136669428, | |
| "learning_rate": 7.98952298989884e-05, | |
| "loss": 3.3147, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.7827298050139275, | |
| "grad_norm": 1.3253294659757162, | |
| "learning_rate": 7.988775235810143e-05, | |
| "loss": 3.3236, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.8384401114206128, | |
| "grad_norm": 1.235983274303143, | |
| "learning_rate": 7.988001749468634e-05, | |
| "loss": 3.293, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.894150417827298, | |
| "grad_norm": 1.4225853711687855, | |
| "learning_rate": 7.987202536197861e-05, | |
| "loss": 3.3039, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.9498607242339832, | |
| "grad_norm": 1.3045957220402902, | |
| "learning_rate": 7.986377601498437e-05, | |
| "loss": 3.2981, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.0055710306406684, | |
| "grad_norm": 1.3450960062726653, | |
| "learning_rate": 7.985526951048004e-05, | |
| "loss": 3.2797, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.0612813370473537, | |
| "grad_norm": 1.3839789031500191, | |
| "learning_rate": 7.984650590701197e-05, | |
| "loss": 3.2485, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.116991643454039, | |
| "grad_norm": 1.2061805725063066, | |
| "learning_rate": 7.983748526489592e-05, | |
| "loss": 3.2598, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.1727019498607243, | |
| "grad_norm": 1.272502438657944, | |
| "learning_rate": 7.98282076462168e-05, | |
| "loss": 3.2587, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.2284122562674096, | |
| "grad_norm": 1.2869044872960342, | |
| "learning_rate": 7.981867311482816e-05, | |
| "loss": 3.2227, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.284122562674095, | |
| "grad_norm": 1.3303768462152665, | |
| "learning_rate": 7.980888173635174e-05, | |
| "loss": 3.2648, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.33983286908078, | |
| "grad_norm": 1.3112716268503581, | |
| "learning_rate": 7.979883357817706e-05, | |
| "loss": 3.2745, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.3955431754874654, | |
| "grad_norm": 1.286190262319452, | |
| "learning_rate": 7.978852870946091e-05, | |
| "loss": 3.2425, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.4512534818941503, | |
| "grad_norm": 1.220565008870163, | |
| "learning_rate": 7.977796720112692e-05, | |
| "loss": 3.2243, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.5069637883008355, | |
| "grad_norm": 1.2416999049169055, | |
| "learning_rate": 7.976714912586503e-05, | |
| "loss": 3.2217, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.562674094707521, | |
| "grad_norm": 1.649909126615191, | |
| "learning_rate": 7.975607455813105e-05, | |
| "loss": 3.2232, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.618384401114206, | |
| "grad_norm": 1.2360938114511875, | |
| "learning_rate": 7.974474357414606e-05, | |
| "loss": 3.1888, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.6740947075208914, | |
| "grad_norm": 1.3519864827851877, | |
| "learning_rate": 7.973315625189597e-05, | |
| "loss": 3.1782, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.7298050139275767, | |
| "grad_norm": 1.2890437174386116, | |
| "learning_rate": 7.972131267113096e-05, | |
| "loss": 3.192, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.785515320334262, | |
| "grad_norm": 1.2063188725654215, | |
| "learning_rate": 7.970921291336485e-05, | |
| "loss": 3.1869, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.841225626740947, | |
| "grad_norm": 1.2749039180973243, | |
| "learning_rate": 7.969685706187467e-05, | |
| "loss": 3.1663, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.896935933147632, | |
| "grad_norm": 1.1624439689926256, | |
| "learning_rate": 7.968424520170001e-05, | |
| "loss": 3.1558, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.9526462395543174, | |
| "grad_norm": 1.3645084603771966, | |
| "learning_rate": 7.967137741964243e-05, | |
| "loss": 3.2151, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.0083565459610027, | |
| "grad_norm": 1.3095011308845006, | |
| "learning_rate": 7.965825380426492e-05, | |
| "loss": 3.1241, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 3.064066852367688, | |
| "grad_norm": 1.3067653478209693, | |
| "learning_rate": 7.96448744458912e-05, | |
| "loss": 3.1433, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.1197771587743732, | |
| "grad_norm": 1.299162093757844, | |
| "learning_rate": 7.963123943660518e-05, | |
| "loss": 3.1515, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.1754874651810585, | |
| "grad_norm": 1.1814155508957564, | |
| "learning_rate": 7.961734887025032e-05, | |
| "loss": 3.1658, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.231197771587744, | |
| "grad_norm": 1.4814086313964214, | |
| "learning_rate": 7.96032028424289e-05, | |
| "loss": 3.1477, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.286908077994429, | |
| "grad_norm": 1.2408741761500102, | |
| "learning_rate": 7.958880145050149e-05, | |
| "loss": 3.1562, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.3426183844011144, | |
| "grad_norm": 1.240867953717542, | |
| "learning_rate": 7.957414479358615e-05, | |
| "loss": 3.128, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.3983286908077996, | |
| "grad_norm": 1.3259135820081311, | |
| "learning_rate": 7.955923297255786e-05, | |
| "loss": 3.1341, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.4540389972144845, | |
| "grad_norm": 1.24731715301278, | |
| "learning_rate": 7.954406609004775e-05, | |
| "loss": 3.1352, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.5097493036211698, | |
| "grad_norm": 1.4328700677390847, | |
| "learning_rate": 7.952864425044241e-05, | |
| "loss": 3.1776, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 3.565459610027855, | |
| "grad_norm": 1.2128896131543132, | |
| "learning_rate": 7.951296755988323e-05, | |
| "loss": 3.155, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.6211699164345403, | |
| "grad_norm": 1.2116171750344513, | |
| "learning_rate": 7.949703612626555e-05, | |
| "loss": 3.1577, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.6768802228412256, | |
| "grad_norm": 1.1238332423339203, | |
| "learning_rate": 7.948085005923804e-05, | |
| "loss": 3.1176, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.732590529247911, | |
| "grad_norm": 1.2861584858089754, | |
| "learning_rate": 7.94644094702019e-05, | |
| "loss": 3.1444, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.788300835654596, | |
| "grad_norm": 1.392865877163545, | |
| "learning_rate": 7.944771447231002e-05, | |
| "loss": 3.1275, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.8440111420612815, | |
| "grad_norm": 1.319680796984498, | |
| "learning_rate": 7.943076518046636e-05, | |
| "loss": 3.1178, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.8997214484679663, | |
| "grad_norm": 1.6517648664445108, | |
| "learning_rate": 7.9413561711325e-05, | |
| "loss": 3.113, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.9554317548746516, | |
| "grad_norm": 1.2610682032071576, | |
| "learning_rate": 7.939610418328943e-05, | |
| "loss": 3.1197, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 4.011142061281337, | |
| "grad_norm": 1.451143416466195, | |
| "learning_rate": 7.937839271651169e-05, | |
| "loss": 3.081, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 4.066852367688022, | |
| "grad_norm": 1.377524653184373, | |
| "learning_rate": 7.936042743289158e-05, | |
| "loss": 3.0716, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 4.1225626740947074, | |
| "grad_norm": 1.2134154671846935, | |
| "learning_rate": 7.934220845607582e-05, | |
| "loss": 3.0934, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 4.178272980501393, | |
| "grad_norm": 1.1951444826901634, | |
| "learning_rate": 7.932373591145714e-05, | |
| "loss": 3.0666, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.233983286908078, | |
| "grad_norm": 1.3540057345367065, | |
| "learning_rate": 7.93050099261735e-05, | |
| "loss": 3.1106, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 4.289693593314763, | |
| "grad_norm": 1.0915162692508482, | |
| "learning_rate": 7.928603062910715e-05, | |
| "loss": 3.0979, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 4.345403899721449, | |
| "grad_norm": 1.3408430692556432, | |
| "learning_rate": 7.926679815088376e-05, | |
| "loss": 3.0822, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.401114206128134, | |
| "grad_norm": 1.244967969301378, | |
| "learning_rate": 7.924731262387156e-05, | |
| "loss": 3.0636, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 4.456824512534819, | |
| "grad_norm": 1.2480585827601505, | |
| "learning_rate": 7.922757418218038e-05, | |
| "loss": 3.0699, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.512534818941504, | |
| "grad_norm": 1.5801893483515768, | |
| "learning_rate": 7.920758296166072e-05, | |
| "loss": 3.0814, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 4.56824512534819, | |
| "grad_norm": 1.2273710344040911, | |
| "learning_rate": 7.918733909990287e-05, | |
| "loss": 3.0844, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.623955431754875, | |
| "grad_norm": 1.2276563480543434, | |
| "learning_rate": 7.916684273623593e-05, | |
| "loss": 3.042, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 4.67966573816156, | |
| "grad_norm": 1.2696026342294924, | |
| "learning_rate": 7.914609401172687e-05, | |
| "loss": 3.0693, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.735376044568245, | |
| "grad_norm": 1.2199175574002186, | |
| "learning_rate": 7.912509306917949e-05, | |
| "loss": 3.0728, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.791086350974931, | |
| "grad_norm": 1.2583579762159638, | |
| "learning_rate": 7.910384005313353e-05, | |
| "loss": 3.0661, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.846796657381615, | |
| "grad_norm": 1.1722768148065734, | |
| "learning_rate": 7.908233510986363e-05, | |
| "loss": 3.0687, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.9025069637883005, | |
| "grad_norm": 1.3035051706656053, | |
| "learning_rate": 7.906057838737831e-05, | |
| "loss": 3.032, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.958217270194986, | |
| "grad_norm": 1.1974756008535596, | |
| "learning_rate": 7.903857003541898e-05, | |
| "loss": 3.0866, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 5.013927576601671, | |
| "grad_norm": 1.1642430602684481, | |
| "learning_rate": 7.901631020545893e-05, | |
| "loss": 3.0565, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.069637883008356, | |
| "grad_norm": 1.4004497345779519, | |
| "learning_rate": 7.899379905070219e-05, | |
| "loss": 3.0445, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 5.125348189415042, | |
| "grad_norm": 1.17997512711719, | |
| "learning_rate": 7.89710367260826e-05, | |
| "loss": 3.035, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 5.181058495821727, | |
| "grad_norm": 1.3826862632813843, | |
| "learning_rate": 7.894802338826267e-05, | |
| "loss": 3.0447, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 5.236768802228412, | |
| "grad_norm": 1.1816414577958267, | |
| "learning_rate": 7.89247591956325e-05, | |
| "loss": 3.0637, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 5.2924791086350975, | |
| "grad_norm": 1.1654353865755456, | |
| "learning_rate": 7.890124430830871e-05, | |
| "loss": 3.0468, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.348189415041783, | |
| "grad_norm": 1.165385013993255, | |
| "learning_rate": 7.887747888813336e-05, | |
| "loss": 3.0313, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 5.403899721448468, | |
| "grad_norm": 1.2162837114192384, | |
| "learning_rate": 7.88534630986728e-05, | |
| "loss": 3.0466, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 5.459610027855153, | |
| "grad_norm": 1.20720099919841, | |
| "learning_rate": 7.882919710521653e-05, | |
| "loss": 3.0551, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 5.515320334261839, | |
| "grad_norm": 1.177139436715742, | |
| "learning_rate": 7.880468107477611e-05, | |
| "loss": 3.0376, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 5.571030640668524, | |
| "grad_norm": 1.1524202569240398, | |
| "learning_rate": 7.8779915176084e-05, | |
| "loss": 3.0291, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.626740947075209, | |
| "grad_norm": 1.2146587030127356, | |
| "learning_rate": 7.875489957959237e-05, | |
| "loss": 3.0191, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 5.6824512534818945, | |
| "grad_norm": 1.128843488220497, | |
| "learning_rate": 7.872963445747195e-05, | |
| "loss": 3.0227, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 5.73816155988858, | |
| "grad_norm": 1.0994966342019519, | |
| "learning_rate": 7.870411998361084e-05, | |
| "loss": 3.02, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 5.793871866295264, | |
| "grad_norm": 1.2808892137589254, | |
| "learning_rate": 7.867835633361329e-05, | |
| "loss": 3.0469, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 5.84958217270195, | |
| "grad_norm": 1.5413808282647536, | |
| "learning_rate": 7.865234368479853e-05, | |
| "loss": 3.0436, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.905292479108635, | |
| "grad_norm": 1.2503860090167789, | |
| "learning_rate": 7.862608221619959e-05, | |
| "loss": 3.0106, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 5.96100278551532, | |
| "grad_norm": 1.125474961018013, | |
| "learning_rate": 7.859957210856188e-05, | |
| "loss": 3.0519, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 6.016713091922005, | |
| "grad_norm": 1.1689492826416197, | |
| "learning_rate": 7.857281354434221e-05, | |
| "loss": 2.9989, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 6.072423398328691, | |
| "grad_norm": 1.154286021730802, | |
| "learning_rate": 7.854580670770731e-05, | |
| "loss": 3.0334, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 6.128133704735376, | |
| "grad_norm": 1.4350398534810123, | |
| "learning_rate": 7.851855178453272e-05, | |
| "loss": 2.988, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 6.183844011142061, | |
| "grad_norm": 1.113383813885033, | |
| "learning_rate": 7.84910489624014e-05, | |
| "loss": 2.9763, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 6.2395543175487465, | |
| "grad_norm": 1.1728629584155144, | |
| "learning_rate": 7.846329843060248e-05, | |
| "loss": 3.0121, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 6.295264623955432, | |
| "grad_norm": 1.3311805526252927, | |
| "learning_rate": 7.843530038012998e-05, | |
| "loss": 3.0093, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 6.350974930362117, | |
| "grad_norm": 1.6172845191063454, | |
| "learning_rate": 7.840705500368151e-05, | |
| "loss": 3.006, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 6.406685236768802, | |
| "grad_norm": 1.11224616315367, | |
| "learning_rate": 7.837856249565682e-05, | |
| "loss": 3.0092, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.462395543175488, | |
| "grad_norm": 1.2756438414289364, | |
| "learning_rate": 7.834982305215663e-05, | |
| "loss": 2.992, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 6.518105849582173, | |
| "grad_norm": 1.2405302540136935, | |
| "learning_rate": 7.832083687098119e-05, | |
| "loss": 3.0005, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 6.573816155988858, | |
| "grad_norm": 1.16507749139941, | |
| "learning_rate": 7.829160415162888e-05, | |
| "loss": 2.9687, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 6.629526462395543, | |
| "grad_norm": 1.1740957839848314, | |
| "learning_rate": 7.826212509529497e-05, | |
| "loss": 2.99, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 6.685236768802229, | |
| "grad_norm": 1.1161100776511597, | |
| "learning_rate": 7.823239990487008e-05, | |
| "loss": 2.9827, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.740947075208914, | |
| "grad_norm": 1.157820224450206, | |
| "learning_rate": 7.820242878493888e-05, | |
| "loss": 2.9993, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 6.796657381615599, | |
| "grad_norm": 1.1204269029527796, | |
| "learning_rate": 7.817221194177869e-05, | |
| "loss": 2.9845, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 6.852367688022284, | |
| "grad_norm": 1.1227241115622848, | |
| "learning_rate": 7.814174958335797e-05, | |
| "loss": 3.0135, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 6.908077994428969, | |
| "grad_norm": 1.314572529939298, | |
| "learning_rate": 7.8111041919335e-05, | |
| "loss": 3.0121, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 6.963788300835654, | |
| "grad_norm": 1.3816466804127303, | |
| "learning_rate": 7.808008916105636e-05, | |
| "loss": 3.0031, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.0194986072423395, | |
| "grad_norm": 1.1289207328451878, | |
| "learning_rate": 7.804889152155548e-05, | |
| "loss": 2.9677, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 7.075208913649025, | |
| "grad_norm": 1.1378808677658065, | |
| "learning_rate": 7.801744921555127e-05, | |
| "loss": 2.9911, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 7.13091922005571, | |
| "grad_norm": 1.2254491174881055, | |
| "learning_rate": 7.798576245944647e-05, | |
| "loss": 2.9853, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 7.186629526462395, | |
| "grad_norm": 1.1570802675245002, | |
| "learning_rate": 7.795383147132631e-05, | |
| "loss": 2.9589, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 7.242339832869081, | |
| "grad_norm": 1.2894488302976834, | |
| "learning_rate": 7.792165647095696e-05, | |
| "loss": 2.9776, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 7.298050139275766, | |
| "grad_norm": 1.0528540016974788, | |
| "learning_rate": 7.788923767978396e-05, | |
| "loss": 2.96, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 7.353760445682451, | |
| "grad_norm": 1.2125786891416614, | |
| "learning_rate": 7.785657532093085e-05, | |
| "loss": 3.0041, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 7.4094707520891365, | |
| "grad_norm": 1.1920890603213412, | |
| "learning_rate": 7.78236696191974e-05, | |
| "loss": 2.9508, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 7.465181058495822, | |
| "grad_norm": 1.2174124036610061, | |
| "learning_rate": 7.779052080105831e-05, | |
| "loss": 2.9744, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 7.520891364902507, | |
| "grad_norm": 1.1450363781873376, | |
| "learning_rate": 7.77571290946615e-05, | |
| "loss": 2.9648, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 7.576601671309192, | |
| "grad_norm": 1.0906384870617993, | |
| "learning_rate": 7.772349472982652e-05, | |
| "loss": 2.9472, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 7.632311977715878, | |
| "grad_norm": 1.321934725618673, | |
| "learning_rate": 7.768961793804312e-05, | |
| "loss": 2.9812, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 7.688022284122563, | |
| "grad_norm": 1.3145051192938724, | |
| "learning_rate": 7.765549895246952e-05, | |
| "loss": 2.9936, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 7.743732590529248, | |
| "grad_norm": 1.1688841213500007, | |
| "learning_rate": 7.762113800793083e-05, | |
| "loss": 2.9673, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 7.7994428969359335, | |
| "grad_norm": 1.3151433447911725, | |
| "learning_rate": 7.758653534091746e-05, | |
| "loss": 2.9899, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.855153203342619, | |
| "grad_norm": 1.1890842453445192, | |
| "learning_rate": 7.75516911895835e-05, | |
| "loss": 2.9372, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 7.910863509749303, | |
| "grad_norm": 1.1869067775771354, | |
| "learning_rate": 7.751660579374505e-05, | |
| "loss": 2.9741, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 7.9665738161559885, | |
| "grad_norm": 1.2713091007536645, | |
| "learning_rate": 7.74812793948786e-05, | |
| "loss": 2.9583, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 8.022284122562674, | |
| "grad_norm": 1.2300818760281924, | |
| "learning_rate": 7.74457122361193e-05, | |
| "loss": 2.9214, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 8.07799442896936, | |
| "grad_norm": 1.1631255228333053, | |
| "learning_rate": 7.740990456225944e-05, | |
| "loss": 2.9644, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 8.133704735376044, | |
| "grad_norm": 1.2750643723536295, | |
| "learning_rate": 7.737385661974655e-05, | |
| "loss": 2.9401, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 8.18941504178273, | |
| "grad_norm": 1.1034800758000585, | |
| "learning_rate": 7.733756865668189e-05, | |
| "loss": 2.9726, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 8.245125348189415, | |
| "grad_norm": 1.1418415045379222, | |
| "learning_rate": 7.730104092281867e-05, | |
| "loss": 2.9504, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 8.300835654596101, | |
| "grad_norm": 1.1744854672216198, | |
| "learning_rate": 7.726427366956026e-05, | |
| "loss": 2.9361, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 8.356545961002785, | |
| "grad_norm": 1.3072658416879444, | |
| "learning_rate": 7.722726714995862e-05, | |
| "loss": 2.9589, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.412256267409472, | |
| "grad_norm": 1.441058710439587, | |
| "learning_rate": 7.719002161871242e-05, | |
| "loss": 2.9417, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 8.467966573816156, | |
| "grad_norm": 1.067582173805221, | |
| "learning_rate": 7.715253733216534e-05, | |
| "loss": 2.9067, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 8.52367688022284, | |
| "grad_norm": 1.1284427897815015, | |
| "learning_rate": 7.711481454830433e-05, | |
| "loss": 2.899, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 8.579387186629527, | |
| "grad_norm": 1.081964247959463, | |
| "learning_rate": 7.707685352675777e-05, | |
| "loss": 2.9379, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 8.635097493036211, | |
| "grad_norm": 1.1987325305883298, | |
| "learning_rate": 7.703865452879372e-05, | |
| "loss": 2.9327, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 8.690807799442897, | |
| "grad_norm": 1.285067490039356, | |
| "learning_rate": 7.700021781731815e-05, | |
| "loss": 2.9105, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 8.746518105849582, | |
| "grad_norm": 1.2495135447033165, | |
| "learning_rate": 7.696154365687308e-05, | |
| "loss": 2.9324, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 8.802228412256268, | |
| "grad_norm": 1.2973981114841804, | |
| "learning_rate": 7.69226323136348e-05, | |
| "loss": 2.9255, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 8.857938718662952, | |
| "grad_norm": 1.4170513439543635, | |
| "learning_rate": 7.6883484055412e-05, | |
| "loss": 2.9497, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 8.913649025069638, | |
| "grad_norm": 1.1690142712727585, | |
| "learning_rate": 7.684409915164392e-05, | |
| "loss": 2.923, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.969359331476323, | |
| "grad_norm": 1.1510038194930527, | |
| "learning_rate": 7.680447787339861e-05, | |
| "loss": 2.926, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 9.025069637883009, | |
| "grad_norm": 1.2532790582579474, | |
| "learning_rate": 7.676462049337088e-05, | |
| "loss": 2.9202, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 9.080779944289693, | |
| "grad_norm": 1.1281216086435024, | |
| "learning_rate": 7.672452728588057e-05, | |
| "loss": 2.962, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 9.13649025069638, | |
| "grad_norm": 1.1715186252733858, | |
| "learning_rate": 7.668419852687062e-05, | |
| "loss": 2.9135, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 9.192200557103064, | |
| "grad_norm": 1.1013980953917584, | |
| "learning_rate": 7.664363449390508e-05, | |
| "loss": 2.9017, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 9.24791086350975, | |
| "grad_norm": 1.2577466487795455, | |
| "learning_rate": 7.660283546616741e-05, | |
| "loss": 2.9397, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 9.303621169916434, | |
| "grad_norm": 1.2666590722136728, | |
| "learning_rate": 7.656180172445832e-05, | |
| "loss": 2.9291, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 9.35933147632312, | |
| "grad_norm": 1.0677411210264725, | |
| "learning_rate": 7.6520533551194e-05, | |
| "loss": 2.8936, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 9.415041782729805, | |
| "grad_norm": 1.2801627452281688, | |
| "learning_rate": 7.647903123040411e-05, | |
| "loss": 2.9053, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 9.47075208913649, | |
| "grad_norm": 1.4058461947277687, | |
| "learning_rate": 7.643729504772985e-05, | |
| "loss": 2.9267, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 9.526462395543176, | |
| "grad_norm": 1.0476841151648881, | |
| "learning_rate": 7.639532529042196e-05, | |
| "loss": 2.9067, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 9.58217270194986, | |
| "grad_norm": 1.1192055741834313, | |
| "learning_rate": 7.635312224733879e-05, | |
| "loss": 2.9217, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 9.637883008356546, | |
| "grad_norm": 1.0978122554766025, | |
| "learning_rate": 7.631068620894427e-05, | |
| "loss": 2.9008, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 9.69359331476323, | |
| "grad_norm": 1.1633036806799766, | |
| "learning_rate": 7.626801746730594e-05, | |
| "loss": 2.9058, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 9.749303621169917, | |
| "grad_norm": 1.09083755501122, | |
| "learning_rate": 7.622511631609293e-05, | |
| "loss": 2.9128, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.805013927576601, | |
| "grad_norm": 1.0388545560459703, | |
| "learning_rate": 7.618198305057391e-05, | |
| "loss": 2.9161, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 9.860724233983287, | |
| "grad_norm": 1.1015281389024363, | |
| "learning_rate": 7.613861796761513e-05, | |
| "loss": 2.901, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 9.916434540389972, | |
| "grad_norm": 1.0877360587312859, | |
| "learning_rate": 7.609502136567829e-05, | |
| "loss": 2.9284, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 9.972144846796658, | |
| "grad_norm": 1.0365425361156384, | |
| "learning_rate": 7.605119354481855e-05, | |
| "loss": 2.902, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 10.027855153203342, | |
| "grad_norm": 1.2163943871082232, | |
| "learning_rate": 7.600713480668244e-05, | |
| "loss": 2.8877, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 10.083565459610028, | |
| "grad_norm": 1.3467097823122347, | |
| "learning_rate": 7.596284545450579e-05, | |
| "loss": 2.902, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 10.139275766016713, | |
| "grad_norm": 1.1614707108221107, | |
| "learning_rate": 7.591832579311162e-05, | |
| "loss": 2.8924, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 10.194986072423399, | |
| "grad_norm": 1.154263677555927, | |
| "learning_rate": 7.587357612890807e-05, | |
| "loss": 2.8906, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 10.250696378830083, | |
| "grad_norm": 1.2048421202419115, | |
| "learning_rate": 7.582859676988631e-05, | |
| "loss": 2.91, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 10.30640668523677, | |
| "grad_norm": 1.0867049785413572, | |
| "learning_rate": 7.578338802561835e-05, | |
| "loss": 2.9205, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 10.362116991643454, | |
| "grad_norm": 1.2226191180056192, | |
| "learning_rate": 7.573795020725498e-05, | |
| "loss": 2.891, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 10.41782729805014, | |
| "grad_norm": 1.0288993145273457, | |
| "learning_rate": 7.569228362752359e-05, | |
| "loss": 2.8813, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 10.473537604456824, | |
| "grad_norm": 1.322014772637416, | |
| "learning_rate": 7.564638860072602e-05, | |
| "loss": 2.8942, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 10.52924791086351, | |
| "grad_norm": 1.7224079895109572, | |
| "learning_rate": 7.560026544273644e-05, | |
| "loss": 2.89, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 10.584958217270195, | |
| "grad_norm": 1.06553603535958, | |
| "learning_rate": 7.555391447099909e-05, | |
| "loss": 2.8933, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 10.64066852367688, | |
| "grad_norm": 1.6005573677632197, | |
| "learning_rate": 7.550733600452618e-05, | |
| "loss": 2.8778, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 10.696378830083566, | |
| "grad_norm": 1.183378791005643, | |
| "learning_rate": 7.546053036389568e-05, | |
| "loss": 2.8785, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 10.75208913649025, | |
| "grad_norm": 1.0673503444473083, | |
| "learning_rate": 7.541349787124903e-05, | |
| "loss": 2.8656, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 10.807799442896936, | |
| "grad_norm": 1.1161036292131443, | |
| "learning_rate": 7.536623885028903e-05, | |
| "loss": 2.8949, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 10.86350974930362, | |
| "grad_norm": 1.1044441414641109, | |
| "learning_rate": 7.53187536262776e-05, | |
| "loss": 2.8852, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 10.919220055710307, | |
| "grad_norm": 1.1221763618181442, | |
| "learning_rate": 7.527104252603341e-05, | |
| "loss": 2.8687, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 10.974930362116991, | |
| "grad_norm": 1.0239803282921194, | |
| "learning_rate": 7.522310587792984e-05, | |
| "loss": 2.8738, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 11.030640668523677, | |
| "grad_norm": 1.0403254568733065, | |
| "learning_rate": 7.517494401189256e-05, | |
| "loss": 2.8654, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 11.086350974930362, | |
| "grad_norm": 1.3774438500736799, | |
| "learning_rate": 7.512655725939733e-05, | |
| "loss": 2.8514, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 11.142061281337048, | |
| "grad_norm": 1.0311405284757509, | |
| "learning_rate": 7.507794595346767e-05, | |
| "loss": 2.8698, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 11.197771587743732, | |
| "grad_norm": 1.0136275961779155, | |
| "learning_rate": 7.502911042867261e-05, | |
| "loss": 2.8141, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 11.253481894150418, | |
| "grad_norm": 1.2412045833199774, | |
| "learning_rate": 7.498005102112435e-05, | |
| "loss": 2.894, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 11.309192200557103, | |
| "grad_norm": 1.070331458091606, | |
| "learning_rate": 7.493076806847605e-05, | |
| "loss": 2.8753, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 11.364902506963789, | |
| "grad_norm": 1.1760961554541343, | |
| "learning_rate": 7.488126190991936e-05, | |
| "loss": 2.8722, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 11.420612813370473, | |
| "grad_norm": 1.1616540292064435, | |
| "learning_rate": 7.483153288618215e-05, | |
| "loss": 2.8909, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 11.47632311977716, | |
| "grad_norm": 1.0331404489020481, | |
| "learning_rate": 7.478158133952619e-05, | |
| "loss": 2.853, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 11.532033426183844, | |
| "grad_norm": 1.254677923406745, | |
| "learning_rate": 7.473140761374479e-05, | |
| "loss": 2.8674, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 11.587743732590528, | |
| "grad_norm": 1.4723808775722143, | |
| "learning_rate": 7.468101205416035e-05, | |
| "loss": 2.8738, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 11.643454038997215, | |
| "grad_norm": 1.2019875338287322, | |
| "learning_rate": 7.463039500762213e-05, | |
| "loss": 2.8878, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 11.699164345403899, | |
| "grad_norm": 1.2787778339880642, | |
| "learning_rate": 7.457955682250372e-05, | |
| "loss": 2.8797, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 11.754874651810585, | |
| "grad_norm": 1.054866858779706, | |
| "learning_rate": 7.452849784870072e-05, | |
| "loss": 2.8617, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 11.81058495821727, | |
| "grad_norm": 1.1047418002149563, | |
| "learning_rate": 7.447721843762836e-05, | |
| "loss": 2.8519, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 11.866295264623956, | |
| "grad_norm": 1.3479580629482741, | |
| "learning_rate": 7.442571894221898e-05, | |
| "loss": 2.8764, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 11.92200557103064, | |
| "grad_norm": 1.0996191502382984, | |
| "learning_rate": 7.437399971691968e-05, | |
| "loss": 2.8742, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 11.977715877437326, | |
| "grad_norm": 1.1829758402369137, | |
| "learning_rate": 7.432206111768985e-05, | |
| "loss": 2.8795, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 12.03342618384401, | |
| "grad_norm": 1.2260323749324813, | |
| "learning_rate": 7.426990350199874e-05, | |
| "loss": 2.8393, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 12.089136490250697, | |
| "grad_norm": 1.0838502363024154, | |
| "learning_rate": 7.421752722882299e-05, | |
| "loss": 2.8434, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 12.144846796657381, | |
| "grad_norm": 1.3481847345508688, | |
| "learning_rate": 7.416493265864415e-05, | |
| "loss": 2.8609, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 12.200557103064067, | |
| "grad_norm": 1.0824899172058005, | |
| "learning_rate": 7.411212015344622e-05, | |
| "loss": 2.8521, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 12.256267409470752, | |
| "grad_norm": 1.2218416915168235, | |
| "learning_rate": 7.40590900767131e-05, | |
| "loss": 2.8913, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 12.311977715877438, | |
| "grad_norm": 1.1304024863305357, | |
| "learning_rate": 7.400584279342621e-05, | |
| "loss": 2.8493, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 12.367688022284122, | |
| "grad_norm": 1.0421994761055569, | |
| "learning_rate": 7.395237867006185e-05, | |
| "loss": 2.8292, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 12.423398328690809, | |
| "grad_norm": 1.265322995175785, | |
| "learning_rate": 7.389869807458872e-05, | |
| "loss": 2.8576, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 12.479108635097493, | |
| "grad_norm": 1.2148541733313956, | |
| "learning_rate": 7.384480137646545e-05, | |
| "loss": 2.8684, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 12.534818941504179, | |
| "grad_norm": 1.0816243901094347, | |
| "learning_rate": 7.379068894663795e-05, | |
| "loss": 2.8608, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 12.590529247910863, | |
| "grad_norm": 1.0319313826952614, | |
| "learning_rate": 7.373636115753691e-05, | |
| "loss": 2.8381, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 12.64623955431755, | |
| "grad_norm": 1.362502756526822, | |
| "learning_rate": 7.368181838307531e-05, | |
| "loss": 2.8361, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 12.701949860724234, | |
| "grad_norm": 1.1691483178804676, | |
| "learning_rate": 7.36270609986457e-05, | |
| "loss": 2.8476, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 12.757660167130918, | |
| "grad_norm": 1.099737609334872, | |
| "learning_rate": 7.357208938111772e-05, | |
| "loss": 2.8317, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 12.813370473537605, | |
| "grad_norm": 1.2465031640095632, | |
| "learning_rate": 7.351690390883547e-05, | |
| "loss": 2.8607, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 12.869080779944289, | |
| "grad_norm": 1.0626741966239892, | |
| "learning_rate": 7.346150496161489e-05, | |
| "loss": 2.8482, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 12.924791086350975, | |
| "grad_norm": 1.4215858090007158, | |
| "learning_rate": 7.340589292074123e-05, | |
| "loss": 2.828, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 12.98050139275766, | |
| "grad_norm": 1.189746289044543, | |
| "learning_rate": 7.33500681689663e-05, | |
| "loss": 2.8392, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 13.036211699164346, | |
| "grad_norm": 1.1816710302425453, | |
| "learning_rate": 7.329403109050598e-05, | |
| "loss": 2.8439, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 13.09192200557103, | |
| "grad_norm": 1.1422328461301028, | |
| "learning_rate": 7.323778207103738e-05, | |
| "loss": 2.8458, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 13.147632311977716, | |
| "grad_norm": 1.286190919467895, | |
| "learning_rate": 7.318132149769639e-05, | |
| "loss": 2.8373, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 13.2033426183844, | |
| "grad_norm": 1.1535174701098847, | |
| "learning_rate": 7.312464975907494e-05, | |
| "loss": 2.8287, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 13.259052924791087, | |
| "grad_norm": 1.098410005946985, | |
| "learning_rate": 7.306776724521822e-05, | |
| "loss": 2.8347, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 13.314763231197771, | |
| "grad_norm": 1.0730663678590038, | |
| "learning_rate": 7.301067434762217e-05, | |
| "loss": 2.8022, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 13.370473537604457, | |
| "grad_norm": 1.2860001127878453, | |
| "learning_rate": 7.295337145923068e-05, | |
| "loss": 2.8209, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 13.426183844011142, | |
| "grad_norm": 1.1788256509864643, | |
| "learning_rate": 7.28958589744329e-05, | |
| "loss": 2.8202, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 13.481894150417828, | |
| "grad_norm": 1.2476156265733942, | |
| "learning_rate": 7.283813728906054e-05, | |
| "loss": 2.8301, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 13.537604456824512, | |
| "grad_norm": 1.1806963623362805, | |
| "learning_rate": 7.278020680038514e-05, | |
| "loss": 2.8325, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 13.593314763231199, | |
| "grad_norm": 1.312577644195395, | |
| "learning_rate": 7.272206790711534e-05, | |
| "loss": 2.8268, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 13.649025069637883, | |
| "grad_norm": 1.2945260257216111, | |
| "learning_rate": 7.266372100939415e-05, | |
| "loss": 2.8474, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 13.704735376044567, | |
| "grad_norm": 1.1825196002989207, | |
| "learning_rate": 7.26051665087961e-05, | |
| "loss": 2.8245, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 13.760445682451254, | |
| "grad_norm": 1.1409651239961929, | |
| "learning_rate": 7.254640480832468e-05, | |
| "loss": 2.8342, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 13.816155988857938, | |
| "grad_norm": 1.1047252543056303, | |
| "learning_rate": 7.248743631240934e-05, | |
| "loss": 2.8504, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 13.871866295264624, | |
| "grad_norm": 1.1114472045482278, | |
| "learning_rate": 7.242826142690284e-05, | |
| "loss": 2.8238, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 13.927576601671309, | |
| "grad_norm": 1.0521613836121042, | |
| "learning_rate": 7.236888055907841e-05, | |
| "loss": 2.8524, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 13.983286908077995, | |
| "grad_norm": 1.1464930432732499, | |
| "learning_rate": 7.230929411762698e-05, | |
| "loss": 2.8309, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 14.038997214484679, | |
| "grad_norm": 1.2249548851439938, | |
| "learning_rate": 7.224950251265438e-05, | |
| "loss": 2.8166, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 14.094707520891365, | |
| "grad_norm": 1.1496013157017706, | |
| "learning_rate": 7.218950615567839e-05, | |
| "loss": 2.8176, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 14.15041782729805, | |
| "grad_norm": 1.2406955049253514, | |
| "learning_rate": 7.212930545962609e-05, | |
| "loss": 2.8452, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 14.206128133704736, | |
| "grad_norm": 1.0793023104931123, | |
| "learning_rate": 7.206890083883089e-05, | |
| "loss": 2.7934, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 14.26183844011142, | |
| "grad_norm": 1.0555332439739342, | |
| "learning_rate": 7.200829270902974e-05, | |
| "loss": 2.7967, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 14.317548746518106, | |
| "grad_norm": 1.088008092694001, | |
| "learning_rate": 7.194748148736022e-05, | |
| "loss": 2.8118, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 14.37325905292479, | |
| "grad_norm": 1.1664773713662093, | |
| "learning_rate": 7.18864675923577e-05, | |
| "loss": 2.8322, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 14.428969359331477, | |
| "grad_norm": 1.180830286270773, | |
| "learning_rate": 7.182525144395254e-05, | |
| "loss": 2.7889, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 14.484679665738161, | |
| "grad_norm": 1.289182787809941, | |
| "learning_rate": 7.176383346346697e-05, | |
| "loss": 2.8145, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 14.540389972144848, | |
| "grad_norm": 1.3156558914225063, | |
| "learning_rate": 7.170221407361246e-05, | |
| "loss": 2.8057, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 14.596100278551532, | |
| "grad_norm": 1.311742340109156, | |
| "learning_rate": 7.164039369848662e-05, | |
| "loss": 2.7996, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 14.651810584958218, | |
| "grad_norm": 1.1167094205760877, | |
| "learning_rate": 7.157837276357038e-05, | |
| "loss": 2.8106, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 14.707520891364902, | |
| "grad_norm": 1.0697751811552614, | |
| "learning_rate": 7.151615169572499e-05, | |
| "loss": 2.8089, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 14.763231197771589, | |
| "grad_norm": 1.3217920715681262, | |
| "learning_rate": 7.145373092318921e-05, | |
| "loss": 2.8295, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 14.818941504178273, | |
| "grad_norm": 1.2030049127163769, | |
| "learning_rate": 7.139111087557614e-05, | |
| "loss": 2.8208, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 14.874651810584957, | |
| "grad_norm": 1.1560181548327002, | |
| "learning_rate": 7.132829198387052e-05, | |
| "loss": 2.7894, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 14.930362116991644, | |
| "grad_norm": 1.3677024675114284, | |
| "learning_rate": 7.12652746804256e-05, | |
| "loss": 2.8208, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 14.986072423398328, | |
| "grad_norm": 1.1758085824901958, | |
| "learning_rate": 7.120205939896016e-05, | |
| "loss": 2.7816, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 15.041782729805014, | |
| "grad_norm": 1.236149394894879, | |
| "learning_rate": 7.113864657455565e-05, | |
| "loss": 2.8242, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 15.097493036211699, | |
| "grad_norm": 1.2013719116832535, | |
| "learning_rate": 7.107503664365306e-05, | |
| "loss": 2.8048, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 15.153203342618385, | |
| "grad_norm": 1.1890270902537186, | |
| "learning_rate": 7.101123004404999e-05, | |
| "loss": 2.7988, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 15.20891364902507, | |
| "grad_norm": 0.9947321024811, | |
| "learning_rate": 7.094722721489762e-05, | |
| "loss": 2.8023, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 15.264623955431755, | |
| "grad_norm": 1.097217550580232, | |
| "learning_rate": 7.088302859669767e-05, | |
| "loss": 2.7876, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 15.32033426183844, | |
| "grad_norm": 1.1297335196918232, | |
| "learning_rate": 7.081863463129943e-05, | |
| "loss": 2.81, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 15.376044568245126, | |
| "grad_norm": 1.1252485351464636, | |
| "learning_rate": 7.075404576189664e-05, | |
| "loss": 2.8104, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 15.43175487465181, | |
| "grad_norm": 1.0952308860470021, | |
| "learning_rate": 7.068926243302446e-05, | |
| "loss": 2.8134, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 15.487465181058496, | |
| "grad_norm": 1.1033772218566738, | |
| "learning_rate": 7.062428509055645e-05, | |
| "loss": 2.7919, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 15.54317548746518, | |
| "grad_norm": 1.1682663344591195, | |
| "learning_rate": 7.055911418170146e-05, | |
| "loss": 2.8255, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 15.598885793871867, | |
| "grad_norm": 1.131084807002329, | |
| "learning_rate": 7.049375015500061e-05, | |
| "loss": 2.7911, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 15.654596100278551, | |
| "grad_norm": 1.200817519578834, | |
| "learning_rate": 7.042819346032408e-05, | |
| "loss": 2.8178, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 15.710306406685238, | |
| "grad_norm": 1.14175820047776, | |
| "learning_rate": 7.036244454886818e-05, | |
| "loss": 2.7656, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 15.766016713091922, | |
| "grad_norm": 1.1227562173996573, | |
| "learning_rate": 7.029650387315208e-05, | |
| "loss": 2.8176, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 15.821727019498606, | |
| "grad_norm": 1.1575039434693588, | |
| "learning_rate": 7.023037188701485e-05, | |
| "loss": 2.7942, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 15.877437325905293, | |
| "grad_norm": 1.229577946005525, | |
| "learning_rate": 7.01640490456122e-05, | |
| "loss": 2.786, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 15.933147632311977, | |
| "grad_norm": 1.2021995784653015, | |
| "learning_rate": 7.009753580541344e-05, | |
| "loss": 2.7857, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 15.988857938718663, | |
| "grad_norm": 1.1325819395096217, | |
| "learning_rate": 7.003083262419829e-05, | |
| "loss": 2.7999, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 16.044568245125348, | |
| "grad_norm": 1.1155399032222173, | |
| "learning_rate": 6.996393996105378e-05, | |
| "loss": 2.7835, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 16.100278551532032, | |
| "grad_norm": 1.152416883414678, | |
| "learning_rate": 6.989685827637099e-05, | |
| "loss": 2.7879, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 16.15598885793872, | |
| "grad_norm": 1.0579264010465572, | |
| "learning_rate": 6.982958803184201e-05, | |
| "loss": 2.7968, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 16.211699164345404, | |
| "grad_norm": 1.2482769970776515, | |
| "learning_rate": 6.976212969045668e-05, | |
| "loss": 2.7628, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 16.26740947075209, | |
| "grad_norm": 1.1876572666161167, | |
| "learning_rate": 6.969448371649945e-05, | |
| "loss": 2.7645, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 16.323119777158773, | |
| "grad_norm": 1.0502937038876212, | |
| "learning_rate": 6.962665057554606e-05, | |
| "loss": 2.7836, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 16.37883008356546, | |
| "grad_norm": 1.1579505600465934, | |
| "learning_rate": 6.955863073446054e-05, | |
| "loss": 2.8117, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 16.434540389972145, | |
| "grad_norm": 1.1026773625827397, | |
| "learning_rate": 6.949042466139187e-05, | |
| "loss": 2.7684, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 16.49025069637883, | |
| "grad_norm": 1.080576186890865, | |
| "learning_rate": 6.942203282577072e-05, | |
| "loss": 2.8201, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 16.545961002785514, | |
| "grad_norm": 1.1201896108002356, | |
| "learning_rate": 6.935345569830636e-05, | |
| "loss": 2.7998, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 16.601671309192202, | |
| "grad_norm": 1.0674451923986512, | |
| "learning_rate": 6.928469375098327e-05, | |
| "loss": 2.7513, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 16.657381615598887, | |
| "grad_norm": 1.393151947824109, | |
| "learning_rate": 6.921574745705798e-05, | |
| "loss": 2.7765, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 16.71309192200557, | |
| "grad_norm": 1.0998714589532412, | |
| "learning_rate": 6.91466172910558e-05, | |
| "loss": 2.7645, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.768802228412255, | |
| "grad_norm": 1.161893735809738, | |
| "learning_rate": 6.907730372876756e-05, | |
| "loss": 2.7775, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 16.824512534818943, | |
| "grad_norm": 1.1249294683555988, | |
| "learning_rate": 6.90078072472463e-05, | |
| "loss": 2.7751, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 16.880222841225628, | |
| "grad_norm": 1.0278563705072414, | |
| "learning_rate": 6.8938128324804e-05, | |
| "loss": 2.7886, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 16.935933147632312, | |
| "grad_norm": 1.061083533262939, | |
| "learning_rate": 6.886826744100831e-05, | |
| "loss": 2.7706, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 16.991643454038996, | |
| "grad_norm": 1.1962487610461527, | |
| "learning_rate": 6.879822507667925e-05, | |
| "loss": 2.778, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 17.04735376044568, | |
| "grad_norm": 1.084120872740396, | |
| "learning_rate": 6.872800171388584e-05, | |
| "loss": 2.758, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 17.10306406685237, | |
| "grad_norm": 1.2302739964645966, | |
| "learning_rate": 6.865759783594288e-05, | |
| "loss": 2.7437, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 17.158774373259053, | |
| "grad_norm": 1.2503733070591003, | |
| "learning_rate": 6.858701392740755e-05, | |
| "loss": 2.7828, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 17.214484679665738, | |
| "grad_norm": 1.0453068724351287, | |
| "learning_rate": 6.85162504740761e-05, | |
| "loss": 2.7804, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 17.270194986072422, | |
| "grad_norm": 1.102788495625949, | |
| "learning_rate": 6.844530796298049e-05, | |
| "loss": 2.7794, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 17.32590529247911, | |
| "grad_norm": 1.1692774985464567, | |
| "learning_rate": 6.837418688238506e-05, | |
| "loss": 2.7432, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 17.381615598885794, | |
| "grad_norm": 1.1331591899737494, | |
| "learning_rate": 6.830288772178319e-05, | |
| "loss": 2.7716, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 17.43732590529248, | |
| "grad_norm": 1.2148411223909634, | |
| "learning_rate": 6.823141097189384e-05, | |
| "loss": 2.7696, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 17.493036211699163, | |
| "grad_norm": 1.393055281753607, | |
| "learning_rate": 6.815975712465829e-05, | |
| "loss": 2.7415, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 17.54874651810585, | |
| "grad_norm": 1.3474136405073431, | |
| "learning_rate": 6.808792667323665e-05, | |
| "loss": 2.781, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 17.604456824512535, | |
| "grad_norm": 1.036698414343895, | |
| "learning_rate": 6.80159201120046e-05, | |
| "loss": 2.7695, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 17.66016713091922, | |
| "grad_norm": 1.1451609472767672, | |
| "learning_rate": 6.79437379365498e-05, | |
| "loss": 2.7744, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 17.715877437325904, | |
| "grad_norm": 1.0591481814927388, | |
| "learning_rate": 6.787138064366862e-05, | |
| "loss": 2.7892, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 17.771587743732592, | |
| "grad_norm": 1.1623299583698332, | |
| "learning_rate": 6.779884873136271e-05, | |
| "loss": 2.7675, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 17.827298050139277, | |
| "grad_norm": 1.0147233449592903, | |
| "learning_rate": 6.772614269883552e-05, | |
| "loss": 2.7427, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 17.88300835654596, | |
| "grad_norm": 1.1368649796660046, | |
| "learning_rate": 6.765326304648889e-05, | |
| "loss": 2.7683, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 17.938718662952645, | |
| "grad_norm": 1.1490699285660757, | |
| "learning_rate": 6.758021027591959e-05, | |
| "loss": 2.7886, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 17.99442896935933, | |
| "grad_norm": 0.9785051726498183, | |
| "learning_rate": 6.75069848899159e-05, | |
| "loss": 2.7515, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 18.050139275766018, | |
| "grad_norm": 1.2031239642894627, | |
| "learning_rate": 6.743358739245416e-05, | |
| "loss": 2.7646, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 18.105849582172702, | |
| "grad_norm": 1.0544436866470264, | |
| "learning_rate": 6.736001828869522e-05, | |
| "loss": 2.7755, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 18.161559888579387, | |
| "grad_norm": 1.2123837569916929, | |
| "learning_rate": 6.728627808498102e-05, | |
| "loss": 2.726, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 18.21727019498607, | |
| "grad_norm": 1.0826550987274146, | |
| "learning_rate": 6.721236728883116e-05, | |
| "loss": 2.7447, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 18.27298050139276, | |
| "grad_norm": 1.0759411138187283, | |
| "learning_rate": 6.71382864089393e-05, | |
| "loss": 2.7457, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 18.328690807799443, | |
| "grad_norm": 1.097324968536256, | |
| "learning_rate": 6.706403595516969e-05, | |
| "loss": 2.7833, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 18.384401114206128, | |
| "grad_norm": 1.057266074357713, | |
| "learning_rate": 6.69896164385537e-05, | |
| "loss": 2.7441, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 18.440111420612812, | |
| "grad_norm": 1.1747200892285241, | |
| "learning_rate": 6.691502837128632e-05, | |
| "loss": 2.7255, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 18.4958217270195, | |
| "grad_norm": 0.9921075391724958, | |
| "learning_rate": 6.684027226672256e-05, | |
| "loss": 2.749, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 18.551532033426184, | |
| "grad_norm": 1.1919910460153382, | |
| "learning_rate": 6.676534863937394e-05, | |
| "loss": 2.7244, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 18.60724233983287, | |
| "grad_norm": 0.9700416046708717, | |
| "learning_rate": 6.669025800490496e-05, | |
| "loss": 2.7578, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 18.662952646239553, | |
| "grad_norm": 1.7798390717297705, | |
| "learning_rate": 6.66150008801296e-05, | |
| "loss": 2.7497, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 18.71866295264624, | |
| "grad_norm": 1.3075654856552488, | |
| "learning_rate": 6.653957778300764e-05, | |
| "loss": 2.7627, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 18.774373259052926, | |
| "grad_norm": 1.109643118621012, | |
| "learning_rate": 6.646398923264127e-05, | |
| "loss": 2.7451, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 18.83008356545961, | |
| "grad_norm": 1.0116510422849132, | |
| "learning_rate": 6.638823574927133e-05, | |
| "loss": 2.7904, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 18.885793871866294, | |
| "grad_norm": 1.154541906977704, | |
| "learning_rate": 6.631231785427385e-05, | |
| "loss": 2.7375, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 18.94150417827298, | |
| "grad_norm": 0.9834780422588326, | |
| "learning_rate": 6.623623607015642e-05, | |
| "loss": 2.7324, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 18.997214484679667, | |
| "grad_norm": 1.0967675089569602, | |
| "learning_rate": 6.615999092055462e-05, | |
| "loss": 2.7377, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 19.05292479108635, | |
| "grad_norm": 1.033720234986334, | |
| "learning_rate": 6.608358293022839e-05, | |
| "loss": 2.7455, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 19.108635097493035, | |
| "grad_norm": 1.4074135325652137, | |
| "learning_rate": 6.600701262505844e-05, | |
| "loss": 2.7175, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 19.16434540389972, | |
| "grad_norm": 1.2217671501431764, | |
| "learning_rate": 6.593028053204258e-05, | |
| "loss": 2.7459, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 19.220055710306408, | |
| "grad_norm": 1.157829388585607, | |
| "learning_rate": 6.585338717929218e-05, | |
| "loss": 2.7437, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 19.275766016713092, | |
| "grad_norm": 1.0614455080094343, | |
| "learning_rate": 6.577633309602842e-05, | |
| "loss": 2.7703, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 19.331476323119777, | |
| "grad_norm": 1.1146275976097708, | |
| "learning_rate": 6.569911881257878e-05, | |
| "loss": 2.7435, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 19.38718662952646, | |
| "grad_norm": 1.0865432136684192, | |
| "learning_rate": 6.56217448603733e-05, | |
| "loss": 2.732, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 19.44289693593315, | |
| "grad_norm": 1.1353040144610025, | |
| "learning_rate": 6.554421177194095e-05, | |
| "loss": 2.7285, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 19.498607242339833, | |
| "grad_norm": 1.0801239077744584, | |
| "learning_rate": 6.546652008090591e-05, | |
| "loss": 2.7449, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 19.554317548746518, | |
| "grad_norm": 1.3301081392287104, | |
| "learning_rate": 6.538867032198405e-05, | |
| "loss": 2.758, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 19.610027855153202, | |
| "grad_norm": 1.378308356595679, | |
| "learning_rate": 6.531066303097907e-05, | |
| "loss": 2.7296, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 19.66573816155989, | |
| "grad_norm": 1.0272775678623267, | |
| "learning_rate": 6.523249874477889e-05, | |
| "loss": 2.7366, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 19.721448467966574, | |
| "grad_norm": 1.3280539231741249, | |
| "learning_rate": 6.515417800135199e-05, | |
| "loss": 2.7206, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 19.77715877437326, | |
| "grad_norm": 1.2190042029662624, | |
| "learning_rate": 6.507570133974366e-05, | |
| "loss": 2.7413, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 19.832869080779943, | |
| "grad_norm": 1.0253976846781938, | |
| "learning_rate": 6.499706930007227e-05, | |
| "loss": 2.7194, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 19.88857938718663, | |
| "grad_norm": 1.0998801088450254, | |
| "learning_rate": 6.491828242352565e-05, | |
| "loss": 2.7299, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 19.944289693593316, | |
| "grad_norm": 1.2547720080479265, | |
| "learning_rate": 6.483934125235726e-05, | |
| "loss": 2.6907, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.1249680628198624, | |
| "learning_rate": 6.47602463298825e-05, | |
| "loss": 2.7427, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 20.055710306406684, | |
| "grad_norm": 1.438020280162261, | |
| "learning_rate": 6.468099820047495e-05, | |
| "loss": 2.7324, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 20.11142061281337, | |
| "grad_norm": 1.2513883791091014, | |
| "learning_rate": 6.46015974095627e-05, | |
| "loss": 2.7433, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 20.167130919220057, | |
| "grad_norm": 1.1732322456488424, | |
| "learning_rate": 6.452204450362446e-05, | |
| "loss": 2.7287, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 20.22284122562674, | |
| "grad_norm": 1.099413306167727, | |
| "learning_rate": 6.444234003018595e-05, | |
| "loss": 2.7166, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 20.278551532033426, | |
| "grad_norm": 1.1933888070643845, | |
| "learning_rate": 6.436248453781604e-05, | |
| "loss": 2.7084, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 20.33426183844011, | |
| "grad_norm": 1.1295980078738417, | |
| "learning_rate": 6.428247857612295e-05, | |
| "loss": 2.7101, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 20.389972144846798, | |
| "grad_norm": 1.1784232492865596, | |
| "learning_rate": 6.420232269575055e-05, | |
| "loss": 2.7238, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 20.445682451253482, | |
| "grad_norm": 1.600869680659427, | |
| "learning_rate": 6.412201744837451e-05, | |
| "loss": 2.7048, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 20.501392757660167, | |
| "grad_norm": 1.15499859140262, | |
| "learning_rate": 6.404156338669859e-05, | |
| "loss": 2.6977, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 20.55710306406685, | |
| "grad_norm": 1.1378208760575172, | |
| "learning_rate": 6.396096106445064e-05, | |
| "loss": 2.7181, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 20.61281337047354, | |
| "grad_norm": 1.123871481777876, | |
| "learning_rate": 6.388021103637904e-05, | |
| "loss": 2.7155, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 20.668523676880223, | |
| "grad_norm": 1.0841648486082098, | |
| "learning_rate": 6.37993138582487e-05, | |
| "loss": 2.7354, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 20.724233983286908, | |
| "grad_norm": 1.1664188982324037, | |
| "learning_rate": 6.371827008683732e-05, | |
| "loss": 2.7238, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 20.779944289693592, | |
| "grad_norm": 1.104589633589391, | |
| "learning_rate": 6.363708027993152e-05, | |
| "loss": 2.6975, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 20.83565459610028, | |
| "grad_norm": 1.421916066233274, | |
| "learning_rate": 6.355574499632301e-05, | |
| "loss": 2.7423, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 20.891364902506965, | |
| "grad_norm": 1.081254160033794, | |
| "learning_rate": 6.347426479580477e-05, | |
| "loss": 2.725, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 20.94707520891365, | |
| "grad_norm": 1.1098202109359585, | |
| "learning_rate": 6.339264023916715e-05, | |
| "loss": 2.7272, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 21.002785515320333, | |
| "grad_norm": 1.0514058782353737, | |
| "learning_rate": 6.331087188819405e-05, | |
| "loss": 2.739, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 21.058495821727018, | |
| "grad_norm": 1.1184046430910828, | |
| "learning_rate": 6.322896030565905e-05, | |
| "loss": 2.703, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 21.114206128133706, | |
| "grad_norm": 1.1842229516973584, | |
| "learning_rate": 6.31469060553215e-05, | |
| "loss": 2.7151, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 21.16991643454039, | |
| "grad_norm": 1.0858640050274855, | |
| "learning_rate": 6.30647097019227e-05, | |
| "loss": 2.7033, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 21.225626740947074, | |
| "grad_norm": 1.2333467648992407, | |
| "learning_rate": 6.298237181118193e-05, | |
| "loss": 2.6952, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 21.28133704735376, | |
| "grad_norm": 1.3879484824315327, | |
| "learning_rate": 6.289989294979264e-05, | |
| "loss": 2.7024, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 21.337047353760447, | |
| "grad_norm": 1.2106236210149603, | |
| "learning_rate": 6.281727368541853e-05, | |
| "loss": 2.7047, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 21.39275766016713, | |
| "grad_norm": 1.0115849386843587, | |
| "learning_rate": 6.273451458668961e-05, | |
| "loss": 2.7075, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 21.448467966573816, | |
| "grad_norm": 1.051437728204155, | |
| "learning_rate": 6.265161622319829e-05, | |
| "loss": 2.7247, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 21.5041782729805, | |
| "grad_norm": 1.1019904640067848, | |
| "learning_rate": 6.256857916549548e-05, | |
| "loss": 2.691, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 21.559888579387188, | |
| "grad_norm": 1.1014027785637281, | |
| "learning_rate": 6.248540398508673e-05, | |
| "loss": 2.6992, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 21.615598885793872, | |
| "grad_norm": 1.0988814252962915, | |
| "learning_rate": 6.240209125442806e-05, | |
| "loss": 2.714, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 21.671309192200557, | |
| "grad_norm": 1.2025654774415926, | |
| "learning_rate": 6.231864154692237e-05, | |
| "loss": 2.7042, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 21.72701949860724, | |
| "grad_norm": 1.1583280781305814, | |
| "learning_rate": 6.223505543691518e-05, | |
| "loss": 2.7081, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 21.78272980501393, | |
| "grad_norm": 1.1105375147778487, | |
| "learning_rate": 6.215133349969086e-05, | |
| "loss": 2.6869, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 21.838440111420613, | |
| "grad_norm": 1.1146300728660752, | |
| "learning_rate": 6.206747631146862e-05, | |
| "loss": 2.6988, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 21.894150417827298, | |
| "grad_norm": 1.0660501800448277, | |
| "learning_rate": 6.198348444939849e-05, | |
| "loss": 2.6491, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 21.949860724233982, | |
| "grad_norm": 1.1013215569007113, | |
| "learning_rate": 6.189935849155747e-05, | |
| "loss": 2.7103, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 22.00557103064067, | |
| "grad_norm": 1.1802406207769038, | |
| "learning_rate": 6.18150990169454e-05, | |
| "loss": 2.7193, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 22.061281337047355, | |
| "grad_norm": 1.1108973776747364, | |
| "learning_rate": 6.173070660548112e-05, | |
| "loss": 2.6831, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 22.11699164345404, | |
| "grad_norm": 1.092182682917437, | |
| "learning_rate": 6.16461818379984e-05, | |
| "loss": 2.6557, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 22.172701949860723, | |
| "grad_norm": 1.145560140100733, | |
| "learning_rate": 6.156152529624193e-05, | |
| "loss": 2.6672, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 22.228412256267408, | |
| "grad_norm": 1.2287470982936533, | |
| "learning_rate": 6.147673756286334e-05, | |
| "loss": 2.7312, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 22.284122562674096, | |
| "grad_norm": 1.0846368953094574, | |
| "learning_rate": 6.139181922141721e-05, | |
| "loss": 2.7017, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 22.33983286908078, | |
| "grad_norm": 1.1767612465335586, | |
| "learning_rate": 6.130677085635704e-05, | |
| "loss": 2.7118, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 22.395543175487465, | |
| "grad_norm": 1.1460490982941247, | |
| "learning_rate": 6.12215930530312e-05, | |
| "loss": 2.6819, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 22.45125348189415, | |
| "grad_norm": 1.1479085095640083, | |
| "learning_rate": 6.113628639767893e-05, | |
| "loss": 2.6877, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 22.506963788300837, | |
| "grad_norm": 1.1375246029213462, | |
| "learning_rate": 6.105085147742632e-05, | |
| "loss": 2.6925, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 22.56267409470752, | |
| "grad_norm": 1.1123231981092023, | |
| "learning_rate": 6.0965288880282214e-05, | |
| "loss": 2.6822, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 22.618384401114206, | |
| "grad_norm": 1.2752545626840799, | |
| "learning_rate": 6.087959919513422e-05, | |
| "loss": 2.7205, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 22.67409470752089, | |
| "grad_norm": 1.2262605320485462, | |
| "learning_rate": 6.079378301174464e-05, | |
| "loss": 2.6924, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 22.729805013927578, | |
| "grad_norm": 1.0730006469584497, | |
| "learning_rate": 6.0707840920746374e-05, | |
| "loss": 2.7124, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 22.785515320334262, | |
| "grad_norm": 1.3257454551313221, | |
| "learning_rate": 6.0621773513638905e-05, | |
| "loss": 2.6762, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 22.841225626740947, | |
| "grad_norm": 1.2930384450871677, | |
| "learning_rate": 6.0535581382784216e-05, | |
| "loss": 2.6623, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 22.89693593314763, | |
| "grad_norm": 1.139885910912536, | |
| "learning_rate": 6.0449265121402686e-05, | |
| "loss": 2.6867, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 22.95264623955432, | |
| "grad_norm": 1.125710423580146, | |
| "learning_rate": 6.036282532356904e-05, | |
| "loss": 2.6742, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 23.008356545961004, | |
| "grad_norm": 1.131377041116149, | |
| "learning_rate": 6.027626258420825e-05, | |
| "loss": 2.7031, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 23.064066852367688, | |
| "grad_norm": 1.0869417103268262, | |
| "learning_rate": 6.0189577499091424e-05, | |
| "loss": 2.6683, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 23.119777158774372, | |
| "grad_norm": 1.1391157676417962, | |
| "learning_rate": 6.010277066483174e-05, | |
| "loss": 2.707, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 23.175487465181057, | |
| "grad_norm": 1.2945583722046563, | |
| "learning_rate": 6.001584267888028e-05, | |
| "loss": 2.6522, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 23.231197771587745, | |
| "grad_norm": 1.1565608753713996, | |
| "learning_rate": 5.9928794139522025e-05, | |
| "loss": 2.6717, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 23.28690807799443, | |
| "grad_norm": 1.2394122917368424, | |
| "learning_rate": 5.9841625645871575e-05, | |
| "loss": 2.7024, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 23.342618384401113, | |
| "grad_norm": 1.0903861377505122, | |
| "learning_rate": 5.975433779786921e-05, | |
| "loss": 2.6455, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 23.398328690807798, | |
| "grad_norm": 1.0661921555569798, | |
| "learning_rate": 5.966693119627662e-05, | |
| "loss": 2.6706, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 23.454038997214486, | |
| "grad_norm": 1.0655312130218653, | |
| "learning_rate": 5.957940644267282e-05, | |
| "loss": 2.6906, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 23.50974930362117, | |
| "grad_norm": 1.1134166788328776, | |
| "learning_rate": 5.949176413945003e-05, | |
| "loss": 2.6432, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 23.565459610027855, | |
| "grad_norm": 1.650527244066039, | |
| "learning_rate": 5.94040048898095e-05, | |
| "loss": 2.6937, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 23.62116991643454, | |
| "grad_norm": 1.1235056862121802, | |
| "learning_rate": 5.931612929775738e-05, | |
| "loss": 2.6705, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 23.676880222841227, | |
| "grad_norm": 1.0368298826041793, | |
| "learning_rate": 5.922813796810054e-05, | |
| "loss": 2.6724, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 23.73259052924791, | |
| "grad_norm": 1.215524406135999, | |
| "learning_rate": 5.914003150644242e-05, | |
| "loss": 2.6768, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 23.788300835654596, | |
| "grad_norm": 1.2276511679169064, | |
| "learning_rate": 5.905181051917883e-05, | |
| "loss": 2.7046, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 23.84401114206128, | |
| "grad_norm": 1.0144214891191534, | |
| "learning_rate": 5.896347561349387e-05, | |
| "loss": 2.652, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 23.899721448467968, | |
| "grad_norm": 1.206243520777862, | |
| "learning_rate": 5.887502739735565e-05, | |
| "loss": 2.6965, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 23.955431754874652, | |
| "grad_norm": 1.0056037720791713, | |
| "learning_rate": 5.878646647951213e-05, | |
| "loss": 2.6475, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 24.011142061281337, | |
| "grad_norm": 1.0641520059001517, | |
| "learning_rate": 5.8697793469486964e-05, | |
| "loss": 2.6991, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 24.06685236768802, | |
| "grad_norm": 1.2174985172315418, | |
| "learning_rate": 5.860900897757528e-05, | |
| "loss": 2.6711, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 24.12256267409471, | |
| "grad_norm": 1.0515823218930573, | |
| "learning_rate": 5.852011361483949e-05, | |
| "loss": 2.6625, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 24.178272980501394, | |
| "grad_norm": 1.3132128877951654, | |
| "learning_rate": 5.8431107993105076e-05, | |
| "loss": 2.6604, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 24.233983286908078, | |
| "grad_norm": 1.0603177321270034, | |
| "learning_rate": 5.834199272495636e-05, | |
| "loss": 2.6663, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 24.289693593314762, | |
| "grad_norm": 1.262687868955379, | |
| "learning_rate": 5.8252768423732364e-05, | |
| "loss": 2.6708, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 24.345403899721447, | |
| "grad_norm": 1.0873862910591228, | |
| "learning_rate": 5.816343570352244e-05, | |
| "loss": 2.6367, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 24.401114206128135, | |
| "grad_norm": 1.2746436419501377, | |
| "learning_rate": 5.8073995179162254e-05, | |
| "loss": 2.7081, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 24.45682451253482, | |
| "grad_norm": 1.092203478224612, | |
| "learning_rate": 5.798444746622934e-05, | |
| "loss": 2.6693, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 24.512534818941504, | |
| "grad_norm": 1.1045845154960057, | |
| "learning_rate": 5.7894793181039e-05, | |
| "loss": 2.6981, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 24.568245125348188, | |
| "grad_norm": 1.139876543282688, | |
| "learning_rate": 5.780503294064005e-05, | |
| "loss": 2.6539, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 24.623955431754876, | |
| "grad_norm": 1.1892780737352568, | |
| "learning_rate": 5.771516736281051e-05, | |
| "loss": 2.6676, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 24.67966573816156, | |
| "grad_norm": 1.124479629411898, | |
| "learning_rate": 5.7625197066053374e-05, | |
| "loss": 2.6712, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 24.735376044568245, | |
| "grad_norm": 1.078433196751875, | |
| "learning_rate": 5.753512266959242e-05, | |
| "loss": 2.6658, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 24.79108635097493, | |
| "grad_norm": 1.0663315697490754, | |
| "learning_rate": 5.744494479336786e-05, | |
| "loss": 2.6488, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 24.846796657381617, | |
| "grad_norm": 1.1044363572012328, | |
| "learning_rate": 5.735466405803211e-05, | |
| "loss": 2.6905, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 24.9025069637883, | |
| "grad_norm": 1.0926049616035345, | |
| "learning_rate": 5.7264281084945534e-05, | |
| "loss": 2.6744, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 24.958217270194986, | |
| "grad_norm": 1.0597627637210976, | |
| "learning_rate": 5.717379649617212e-05, | |
| "loss": 2.6501, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 25.01392757660167, | |
| "grad_norm": 0.9918782369429666, | |
| "learning_rate": 5.70832109144753e-05, | |
| "loss": 2.6394, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 25.069637883008358, | |
| "grad_norm": 1.1550215185121195, | |
| "learning_rate": 5.6992524963313494e-05, | |
| "loss": 2.6491, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 25.125348189415043, | |
| "grad_norm": 1.2681147669552022, | |
| "learning_rate": 5.6901739266835976e-05, | |
| "loss": 2.6637, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 25.181058495821727, | |
| "grad_norm": 1.1669892724232989, | |
| "learning_rate": 5.681085444987855e-05, | |
| "loss": 2.6595, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 25.23676880222841, | |
| "grad_norm": 1.118544311154742, | |
| "learning_rate": 5.6719871137959136e-05, | |
| "loss": 2.6602, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 25.2924791086351, | |
| "grad_norm": 1.1886485765626151, | |
| "learning_rate": 5.6628789957273634e-05, | |
| "loss": 2.6209, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 25.348189415041784, | |
| "grad_norm": 1.170833233617083, | |
| "learning_rate": 5.653761153469147e-05, | |
| "loss": 2.6986, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 25.403899721448468, | |
| "grad_norm": 1.248618232385229, | |
| "learning_rate": 5.644633649775136e-05, | |
| "loss": 2.6686, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 25.459610027855152, | |
| "grad_norm": 1.1165667219670397, | |
| "learning_rate": 5.6354965474657e-05, | |
| "loss": 2.6708, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 25.515320334261837, | |
| "grad_norm": 1.1574567530077358, | |
| "learning_rate": 5.626349909427265e-05, | |
| "loss": 2.6521, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 25.571030640668525, | |
| "grad_norm": 1.0835835883220495, | |
| "learning_rate": 5.617193798611895e-05, | |
| "loss": 2.6581, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 25.62674094707521, | |
| "grad_norm": 1.144587311838764, | |
| "learning_rate": 5.6080282780368435e-05, | |
| "loss": 2.6602, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 25.682451253481894, | |
| "grad_norm": 1.112741624680344, | |
| "learning_rate": 5.598853410784133e-05, | |
| "loss": 2.6598, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 25.738161559888578, | |
| "grad_norm": 1.168044637764499, | |
| "learning_rate": 5.589669260000109e-05, | |
| "loss": 2.6645, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 25.793871866295266, | |
| "grad_norm": 1.120774568517253, | |
| "learning_rate": 5.580475888895015e-05, | |
| "loss": 2.6602, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 25.84958217270195, | |
| "grad_norm": 1.2114817500368666, | |
| "learning_rate": 5.571273360742552e-05, | |
| "loss": 2.6328, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 25.905292479108635, | |
| "grad_norm": 1.2356256005701014, | |
| "learning_rate": 5.5620617388794466e-05, | |
| "loss": 2.6384, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 25.96100278551532, | |
| "grad_norm": 1.2235414625020526, | |
| "learning_rate": 5.552841086705014e-05, | |
| "loss": 2.6681, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 26.016713091922007, | |
| "grad_norm": 1.2819636542569275, | |
| "learning_rate": 5.5436114676807156e-05, | |
| "loss": 2.6561, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 26.07242339832869, | |
| "grad_norm": 1.2032746744995522, | |
| "learning_rate": 5.534372945329733e-05, | |
| "loss": 2.6384, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 26.128133704735376, | |
| "grad_norm": 1.1226061787109827, | |
| "learning_rate": 5.525125583236522e-05, | |
| "loss": 2.6294, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 26.18384401114206, | |
| "grad_norm": 1.021042107378643, | |
| "learning_rate": 5.515869445046379e-05, | |
| "loss": 2.6588, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 26.23955431754875, | |
| "grad_norm": 1.291042490575208, | |
| "learning_rate": 5.506604594465004e-05, | |
| "loss": 2.6264, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 26.295264623955433, | |
| "grad_norm": 1.1032208560398462, | |
| "learning_rate": 5.4973310952580576e-05, | |
| "loss": 2.6169, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 26.350974930362117, | |
| "grad_norm": 1.2761063536817212, | |
| "learning_rate": 5.488049011250727e-05, | |
| "loss": 2.6506, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 26.4066852367688, | |
| "grad_norm": 1.1511514672695646, | |
| "learning_rate": 5.478758406327282e-05, | |
| "loss": 2.6698, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 26.462395543175486, | |
| "grad_norm": 1.0992455713094436, | |
| "learning_rate": 5.469459344430642e-05, | |
| "loss": 2.6097, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 26.518105849582174, | |
| "grad_norm": 1.172205701527116, | |
| "learning_rate": 5.4601518895619284e-05, | |
| "loss": 2.6293, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 26.573816155988858, | |
| "grad_norm": 1.2031374723785744, | |
| "learning_rate": 5.4508361057800276e-05, | |
| "loss": 2.6199, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 26.629526462395543, | |
| "grad_norm": 1.0768039312990048, | |
| "learning_rate": 5.441512057201152e-05, | |
| "loss": 2.6497, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 26.685236768802227, | |
| "grad_norm": 1.182782939690952, | |
| "learning_rate": 5.432179807998395e-05, | |
| "loss": 2.6439, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 26.740947075208915, | |
| "grad_norm": 1.1202931202000697, | |
| "learning_rate": 5.422839422401295e-05, | |
| "loss": 2.622, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 26.7966573816156, | |
| "grad_norm": 1.291606489378618, | |
| "learning_rate": 5.413490964695381e-05, | |
| "loss": 2.6146, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 26.852367688022284, | |
| "grad_norm": 1.1538604314310363, | |
| "learning_rate": 5.404134499221748e-05, | |
| "loss": 2.6338, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 26.908077994428968, | |
| "grad_norm": 1.6695600582971142, | |
| "learning_rate": 5.3947700903765986e-05, | |
| "loss": 2.6499, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 26.963788300835656, | |
| "grad_norm": 1.245827651961817, | |
| "learning_rate": 5.3853978026108086e-05, | |
| "loss": 2.6421, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 27.01949860724234, | |
| "grad_norm": 1.173940924924453, | |
| "learning_rate": 5.37601770042948e-05, | |
| "loss": 2.6403, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 27.075208913649025, | |
| "grad_norm": 1.0519719376965715, | |
| "learning_rate": 5.3666298483914984e-05, | |
| "loss": 2.6203, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 27.13091922005571, | |
| "grad_norm": 1.133593745206024, | |
| "learning_rate": 5.357234311109086e-05, | |
| "loss": 2.6574, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 27.186629526462397, | |
| "grad_norm": 1.2653772970355646, | |
| "learning_rate": 5.347831153247361e-05, | |
| "loss": 2.6414, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 27.24233983286908, | |
| "grad_norm": 1.109574259928109, | |
| "learning_rate": 5.338420439523891e-05, | |
| "loss": 2.6147, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 27.298050139275766, | |
| "grad_norm": 1.2411069069646816, | |
| "learning_rate": 5.329002234708245e-05, | |
| "loss": 2.608, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 27.35376044568245, | |
| "grad_norm": 1.1182716681713758, | |
| "learning_rate": 5.319576603621553e-05, | |
| "loss": 2.6413, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 27.409470752089135, | |
| "grad_norm": 1.2003086684148825, | |
| "learning_rate": 5.3101436111360504e-05, | |
| "loss": 2.6275, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 27.465181058495823, | |
| "grad_norm": 1.0717097302386294, | |
| "learning_rate": 5.300703322174646e-05, | |
| "loss": 2.6328, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 27.520891364902507, | |
| "grad_norm": 1.4444979446572614, | |
| "learning_rate": 5.29125580171046e-05, | |
| "loss": 2.6201, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 27.57660167130919, | |
| "grad_norm": 1.0746363462650246, | |
| "learning_rate": 5.281801114766385e-05, | |
| "loss": 2.6123, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 27.632311977715876, | |
| "grad_norm": 1.0661192079701574, | |
| "learning_rate": 5.272339326414642e-05, | |
| "loss": 2.5964, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 27.688022284122564, | |
| "grad_norm": 1.1015082831508238, | |
| "learning_rate": 5.262870501776321e-05, | |
| "loss": 2.5953, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 27.74373259052925, | |
| "grad_norm": 1.1382427443811807, | |
| "learning_rate": 5.253394706020944e-05, | |
| "loss": 2.6181, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 27.799442896935933, | |
| "grad_norm": 1.0943370244357078, | |
| "learning_rate": 5.243912004366008e-05, | |
| "loss": 2.6116, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 27.855153203342617, | |
| "grad_norm": 1.0772056082741257, | |
| "learning_rate": 5.234422462076547e-05, | |
| "loss": 2.5998, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 27.910863509749305, | |
| "grad_norm": 1.0546759553565561, | |
| "learning_rate": 5.2249261444646674e-05, | |
| "loss": 2.5937, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 27.96657381615599, | |
| "grad_norm": 1.176478667272748, | |
| "learning_rate": 5.2154231168891134e-05, | |
| "loss": 2.6093, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 28.022284122562674, | |
| "grad_norm": 1.1321140906627838, | |
| "learning_rate": 5.2059134447548076e-05, | |
| "loss": 2.6229, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 28.077994428969358, | |
| "grad_norm": 1.065907796206171, | |
| "learning_rate": 5.196397193512405e-05, | |
| "loss": 2.6205, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 28.133704735376046, | |
| "grad_norm": 1.233651032400036, | |
| "learning_rate": 5.1868744286578406e-05, | |
| "loss": 2.5931, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 28.18941504178273, | |
| "grad_norm": 1.0599465164229271, | |
| "learning_rate": 5.177345215731881e-05, | |
| "loss": 2.6147, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 28.245125348189415, | |
| "grad_norm": 1.1538820509055618, | |
| "learning_rate": 5.167809620319672e-05, | |
| "loss": 2.6167, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 28.3008356545961, | |
| "grad_norm": 1.154317454601146, | |
| "learning_rate": 5.158267708050286e-05, | |
| "loss": 2.5937, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 28.356545961002787, | |
| "grad_norm": 1.0665328766519204, | |
| "learning_rate": 5.1487195445962715e-05, | |
| "loss": 2.604, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 28.41225626740947, | |
| "grad_norm": 1.1694960107465548, | |
| "learning_rate": 5.139165195673201e-05, | |
| "loss": 2.5995, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 28.467966573816156, | |
| "grad_norm": 1.0526752329813267, | |
| "learning_rate": 5.1296047270392175e-05, | |
| "loss": 2.6209, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 28.52367688022284, | |
| "grad_norm": 1.1414513270535445, | |
| "learning_rate": 5.120038204494588e-05, | |
| "loss": 2.5929, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 28.579387186629525, | |
| "grad_norm": 1.0966114959097728, | |
| "learning_rate": 5.1104656938812394e-05, | |
| "loss": 2.5924, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 28.635097493036213, | |
| "grad_norm": 1.1174765080610298, | |
| "learning_rate": 5.1008872610823155e-05, | |
| "loss": 2.6202, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 28.690807799442897, | |
| "grad_norm": 1.1145726415269017, | |
| "learning_rate": 5.091302972021719e-05, | |
| "loss": 2.5968, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 28.74651810584958, | |
| "grad_norm": 1.0586956135779206, | |
| "learning_rate": 5.08171289266366e-05, | |
| "loss": 2.6123, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 28.802228412256266, | |
| "grad_norm": 1.3940508320205856, | |
| "learning_rate": 5.072117089012195e-05, | |
| "loss": 2.597, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 28.857938718662954, | |
| "grad_norm": 1.0350493924845274, | |
| "learning_rate": 5.062515627110785e-05, | |
| "loss": 2.6207, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 28.91364902506964, | |
| "grad_norm": 1.2315404210100842, | |
| "learning_rate": 5.0529085730418306e-05, | |
| "loss": 2.6179, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 28.969359331476323, | |
| "grad_norm": 1.1045820694597503, | |
| "learning_rate": 5.0432959929262205e-05, | |
| "loss": 2.6008, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 29.025069637883007, | |
| "grad_norm": 1.146579867692895, | |
| "learning_rate": 5.03367795292288e-05, | |
| "loss": 2.6202, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 29.080779944289695, | |
| "grad_norm": 1.2733330944071208, | |
| "learning_rate": 5.0240545192283056e-05, | |
| "loss": 2.6123, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 29.13649025069638, | |
| "grad_norm": 1.0494794859758667, | |
| "learning_rate": 5.0144257580761224e-05, | |
| "loss": 2.5829, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 29.192200557103064, | |
| "grad_norm": 1.1190765883905507, | |
| "learning_rate": 5.0047917357366194e-05, | |
| "loss": 2.6223, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 29.24791086350975, | |
| "grad_norm": 1.5745640876405715, | |
| "learning_rate": 4.995152518516296e-05, | |
| "loss": 2.6133, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 29.303621169916436, | |
| "grad_norm": 1.1765754248618923, | |
| "learning_rate": 4.9855081727574066e-05, | |
| "loss": 2.6047, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 29.35933147632312, | |
| "grad_norm": 1.498763487641365, | |
| "learning_rate": 4.975858764837501e-05, | |
| "loss": 2.5656, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 29.415041782729805, | |
| "grad_norm": 1.1132252850299105, | |
| "learning_rate": 4.966204361168971e-05, | |
| "loss": 2.5914, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 29.47075208913649, | |
| "grad_norm": 1.1686019155914007, | |
| "learning_rate": 4.956545028198591e-05, | |
| "loss": 2.5874, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 29.526462395543177, | |
| "grad_norm": 1.1003078614700978, | |
| "learning_rate": 4.946880832407062e-05, | |
| "loss": 2.6143, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 29.58217270194986, | |
| "grad_norm": 1.0784823374299444, | |
| "learning_rate": 4.937211840308553e-05, | |
| "loss": 2.6153, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 29.637883008356546, | |
| "grad_norm": 1.2454601562477818, | |
| "learning_rate": 4.927538118450244e-05, | |
| "loss": 2.5872, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 29.69359331476323, | |
| "grad_norm": 1.2988040165269858, | |
| "learning_rate": 4.917859733411869e-05, | |
| "loss": 2.603, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 29.749303621169915, | |
| "grad_norm": 1.1745460752963417, | |
| "learning_rate": 4.908176751805253e-05, | |
| "loss": 2.5681, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 29.805013927576603, | |
| "grad_norm": 1.3297312991458439, | |
| "learning_rate": 4.898489240273864e-05, | |
| "loss": 2.6095, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 29.860724233983287, | |
| "grad_norm": 1.1339679906193685, | |
| "learning_rate": 4.888797265492338e-05, | |
| "loss": 2.6067, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 29.91643454038997, | |
| "grad_norm": 1.1806251948614392, | |
| "learning_rate": 4.879100894166038e-05, | |
| "loss": 2.5967, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 29.972144846796656, | |
| "grad_norm": 1.165860935100357, | |
| "learning_rate": 4.8694001930305794e-05, | |
| "loss": 2.5785, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 30.027855153203344, | |
| "grad_norm": 1.3776136936276104, | |
| "learning_rate": 4.859695228851381e-05, | |
| "loss": 2.5897, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 30.08356545961003, | |
| "grad_norm": 1.2587071827316874, | |
| "learning_rate": 4.8499860684232066e-05, | |
| "loss": 2.5797, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 30.139275766016713, | |
| "grad_norm": 1.1970210105742216, | |
| "learning_rate": 4.84027277856969e-05, | |
| "loss": 2.5672, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 30.194986072423397, | |
| "grad_norm": 1.3454554228099718, | |
| "learning_rate": 4.830555426142899e-05, | |
| "loss": 2.5934, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 30.250696378830085, | |
| "grad_norm": 1.264747247911362, | |
| "learning_rate": 4.8208340780228475e-05, | |
| "loss": 2.5894, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 30.30640668523677, | |
| "grad_norm": 1.05872829319226, | |
| "learning_rate": 4.811108801117065e-05, | |
| "loss": 2.5867, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 30.362116991643454, | |
| "grad_norm": 1.084586036460413, | |
| "learning_rate": 4.80137966236011e-05, | |
| "loss": 2.5901, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 30.41782729805014, | |
| "grad_norm": 1.229834581800877, | |
| "learning_rate": 4.7916467287131244e-05, | |
| "loss": 2.5604, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 30.473537604456826, | |
| "grad_norm": 1.4451033607435961, | |
| "learning_rate": 4.7819100671633706e-05, | |
| "loss": 2.597, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 30.52924791086351, | |
| "grad_norm": 1.2548193540172925, | |
| "learning_rate": 4.772169744723762e-05, | |
| "loss": 2.5529, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 30.584958217270195, | |
| "grad_norm": 1.5052254853750195, | |
| "learning_rate": 4.762425828432416e-05, | |
| "loss": 2.6054, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 30.64066852367688, | |
| "grad_norm": 1.1519592174538793, | |
| "learning_rate": 4.7526783853521796e-05, | |
| "loss": 2.5836, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 30.696378830083564, | |
| "grad_norm": 1.1375488161597391, | |
| "learning_rate": 4.742927482570176e-05, | |
| "loss": 2.5621, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 30.75208913649025, | |
| "grad_norm": 1.0552495588001027, | |
| "learning_rate": 4.733173187197335e-05, | |
| "loss": 2.5886, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 30.807799442896936, | |
| "grad_norm": 1.2026052803495149, | |
| "learning_rate": 4.723415566367945e-05, | |
| "loss": 2.576, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 30.86350974930362, | |
| "grad_norm": 1.2031132145653618, | |
| "learning_rate": 4.713654687239171e-05, | |
| "loss": 2.5871, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 30.919220055710305, | |
| "grad_norm": 1.152263728928741, | |
| "learning_rate": 4.703890616990612e-05, | |
| "loss": 2.586, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 30.974930362116993, | |
| "grad_norm": 1.3435496121402817, | |
| "learning_rate": 4.6941234228238256e-05, | |
| "loss": 2.5813, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 31.030640668523677, | |
| "grad_norm": 1.1328862858818538, | |
| "learning_rate": 4.684353171961873e-05, | |
| "loss": 2.5917, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 31.08635097493036, | |
| "grad_norm": 1.2484233364341746, | |
| "learning_rate": 4.674579931648851e-05, | |
| "loss": 2.5619, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 31.142061281337046, | |
| "grad_norm": 1.3584774397600772, | |
| "learning_rate": 4.664803769149427e-05, | |
| "loss": 2.5569, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 31.197771587743734, | |
| "grad_norm": 1.1992013763670537, | |
| "learning_rate": 4.6550247517483926e-05, | |
| "loss": 2.5468, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 31.25348189415042, | |
| "grad_norm": 1.1878315761868057, | |
| "learning_rate": 4.645242946750176e-05, | |
| "loss": 2.5693, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 31.309192200557103, | |
| "grad_norm": 1.2856028595876672, | |
| "learning_rate": 4.635458421478398e-05, | |
| "loss": 2.5959, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 31.364902506963787, | |
| "grad_norm": 1.0789628687819908, | |
| "learning_rate": 4.6256712432754e-05, | |
| "loss": 2.5813, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 31.420612813370475, | |
| "grad_norm": 1.3393871601470777, | |
| "learning_rate": 4.615881479501779e-05, | |
| "loss": 2.5487, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 31.47632311977716, | |
| "grad_norm": 1.4170061638611984, | |
| "learning_rate": 4.606089197535936e-05, | |
| "loss": 2.5672, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 31.532033426183844, | |
| "grad_norm": 1.200423025841862, | |
| "learning_rate": 4.5962944647735934e-05, | |
| "loss": 2.5586, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 31.58774373259053, | |
| "grad_norm": 1.1437564881838662, | |
| "learning_rate": 4.586497348627349e-05, | |
| "loss": 2.5968, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 31.643454038997213, | |
| "grad_norm": 1.08141997515126, | |
| "learning_rate": 4.576697916526199e-05, | |
| "loss": 2.5688, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 31.6991643454039, | |
| "grad_norm": 1.2066477041237875, | |
| "learning_rate": 4.5668962359150815e-05, | |
| "loss": 2.593, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 31.754874651810585, | |
| "grad_norm": 1.2032961351385616, | |
| "learning_rate": 4.557092374254412e-05, | |
| "loss": 2.5883, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 31.81058495821727, | |
| "grad_norm": 1.1746121842250725, | |
| "learning_rate": 4.547286399019614e-05, | |
| "loss": 2.5669, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 31.866295264623954, | |
| "grad_norm": 1.1916678733066106, | |
| "learning_rate": 4.53747837770066e-05, | |
| "loss": 2.5613, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 31.922005571030642, | |
| "grad_norm": 1.151296833307102, | |
| "learning_rate": 4.5276683778015984e-05, | |
| "loss": 2.5574, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 31.977715877437326, | |
| "grad_norm": 1.1387505510412506, | |
| "learning_rate": 4.517856466840108e-05, | |
| "loss": 2.5778, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 32.033426183844014, | |
| "grad_norm": 1.1307628256541518, | |
| "learning_rate": 4.50804271234701e-05, | |
| "loss": 2.58, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 32.089136490250695, | |
| "grad_norm": 1.143654969969148, | |
| "learning_rate": 4.498227181865816e-05, | |
| "loss": 2.5342, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 32.14484679665738, | |
| "grad_norm": 1.0872870621719841, | |
| "learning_rate": 4.488409942952261e-05, | |
| "loss": 2.5615, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 32.200557103064064, | |
| "grad_norm": 1.258161916655144, | |
| "learning_rate": 4.478591063173842e-05, | |
| "loss": 2.5566, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 32.25626740947075, | |
| "grad_norm": 1.2208070919269458, | |
| "learning_rate": 4.468770610109344e-05, | |
| "loss": 2.5549, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 32.31197771587744, | |
| "grad_norm": 1.169221911082448, | |
| "learning_rate": 4.458948651348383e-05, | |
| "loss": 2.5896, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 32.36768802228412, | |
| "grad_norm": 1.095014238667817, | |
| "learning_rate": 4.4491252544909394e-05, | |
| "loss": 2.5633, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 32.42339832869081, | |
| "grad_norm": 1.2970244190994054, | |
| "learning_rate": 4.439300487146887e-05, | |
| "loss": 2.5643, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 32.4791086350975, | |
| "grad_norm": 1.2886541818686938, | |
| "learning_rate": 4.429474416935536e-05, | |
| "loss": 2.6024, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 32.53481894150418, | |
| "grad_norm": 1.1502247567681423, | |
| "learning_rate": 4.419647111485162e-05, | |
| "loss": 2.5393, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 32.590529247910865, | |
| "grad_norm": 1.2887795563474687, | |
| "learning_rate": 4.4098186384325424e-05, | |
| "loss": 2.5511, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 32.646239554317546, | |
| "grad_norm": 1.1497087680966827, | |
| "learning_rate": 4.399989065422491e-05, | |
| "loss": 2.5538, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 32.701949860724234, | |
| "grad_norm": 1.2553351424149752, | |
| "learning_rate": 4.39015846010739e-05, | |
| "loss": 2.5896, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 32.75766016713092, | |
| "grad_norm": 1.1172142499914282, | |
| "learning_rate": 4.380326890146732e-05, | |
| "loss": 2.5503, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 32.8133704735376, | |
| "grad_norm": 1.0957163400827985, | |
| "learning_rate": 4.370494423206639e-05, | |
| "loss": 2.5527, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 32.86908077994429, | |
| "grad_norm": 1.1727453881618946, | |
| "learning_rate": 4.360661126959418e-05, | |
| "loss": 2.5808, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 32.92479108635097, | |
| "grad_norm": 1.5043031377227738, | |
| "learning_rate": 4.3508270690830764e-05, | |
| "loss": 2.5809, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 32.98050139275766, | |
| "grad_norm": 3.061321585302142, | |
| "learning_rate": 4.340992317260865e-05, | |
| "loss": 2.5672, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 33.03621169916435, | |
| "grad_norm": 1.2959039372508918, | |
| "learning_rate": 4.3311569391808116e-05, | |
| "loss": 2.5542, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 33.09192200557103, | |
| "grad_norm": 1.1013587892712957, | |
| "learning_rate": 4.321321002535253e-05, | |
| "loss": 2.5175, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 33.147632311977716, | |
| "grad_norm": 1.0924446035892936, | |
| "learning_rate": 4.311484575020373e-05, | |
| "loss": 2.538, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 33.203342618384404, | |
| "grad_norm": 1.4345799681219358, | |
| "learning_rate": 4.3016477243357297e-05, | |
| "loss": 2.5775, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 33.259052924791085, | |
| "grad_norm": 1.2806133296858657, | |
| "learning_rate": 4.291810518183797e-05, | |
| "loss": 2.5358, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 33.31476323119777, | |
| "grad_norm": 1.1660089750322047, | |
| "learning_rate": 4.2819730242694924e-05, | |
| "loss": 2.5516, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 33.370473537604454, | |
| "grad_norm": 1.5552478865676143, | |
| "learning_rate": 4.272135310299719e-05, | |
| "loss": 2.5551, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 33.42618384401114, | |
| "grad_norm": 1.3019947250320747, | |
| "learning_rate": 4.262297443982888e-05, | |
| "loss": 2.5147, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 33.48189415041783, | |
| "grad_norm": 1.1712469646586066, | |
| "learning_rate": 4.252459493028466e-05, | |
| "loss": 2.5448, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 33.53760445682451, | |
| "grad_norm": 1.190656040631796, | |
| "learning_rate": 4.2426215251464944e-05, | |
| "loss": 2.5421, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 33.5933147632312, | |
| "grad_norm": 1.166057800751934, | |
| "learning_rate": 4.232783608047138e-05, | |
| "loss": 2.5225, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 33.64902506963789, | |
| "grad_norm": 1.1827485566010056, | |
| "learning_rate": 4.222945809440208e-05, | |
| "loss": 2.5264, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 33.70473537604457, | |
| "grad_norm": 1.0982238340826698, | |
| "learning_rate": 4.213108197034701e-05, | |
| "loss": 2.5311, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 33.760445682451255, | |
| "grad_norm": 1.3570517309524062, | |
| "learning_rate": 4.2032708385383325e-05, | |
| "loss": 2.5381, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 33.816155988857936, | |
| "grad_norm": 1.221101343240307, | |
| "learning_rate": 4.193433801657072e-05, | |
| "loss": 2.5085, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 33.871866295264624, | |
| "grad_norm": 1.1074955804736837, | |
| "learning_rate": 4.183597154094672e-05, | |
| "loss": 2.554, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 33.92757660167131, | |
| "grad_norm": 1.2061033239097223, | |
| "learning_rate": 4.173760963552209e-05, | |
| "loss": 2.5144, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 33.98328690807799, | |
| "grad_norm": 1.5819083614189133, | |
| "learning_rate": 4.1639252977276076e-05, | |
| "loss": 2.5495, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 34.03899721448468, | |
| "grad_norm": 1.2608802822567775, | |
| "learning_rate": 4.1540902243151906e-05, | |
| "loss": 2.5386, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 34.09470752089136, | |
| "grad_norm": 1.1987980606427822, | |
| "learning_rate": 4.144255811005199e-05, | |
| "loss": 2.5521, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 34.15041782729805, | |
| "grad_norm": 1.5620621378858097, | |
| "learning_rate": 4.134422125483328e-05, | |
| "loss": 2.547, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 34.20612813370474, | |
| "grad_norm": 1.1486672391725685, | |
| "learning_rate": 4.124589235430266e-05, | |
| "loss": 2.5527, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 34.26183844011142, | |
| "grad_norm": 1.0658317601206686, | |
| "learning_rate": 4.114757208521229e-05, | |
| "loss": 2.5188, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 34.317548746518106, | |
| "grad_norm": 1.1632885977667755, | |
| "learning_rate": 4.104926112425487e-05, | |
| "loss": 2.5066, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 34.373259052924794, | |
| "grad_norm": 1.1978332103431018, | |
| "learning_rate": 4.095096014805907e-05, | |
| "loss": 2.5242, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 34.428969359331475, | |
| "grad_norm": 1.2329715563026837, | |
| "learning_rate": 4.0852669833184864e-05, | |
| "loss": 2.5121, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 34.48467966573816, | |
| "grad_norm": 1.4254336559578342, | |
| "learning_rate": 4.075439085611879e-05, | |
| "loss": 2.5327, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 34.540389972144844, | |
| "grad_norm": 1.3455389003917915, | |
| "learning_rate": 4.065612389326941e-05, | |
| "loss": 2.5282, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 34.59610027855153, | |
| "grad_norm": 1.1098645710519481, | |
| "learning_rate": 4.055786962096253e-05, | |
| "loss": 2.5414, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 34.65181058495822, | |
| "grad_norm": 1.0712441022651848, | |
| "learning_rate": 4.04596287154367e-05, | |
| "loss": 2.5243, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 34.7075208913649, | |
| "grad_norm": 1.2229530919327418, | |
| "learning_rate": 4.0361401852838415e-05, | |
| "loss": 2.5391, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 34.76323119777159, | |
| "grad_norm": 1.1116414230831893, | |
| "learning_rate": 4.026318970921751e-05, | |
| "loss": 2.5549, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 34.81894150417827, | |
| "grad_norm": 1.3725266835406396, | |
| "learning_rate": 4.016499296052257e-05, | |
| "loss": 2.5375, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 34.87465181058496, | |
| "grad_norm": 1.1846019558051342, | |
| "learning_rate": 4.0066812282596165e-05, | |
| "loss": 2.5508, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 34.930362116991645, | |
| "grad_norm": 1.155996133148295, | |
| "learning_rate": 3.9968648351170285e-05, | |
| "loss": 2.5284, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 34.986072423398326, | |
| "grad_norm": 1.3251336278575234, | |
| "learning_rate": 3.987050184186168e-05, | |
| "loss": 2.5112, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 35.041782729805014, | |
| "grad_norm": 1.2962788212433607, | |
| "learning_rate": 3.9772373430167165e-05, | |
| "loss": 2.5334, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 35.0974930362117, | |
| "grad_norm": 1.1720387548872857, | |
| "learning_rate": 3.967426379145899e-05, | |
| "loss": 2.5233, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 35.15320334261838, | |
| "grad_norm": 1.0933571811040883, | |
| "learning_rate": 3.957617360098023e-05, | |
| "loss": 2.5134, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 35.20891364902507, | |
| "grad_norm": 1.2440055643446601, | |
| "learning_rate": 3.9478103533840095e-05, | |
| "loss": 2.5155, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 35.26462395543175, | |
| "grad_norm": 1.1329596221636413, | |
| "learning_rate": 3.938005426500927e-05, | |
| "loss": 2.5335, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 35.32033426183844, | |
| "grad_norm": 1.185464659161853, | |
| "learning_rate": 3.928202646931534e-05, | |
| "loss": 2.5438, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 35.37604456824513, | |
| "grad_norm": 1.3785291897615783, | |
| "learning_rate": 3.918402082143804e-05, | |
| "loss": 2.5442, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 35.43175487465181, | |
| "grad_norm": 2.546941401301199, | |
| "learning_rate": 3.908603799590476e-05, | |
| "loss": 2.5113, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 35.4874651810585, | |
| "grad_norm": 1.1505394021830595, | |
| "learning_rate": 3.898807866708572e-05, | |
| "loss": 2.5527, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 35.543175487465184, | |
| "grad_norm": 1.1406223829986795, | |
| "learning_rate": 3.889014350918947e-05, | |
| "loss": 2.5169, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 35.598885793871865, | |
| "grad_norm": 1.5268262564835722, | |
| "learning_rate": 3.8792233196258226e-05, | |
| "loss": 2.5385, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 35.65459610027855, | |
| "grad_norm": 1.4366052923997545, | |
| "learning_rate": 3.869434840216315e-05, | |
| "loss": 2.5138, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 35.710306406685234, | |
| "grad_norm": 1.3493022957735832, | |
| "learning_rate": 3.8596489800599826e-05, | |
| "loss": 2.5012, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 35.76601671309192, | |
| "grad_norm": 1.2612297939888255, | |
| "learning_rate": 3.849865806508352e-05, | |
| "loss": 2.5167, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 35.82172701949861, | |
| "grad_norm": 1.1655785400988308, | |
| "learning_rate": 3.8400853868944604e-05, | |
| "loss": 2.5054, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 35.87743732590529, | |
| "grad_norm": 1.195789035259279, | |
| "learning_rate": 3.8303077885323945e-05, | |
| "loss": 2.5038, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 35.93314763231198, | |
| "grad_norm": 1.1195435490995402, | |
| "learning_rate": 3.820533078716821e-05, | |
| "loss": 2.5628, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 35.98885793871866, | |
| "grad_norm": 1.7921872957808758, | |
| "learning_rate": 3.810761324722523e-05, | |
| "loss": 2.5052, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 36.04456824512535, | |
| "grad_norm": 1.2261058827590274, | |
| "learning_rate": 3.800992593803946e-05, | |
| "loss": 2.5112, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 36.100278551532035, | |
| "grad_norm": 1.135621182926646, | |
| "learning_rate": 3.791226953194725e-05, | |
| "loss": 2.5028, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 36.155988857938716, | |
| "grad_norm": 1.6277687709086734, | |
| "learning_rate": 3.7814644701072246e-05, | |
| "loss": 2.5162, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 36.211699164345404, | |
| "grad_norm": 1.1697686185114848, | |
| "learning_rate": 3.771705211732085e-05, | |
| "loss": 2.4937, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 36.26740947075209, | |
| "grad_norm": 1.1470445311609152, | |
| "learning_rate": 3.761949245237742e-05, | |
| "loss": 2.4959, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 36.32311977715877, | |
| "grad_norm": 1.1830067451201864, | |
| "learning_rate": 3.752196637769983e-05, | |
| "loss": 2.5184, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 36.37883008356546, | |
| "grad_norm": 1.467366443353488, | |
| "learning_rate": 3.742447456451474e-05, | |
| "loss": 2.5167, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 36.43454038997214, | |
| "grad_norm": 1.4104527622849412, | |
| "learning_rate": 3.732701768381299e-05, | |
| "loss": 2.5044, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 36.49025069637883, | |
| "grad_norm": 1.2155633839198112, | |
| "learning_rate": 3.722959640634501e-05, | |
| "loss": 2.5472, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 36.54596100278552, | |
| "grad_norm": 1.083177335128614, | |
| "learning_rate": 3.713221140261619e-05, | |
| "loss": 2.5002, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 36.6016713091922, | |
| "grad_norm": 1.2014071998350737, | |
| "learning_rate": 3.703486334288228e-05, | |
| "loss": 2.5114, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 36.65738161559889, | |
| "grad_norm": 1.1428988294911389, | |
| "learning_rate": 3.693755289714471e-05, | |
| "loss": 2.4979, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 36.713091922005574, | |
| "grad_norm": 1.1304274747808816, | |
| "learning_rate": 3.68402807351461e-05, | |
| "loss": 2.4936, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 36.768802228412255, | |
| "grad_norm": 1.3650882764211232, | |
| "learning_rate": 3.674304752636551e-05, | |
| "loss": 2.5157, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 36.82451253481894, | |
| "grad_norm": 1.2182169011081385, | |
| "learning_rate": 3.664585394001398e-05, | |
| "loss": 2.5035, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 36.880222841225624, | |
| "grad_norm": 1.272624756548235, | |
| "learning_rate": 3.654870064502978e-05, | |
| "loss": 2.4992, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 36.93593314763231, | |
| "grad_norm": 1.0715589202820812, | |
| "learning_rate": 3.6451588310073895e-05, | |
| "loss": 2.5021, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 36.991643454039, | |
| "grad_norm": 1.2942781983724116, | |
| "learning_rate": 3.6354517603525434e-05, | |
| "loss": 2.4859, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 37.04735376044568, | |
| "grad_norm": 1.2606778534888776, | |
| "learning_rate": 3.625748919347694e-05, | |
| "loss": 2.506, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 37.10306406685237, | |
| "grad_norm": 1.2777601010216262, | |
| "learning_rate": 3.616050374772989e-05, | |
| "loss": 2.4778, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 37.15877437325905, | |
| "grad_norm": 1.176481978013248, | |
| "learning_rate": 3.606356193379004e-05, | |
| "loss": 2.5033, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 37.21448467966574, | |
| "grad_norm": 1.2169531494104022, | |
| "learning_rate": 3.596666441886285e-05, | |
| "loss": 2.4996, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 37.270194986072426, | |
| "grad_norm": 1.3217947072787108, | |
| "learning_rate": 3.586981186984891e-05, | |
| "loss": 2.4884, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 37.325905292479106, | |
| "grad_norm": 1.2091550344263509, | |
| "learning_rate": 3.577300495333929e-05, | |
| "loss": 2.4643, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 37.381615598885794, | |
| "grad_norm": 1.3020086095843864, | |
| "learning_rate": 3.5676244335611045e-05, | |
| "loss": 2.5115, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 37.43732590529248, | |
| "grad_norm": 1.224631401229228, | |
| "learning_rate": 3.5579530682622527e-05, | |
| "loss": 2.5052, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 37.49303621169916, | |
| "grad_norm": 1.2937153649240984, | |
| "learning_rate": 3.548286466000888e-05, | |
| "loss": 2.4887, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 37.54874651810585, | |
| "grad_norm": 1.1629156767485442, | |
| "learning_rate": 3.5386246933077437e-05, | |
| "loss": 2.4835, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 37.60445682451253, | |
| "grad_norm": 1.409151745433056, | |
| "learning_rate": 3.52896781668031e-05, | |
| "loss": 2.4844, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 37.66016713091922, | |
| "grad_norm": 1.219993695453392, | |
| "learning_rate": 3.519315902582384e-05, | |
| "loss": 2.4891, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 37.71587743732591, | |
| "grad_norm": 1.286568356632355, | |
| "learning_rate": 3.509669017443603e-05, | |
| "loss": 2.5028, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 37.77158774373259, | |
| "grad_norm": 1.2144839115218449, | |
| "learning_rate": 3.500027227658998e-05, | |
| "loss": 2.4808, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 37.82729805013928, | |
| "grad_norm": 1.4206777606666374, | |
| "learning_rate": 3.490390599588527e-05, | |
| "loss": 2.4884, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 37.88300835654596, | |
| "grad_norm": 1.119769219553682, | |
| "learning_rate": 3.480759199556625e-05, | |
| "loss": 2.532, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 37.938718662952645, | |
| "grad_norm": 1.216344088823688, | |
| "learning_rate": 3.4711330938517415e-05, | |
| "loss": 2.4825, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 37.99442896935933, | |
| "grad_norm": 1.2356798590645826, | |
| "learning_rate": 3.4615123487258904e-05, | |
| "loss": 2.477, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 38.050139275766014, | |
| "grad_norm": 1.1889430421938458, | |
| "learning_rate": 3.45189703039419e-05, | |
| "loss": 2.4657, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 38.1058495821727, | |
| "grad_norm": 1.2167871317287668, | |
| "learning_rate": 3.442287205034409e-05, | |
| "loss": 2.4873, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 38.16155988857939, | |
| "grad_norm": 1.3417090804209535, | |
| "learning_rate": 3.4326829387865105e-05, | |
| "loss": 2.4978, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 38.21727019498607, | |
| "grad_norm": 1.215214848291835, | |
| "learning_rate": 3.423084297752197e-05, | |
| "loss": 2.4873, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 38.27298050139276, | |
| "grad_norm": 1.3942080399553185, | |
| "learning_rate": 3.413491347994455e-05, | |
| "loss": 2.4869, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 38.32869080779944, | |
| "grad_norm": 1.1596082595198587, | |
| "learning_rate": 3.4039041555370985e-05, | |
| "loss": 2.4742, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 38.38440111420613, | |
| "grad_norm": 1.300823510553577, | |
| "learning_rate": 3.394322786364321e-05, | |
| "loss": 2.4824, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 38.440111420612816, | |
| "grad_norm": 1.4023204575319392, | |
| "learning_rate": 3.384747306420234e-05, | |
| "loss": 2.5132, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 38.4958217270195, | |
| "grad_norm": 1.169311221589696, | |
| "learning_rate": 3.375177781608417e-05, | |
| "loss": 2.4931, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 38.551532033426184, | |
| "grad_norm": 1.3513790522732192, | |
| "learning_rate": 3.365614277791463e-05, | |
| "loss": 2.5037, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 38.60724233983287, | |
| "grad_norm": 1.127031391091575, | |
| "learning_rate": 3.3560568607905244e-05, | |
| "loss": 2.5187, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 38.66295264623955, | |
| "grad_norm": 1.2275938957516415, | |
| "learning_rate": 3.346505596384864e-05, | |
| "loss": 2.4657, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 38.71866295264624, | |
| "grad_norm": 1.3320190791797168, | |
| "learning_rate": 3.336960550311395e-05, | |
| "loss": 2.4951, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 38.77437325905292, | |
| "grad_norm": 1.5869673766120833, | |
| "learning_rate": 3.3274217882642355e-05, | |
| "loss": 2.5087, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 38.83008356545961, | |
| "grad_norm": 1.2695582913500114, | |
| "learning_rate": 3.317889375894252e-05, | |
| "loss": 2.4826, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 38.8857938718663, | |
| "grad_norm": 1.430015517022631, | |
| "learning_rate": 3.3083633788086115e-05, | |
| "loss": 2.4652, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 38.94150417827298, | |
| "grad_norm": 1.2238102984760655, | |
| "learning_rate": 3.2988438625703226e-05, | |
| "loss": 2.5151, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 38.99721448467967, | |
| "grad_norm": 1.1505694948469867, | |
| "learning_rate": 3.2893308926977964e-05, | |
| "loss": 2.4639, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 39.05292479108635, | |
| "grad_norm": 1.3358804025647248, | |
| "learning_rate": 3.2798245346643826e-05, | |
| "loss": 2.4831, | |
| "step": 14020 | |
| }, | |
| { | |
| "epoch": 39.108635097493035, | |
| "grad_norm": 1.1965450681906031, | |
| "learning_rate": 3.270324853897926e-05, | |
| "loss": 2.4934, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 39.16434540389972, | |
| "grad_norm": 1.2724701888326422, | |
| "learning_rate": 3.260831915780317e-05, | |
| "loss": 2.515, | |
| "step": 14060 | |
| }, | |
| { | |
| "epoch": 39.220055710306404, | |
| "grad_norm": 1.226431107683309, | |
| "learning_rate": 3.251345785647037e-05, | |
| "loss": 2.4912, | |
| "step": 14080 | |
| }, | |
| { | |
| "epoch": 39.27576601671309, | |
| "grad_norm": 1.4978021366863286, | |
| "learning_rate": 3.241866528786712e-05, | |
| "loss": 2.4666, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 39.33147632311978, | |
| "grad_norm": 1.1852102410219578, | |
| "learning_rate": 3.232394210440664e-05, | |
| "loss": 2.453, | |
| "step": 14120 | |
| }, | |
| { | |
| "epoch": 39.38718662952646, | |
| "grad_norm": 1.1514347696655127, | |
| "learning_rate": 3.222928895802457e-05, | |
| "loss": 2.492, | |
| "step": 14140 | |
| }, | |
| { | |
| "epoch": 39.44289693593315, | |
| "grad_norm": 1.1727022967870606, | |
| "learning_rate": 3.213470650017457e-05, | |
| "loss": 2.4671, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 39.49860724233983, | |
| "grad_norm": 1.2255693825516534, | |
| "learning_rate": 3.204019538182371e-05, | |
| "loss": 2.47, | |
| "step": 14180 | |
| }, | |
| { | |
| "epoch": 39.55431754874652, | |
| "grad_norm": 1.3308024547484585, | |
| "learning_rate": 3.194575625344813e-05, | |
| "loss": 2.4705, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 39.610027855153206, | |
| "grad_norm": 1.29922812166292, | |
| "learning_rate": 3.185138976502847e-05, | |
| "loss": 2.4756, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 39.66573816155989, | |
| "grad_norm": 1.172050649565444, | |
| "learning_rate": 3.175709656604543e-05, | |
| "loss": 2.4795, | |
| "step": 14240 | |
| }, | |
| { | |
| "epoch": 39.721448467966574, | |
| "grad_norm": 1.1860108514337921, | |
| "learning_rate": 3.166287730547528e-05, | |
| "loss": 2.4682, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 39.77715877437326, | |
| "grad_norm": 1.1464631864975399, | |
| "learning_rate": 3.1568732631785405e-05, | |
| "loss": 2.4649, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 39.83286908077994, | |
| "grad_norm": 1.2912682284287014, | |
| "learning_rate": 3.147466319292988e-05, | |
| "loss": 2.458, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 39.88857938718663, | |
| "grad_norm": 1.24473741562901, | |
| "learning_rate": 3.138066963634491e-05, | |
| "loss": 2.4418, | |
| "step": 14320 | |
| }, | |
| { | |
| "epoch": 39.94428969359331, | |
| "grad_norm": 1.2188645511392617, | |
| "learning_rate": 3.1286752608944504e-05, | |
| "loss": 2.4666, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 1.515575618356932, | |
| "learning_rate": 3.11929127571159e-05, | |
| "loss": 2.4695, | |
| "step": 14360 | |
| }, | |
| { | |
| "epoch": 40.05571030640669, | |
| "grad_norm": 1.267322258199449, | |
| "learning_rate": 3.10991507267152e-05, | |
| "loss": 2.4631, | |
| "step": 14380 | |
| }, | |
| { | |
| "epoch": 40.11142061281337, | |
| "grad_norm": 1.2899851180690782, | |
| "learning_rate": 3.100546716306292e-05, | |
| "loss": 2.4461, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 40.16713091922006, | |
| "grad_norm": 1.14649768727926, | |
| "learning_rate": 3.091186271093947e-05, | |
| "loss": 2.4534, | |
| "step": 14420 | |
| }, | |
| { | |
| "epoch": 40.22284122562674, | |
| "grad_norm": 1.2682086289441403, | |
| "learning_rate": 3.081833801458084e-05, | |
| "loss": 2.4369, | |
| "step": 14440 | |
| }, | |
| { | |
| "epoch": 40.278551532033426, | |
| "grad_norm": 1.2091006451986415, | |
| "learning_rate": 3.0724893717674023e-05, | |
| "loss": 2.4586, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 40.33426183844011, | |
| "grad_norm": 1.3592584263818426, | |
| "learning_rate": 3.063153046335271e-05, | |
| "loss": 2.4591, | |
| "step": 14480 | |
| }, | |
| { | |
| "epoch": 40.389972144846794, | |
| "grad_norm": 1.3946508542140756, | |
| "learning_rate": 3.0538248894192804e-05, | |
| "loss": 2.4411, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 40.44568245125348, | |
| "grad_norm": 1.1777413578534581, | |
| "learning_rate": 3.0445049652207995e-05, | |
| "loss": 2.4261, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 40.50139275766017, | |
| "grad_norm": 1.2195721573601, | |
| "learning_rate": 3.035193337884538e-05, | |
| "loss": 2.4421, | |
| "step": 14540 | |
| }, | |
| { | |
| "epoch": 40.55710306406685, | |
| "grad_norm": 1.3520200371008158, | |
| "learning_rate": 3.0258900714981e-05, | |
| "loss": 2.4602, | |
| "step": 14560 | |
| }, | |
| { | |
| "epoch": 40.61281337047354, | |
| "grad_norm": 1.3624148046187146, | |
| "learning_rate": 3.016595230091545e-05, | |
| "loss": 2.4655, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 40.66852367688022, | |
| "grad_norm": 1.2699195161380987, | |
| "learning_rate": 3.0073088776369473e-05, | |
| "loss": 2.4279, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 40.72423398328691, | |
| "grad_norm": 1.1758469344658353, | |
| "learning_rate": 2.998031078047958e-05, | |
| "loss": 2.473, | |
| "step": 14620 | |
| }, | |
| { | |
| "epoch": 40.779944289693596, | |
| "grad_norm": 1.215388378764313, | |
| "learning_rate": 2.9887618951793587e-05, | |
| "loss": 2.4955, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 40.83565459610028, | |
| "grad_norm": 1.2477068837194507, | |
| "learning_rate": 2.97950139282663e-05, | |
| "loss": 2.4784, | |
| "step": 14660 | |
| }, | |
| { | |
| "epoch": 40.891364902506965, | |
| "grad_norm": 1.3420186707298443, | |
| "learning_rate": 2.9702496347255056e-05, | |
| "loss": 2.4768, | |
| "step": 14680 | |
| }, | |
| { | |
| "epoch": 40.94707520891365, | |
| "grad_norm": 1.39249618130456, | |
| "learning_rate": 2.9610066845515383e-05, | |
| "loss": 2.4385, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 41.00278551532033, | |
| "grad_norm": 1.2754818915657757, | |
| "learning_rate": 2.9517726059196613e-05, | |
| "loss": 2.4569, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 41.05849582172702, | |
| "grad_norm": 1.4277410738801621, | |
| "learning_rate": 2.942547462383744e-05, | |
| "loss": 2.4587, | |
| "step": 14740 | |
| }, | |
| { | |
| "epoch": 41.1142061281337, | |
| "grad_norm": 1.1986405858066078, | |
| "learning_rate": 2.9333313174361673e-05, | |
| "loss": 2.4533, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 41.16991643454039, | |
| "grad_norm": 1.4106771315477926, | |
| "learning_rate": 2.924124234507371e-05, | |
| "loss": 2.4564, | |
| "step": 14780 | |
| }, | |
| { | |
| "epoch": 41.22562674094708, | |
| "grad_norm": 1.1572284880554229, | |
| "learning_rate": 2.9149262769654307e-05, | |
| "loss": 2.4403, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 41.28133704735376, | |
| "grad_norm": 1.3398664349194382, | |
| "learning_rate": 2.9057375081156153e-05, | |
| "loss": 2.4632, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 41.33704735376045, | |
| "grad_norm": 1.4232783195653564, | |
| "learning_rate": 2.89655799119995e-05, | |
| "loss": 2.4457, | |
| "step": 14840 | |
| }, | |
| { | |
| "epoch": 41.39275766016713, | |
| "grad_norm": 1.2303189537876713, | |
| "learning_rate": 2.887387789396784e-05, | |
| "loss": 2.4454, | |
| "step": 14860 | |
| }, | |
| { | |
| "epoch": 41.448467966573816, | |
| "grad_norm": 1.4753554867267846, | |
| "learning_rate": 2.8782269658203593e-05, | |
| "loss": 2.4708, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 41.5041782729805, | |
| "grad_norm": 2.0554338333623225, | |
| "learning_rate": 2.8690755835203644e-05, | |
| "loss": 2.4174, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 41.559888579387184, | |
| "grad_norm": 1.2638050115107629, | |
| "learning_rate": 2.8599337054815128e-05, | |
| "loss": 2.4576, | |
| "step": 14920 | |
| }, | |
| { | |
| "epoch": 41.61559888579387, | |
| "grad_norm": 1.6592795850932565, | |
| "learning_rate": 2.8508013946231054e-05, | |
| "loss": 2.4439, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 41.67130919220056, | |
| "grad_norm": 1.2018369861968858, | |
| "learning_rate": 2.8416787137985912e-05, | |
| "loss": 2.4677, | |
| "step": 14960 | |
| }, | |
| { | |
| "epoch": 41.72701949860724, | |
| "grad_norm": 1.1787125181340552, | |
| "learning_rate": 2.832565725795147e-05, | |
| "loss": 2.4423, | |
| "step": 14980 | |
| }, | |
| { | |
| "epoch": 41.78272980501393, | |
| "grad_norm": 1.3144876376584371, | |
| "learning_rate": 2.8234624933332324e-05, | |
| "loss": 2.4166, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 41.83844011142061, | |
| "grad_norm": 1.2101663058378904, | |
| "learning_rate": 2.8143690790661687e-05, | |
| "loss": 2.431, | |
| "step": 15020 | |
| }, | |
| { | |
| "epoch": 41.8941504178273, | |
| "grad_norm": 1.3306396247714227, | |
| "learning_rate": 2.8052855455797008e-05, | |
| "loss": 2.423, | |
| "step": 15040 | |
| }, | |
| { | |
| "epoch": 41.949860724233986, | |
| "grad_norm": 1.1740048371108092, | |
| "learning_rate": 2.7962119553915685e-05, | |
| "loss": 2.4543, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 42.00557103064067, | |
| "grad_norm": 1.2505959391308659, | |
| "learning_rate": 2.7871483709510788e-05, | |
| "loss": 2.4612, | |
| "step": 15080 | |
| }, | |
| { | |
| "epoch": 42.061281337047355, | |
| "grad_norm": 1.1705839887196592, | |
| "learning_rate": 2.7780948546386702e-05, | |
| "loss": 2.4248, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 42.116991643454035, | |
| "grad_norm": 1.5770301620040164, | |
| "learning_rate": 2.76905146876549e-05, | |
| "loss": 2.4475, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 42.17270194986072, | |
| "grad_norm": 1.3540734118986908, | |
| "learning_rate": 2.760018275572962e-05, | |
| "loss": 2.4186, | |
| "step": 15140 | |
| }, | |
| { | |
| "epoch": 42.22841225626741, | |
| "grad_norm": 1.146718032535289, | |
| "learning_rate": 2.750995337232356e-05, | |
| "loss": 2.4091, | |
| "step": 15160 | |
| }, | |
| { | |
| "epoch": 42.28412256267409, | |
| "grad_norm": 1.2196868218322996, | |
| "learning_rate": 2.7419827158443667e-05, | |
| "loss": 2.4309, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 42.33983286908078, | |
| "grad_norm": 1.5485243840943164, | |
| "learning_rate": 2.7329804734386765e-05, | |
| "loss": 2.4602, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 42.39554317548747, | |
| "grad_norm": 1.2206227305933974, | |
| "learning_rate": 2.723988671973541e-05, | |
| "loss": 2.4701, | |
| "step": 15220 | |
| }, | |
| { | |
| "epoch": 42.45125348189415, | |
| "grad_norm": 1.26332460678578, | |
| "learning_rate": 2.7150073733353484e-05, | |
| "loss": 2.4528, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 42.50696378830084, | |
| "grad_norm": 1.311901210503493, | |
| "learning_rate": 2.706036639338207e-05, | |
| "loss": 2.4283, | |
| "step": 15260 | |
| }, | |
| { | |
| "epoch": 42.56267409470752, | |
| "grad_norm": 1.2690533418017822, | |
| "learning_rate": 2.6970765317235096e-05, | |
| "loss": 2.4345, | |
| "step": 15280 | |
| }, | |
| { | |
| "epoch": 42.618384401114206, | |
| "grad_norm": 1.2676520230160475, | |
| "learning_rate": 2.6881271121595137e-05, | |
| "loss": 2.4048, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 42.674094707520894, | |
| "grad_norm": 1.4516895593566883, | |
| "learning_rate": 2.6791884422409157e-05, | |
| "loss": 2.4279, | |
| "step": 15320 | |
| }, | |
| { | |
| "epoch": 42.729805013927574, | |
| "grad_norm": 1.3872513872471008, | |
| "learning_rate": 2.6702605834884283e-05, | |
| "loss": 2.4026, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 42.78551532033426, | |
| "grad_norm": 1.3767239373202538, | |
| "learning_rate": 2.6613435973483546e-05, | |
| "loss": 2.4219, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 42.84122562674095, | |
| "grad_norm": 1.206741931800155, | |
| "learning_rate": 2.6524375451921694e-05, | |
| "loss": 2.426, | |
| "step": 15380 | |
| }, | |
| { | |
| "epoch": 42.89693593314763, | |
| "grad_norm": 1.314107492262272, | |
| "learning_rate": 2.643542488316087e-05, | |
| "loss": 2.4027, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 42.95264623955432, | |
| "grad_norm": 1.2591566602374167, | |
| "learning_rate": 2.6346584879406546e-05, | |
| "loss": 2.4105, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 43.008356545961, | |
| "grad_norm": 1.2543202609095945, | |
| "learning_rate": 2.6257856052103176e-05, | |
| "loss": 2.4174, | |
| "step": 15440 | |
| }, | |
| { | |
| "epoch": 43.06406685236769, | |
| "grad_norm": 1.3161836673091634, | |
| "learning_rate": 2.616923901193006e-05, | |
| "loss": 2.4146, | |
| "step": 15460 | |
| }, | |
| { | |
| "epoch": 43.119777158774376, | |
| "grad_norm": 1.171075292652416, | |
| "learning_rate": 2.6080734368797124e-05, | |
| "loss": 2.4159, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 43.17548746518106, | |
| "grad_norm": 1.207718728407823, | |
| "learning_rate": 2.599234273184067e-05, | |
| "loss": 2.404, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 43.231197771587745, | |
| "grad_norm": 1.2836648932544974, | |
| "learning_rate": 2.5904064709419275e-05, | |
| "loss": 2.4147, | |
| "step": 15520 | |
| }, | |
| { | |
| "epoch": 43.286908077994426, | |
| "grad_norm": 1.2867738641320774, | |
| "learning_rate": 2.5815900909109578e-05, | |
| "loss": 2.4405, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 43.34261838440111, | |
| "grad_norm": 1.314400827907675, | |
| "learning_rate": 2.572785193770205e-05, | |
| "loss": 2.384, | |
| "step": 15560 | |
| }, | |
| { | |
| "epoch": 43.3983286908078, | |
| "grad_norm": 1.4291537299918844, | |
| "learning_rate": 2.5639918401196828e-05, | |
| "loss": 2.4408, | |
| "step": 15580 | |
| }, | |
| { | |
| "epoch": 43.45403899721448, | |
| "grad_norm": 1.5382813225617216, | |
| "learning_rate": 2.555210090479959e-05, | |
| "loss": 2.4224, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 43.50974930362117, | |
| "grad_norm": 1.2172238724441946, | |
| "learning_rate": 2.5464400052917377e-05, | |
| "loss": 2.4273, | |
| "step": 15620 | |
| }, | |
| { | |
| "epoch": 43.56545961002786, | |
| "grad_norm": 1.3185716102890666, | |
| "learning_rate": 2.537681644915439e-05, | |
| "loss": 2.4399, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 43.62116991643454, | |
| "grad_norm": 1.7970207701573762, | |
| "learning_rate": 2.528935069630791e-05, | |
| "loss": 2.438, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 43.67688022284123, | |
| "grad_norm": 1.336384852624976, | |
| "learning_rate": 2.5202003396364028e-05, | |
| "loss": 2.4104, | |
| "step": 15680 | |
| }, | |
| { | |
| "epoch": 43.73259052924791, | |
| "grad_norm": 1.2492741812810837, | |
| "learning_rate": 2.5114775150493652e-05, | |
| "loss": 2.4372, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 43.788300835654596, | |
| "grad_norm": 1.1839597940386342, | |
| "learning_rate": 2.5027666559048265e-05, | |
| "loss": 2.4374, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 43.844011142061284, | |
| "grad_norm": 1.5042190873869037, | |
| "learning_rate": 2.4940678221555836e-05, | |
| "loss": 2.4131, | |
| "step": 15740 | |
| }, | |
| { | |
| "epoch": 43.899721448467965, | |
| "grad_norm": 1.263884884008274, | |
| "learning_rate": 2.485381073671668e-05, | |
| "loss": 2.4481, | |
| "step": 15760 | |
| }, | |
| { | |
| "epoch": 43.95543175487465, | |
| "grad_norm": 1.3105766012420574, | |
| "learning_rate": 2.4767064702399307e-05, | |
| "loss": 2.4316, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 44.01114206128134, | |
| "grad_norm": 1.3070696897883654, | |
| "learning_rate": 2.4680440715636386e-05, | |
| "loss": 2.4113, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 44.06685236768802, | |
| "grad_norm": 1.3679934235167148, | |
| "learning_rate": 2.459393937262057e-05, | |
| "loss": 2.462, | |
| "step": 15820 | |
| }, | |
| { | |
| "epoch": 44.12256267409471, | |
| "grad_norm": 1.2961531328086042, | |
| "learning_rate": 2.45075612687004e-05, | |
| "loss": 2.3913, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 44.17827298050139, | |
| "grad_norm": 1.5918138107382298, | |
| "learning_rate": 2.4421306998376247e-05, | |
| "loss": 2.4062, | |
| "step": 15860 | |
| }, | |
| { | |
| "epoch": 44.23398328690808, | |
| "grad_norm": 1.5407959855411433, | |
| "learning_rate": 2.4335177155296173e-05, | |
| "loss": 2.4135, | |
| "step": 15880 | |
| }, | |
| { | |
| "epoch": 44.289693593314766, | |
| "grad_norm": 1.3864718482505074, | |
| "learning_rate": 2.4249172332251867e-05, | |
| "loss": 2.435, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 44.34540389972145, | |
| "grad_norm": 1.5629168020574962, | |
| "learning_rate": 2.4163293121174586e-05, | |
| "loss": 2.42, | |
| "step": 15920 | |
| }, | |
| { | |
| "epoch": 44.401114206128135, | |
| "grad_norm": 1.3404420567150592, | |
| "learning_rate": 2.4077540113131e-05, | |
| "loss": 2.3939, | |
| "step": 15940 | |
| }, | |
| { | |
| "epoch": 44.456824512534816, | |
| "grad_norm": 1.2610215128317497, | |
| "learning_rate": 2.3991913898319236e-05, | |
| "loss": 2.3981, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 44.5125348189415, | |
| "grad_norm": 1.1948628905515135, | |
| "learning_rate": 2.390641506606475e-05, | |
| "loss": 2.4259, | |
| "step": 15980 | |
| }, | |
| { | |
| "epoch": 44.56824512534819, | |
| "grad_norm": 1.5442691993168876, | |
| "learning_rate": 2.3821044204816285e-05, | |
| "loss": 2.4106, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 44.62395543175487, | |
| "grad_norm": 1.3385181560005985, | |
| "learning_rate": 2.3735801902141812e-05, | |
| "loss": 2.4231, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 44.67966573816156, | |
| "grad_norm": 1.5429534444435276, | |
| "learning_rate": 2.3650688744724484e-05, | |
| "loss": 2.4094, | |
| "step": 16040 | |
| }, | |
| { | |
| "epoch": 44.73537604456825, | |
| "grad_norm": 1.3480847228783814, | |
| "learning_rate": 2.356570531835862e-05, | |
| "loss": 2.3925, | |
| "step": 16060 | |
| }, | |
| { | |
| "epoch": 44.79108635097493, | |
| "grad_norm": 1.3495527852819211, | |
| "learning_rate": 2.348085220794566e-05, | |
| "loss": 2.4055, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 44.84679665738162, | |
| "grad_norm": 1.3160942477102502, | |
| "learning_rate": 2.3396129997490143e-05, | |
| "loss": 2.4, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 44.9025069637883, | |
| "grad_norm": 1.1577155684233915, | |
| "learning_rate": 2.3311539270095685e-05, | |
| "loss": 2.4214, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 44.958217270194986, | |
| "grad_norm": 1.1408523761505251, | |
| "learning_rate": 2.3227080607960936e-05, | |
| "loss": 2.3958, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 45.013927576601674, | |
| "grad_norm": 1.2730620543264026, | |
| "learning_rate": 2.314275459237564e-05, | |
| "loss": 2.3903, | |
| "step": 16160 | |
| }, | |
| { | |
| "epoch": 45.069637883008355, | |
| "grad_norm": 1.1827924678817745, | |
| "learning_rate": 2.3058561803716587e-05, | |
| "loss": 2.4268, | |
| "step": 16180 | |
| }, | |
| { | |
| "epoch": 45.12534818941504, | |
| "grad_norm": 1.3034192039686017, | |
| "learning_rate": 2.2974502821443615e-05, | |
| "loss": 2.3954, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 45.18105849582173, | |
| "grad_norm": 1.4633461110410906, | |
| "learning_rate": 2.289057822409564e-05, | |
| "loss": 2.3956, | |
| "step": 16220 | |
| }, | |
| { | |
| "epoch": 45.23676880222841, | |
| "grad_norm": 1.4777159861247156, | |
| "learning_rate": 2.2806788589286683e-05, | |
| "loss": 2.3643, | |
| "step": 16240 | |
| }, | |
| { | |
| "epoch": 45.2924791086351, | |
| "grad_norm": 1.2317517708690167, | |
| "learning_rate": 2.2723134493701863e-05, | |
| "loss": 2.3884, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 45.34818941504178, | |
| "grad_norm": 1.2821403751825975, | |
| "learning_rate": 2.2639616513093453e-05, | |
| "loss": 2.4146, | |
| "step": 16280 | |
| }, | |
| { | |
| "epoch": 45.40389972144847, | |
| "grad_norm": 1.445641063656784, | |
| "learning_rate": 2.2556235222276924e-05, | |
| "loss": 2.4316, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 45.459610027855156, | |
| "grad_norm": 1.4577958162948974, | |
| "learning_rate": 2.2472991195126933e-05, | |
| "loss": 2.417, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 45.51532033426184, | |
| "grad_norm": 1.3311809796824847, | |
| "learning_rate": 2.2389885004573452e-05, | |
| "loss": 2.4165, | |
| "step": 16340 | |
| }, | |
| { | |
| "epoch": 45.571030640668525, | |
| "grad_norm": 1.404962587268908, | |
| "learning_rate": 2.2306917222597776e-05, | |
| "loss": 2.4204, | |
| "step": 16360 | |
| }, | |
| { | |
| "epoch": 45.626740947075206, | |
| "grad_norm": 1.2925568482512864, | |
| "learning_rate": 2.2224088420228597e-05, | |
| "loss": 2.3624, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 45.682451253481894, | |
| "grad_norm": 1.2480563390495507, | |
| "learning_rate": 2.21413991675381e-05, | |
| "loss": 2.4226, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 45.73816155988858, | |
| "grad_norm": 1.395538579066591, | |
| "learning_rate": 2.2058850033637958e-05, | |
| "loss": 2.4103, | |
| "step": 16420 | |
| }, | |
| { | |
| "epoch": 45.79387186629526, | |
| "grad_norm": 1.2886437393173196, | |
| "learning_rate": 2.197644158667552e-05, | |
| "loss": 2.4027, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 45.84958217270195, | |
| "grad_norm": 1.2805552396725532, | |
| "learning_rate": 2.1894174393829843e-05, | |
| "loss": 2.3974, | |
| "step": 16460 | |
| }, | |
| { | |
| "epoch": 45.90529247910864, | |
| "grad_norm": 1.3616296005412893, | |
| "learning_rate": 2.1812049021307776e-05, | |
| "loss": 2.389, | |
| "step": 16480 | |
| }, | |
| { | |
| "epoch": 45.96100278551532, | |
| "grad_norm": 1.2441651106621028, | |
| "learning_rate": 2.1730066034340133e-05, | |
| "loss": 2.397, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 46.01671309192201, | |
| "grad_norm": 1.294168032433046, | |
| "learning_rate": 2.1648225997177664e-05, | |
| "loss": 2.4032, | |
| "step": 16520 | |
| }, | |
| { | |
| "epoch": 46.07242339832869, | |
| "grad_norm": 1.3259623680432362, | |
| "learning_rate": 2.1566529473087366e-05, | |
| "loss": 2.409, | |
| "step": 16540 | |
| }, | |
| { | |
| "epoch": 46.128133704735376, | |
| "grad_norm": 1.4193156574119963, | |
| "learning_rate": 2.1484977024348456e-05, | |
| "loss": 2.3973, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 46.183844011142064, | |
| "grad_norm": 1.423133534891623, | |
| "learning_rate": 2.1403569212248545e-05, | |
| "loss": 2.4221, | |
| "step": 16580 | |
| }, | |
| { | |
| "epoch": 46.239554317548745, | |
| "grad_norm": 1.289312718105723, | |
| "learning_rate": 2.1322306597079752e-05, | |
| "loss": 2.4058, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 46.29526462395543, | |
| "grad_norm": 1.3742336820519754, | |
| "learning_rate": 2.1241189738134925e-05, | |
| "loss": 2.3803, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 46.35097493036211, | |
| "grad_norm": 1.3293445511084065, | |
| "learning_rate": 2.116021919370371e-05, | |
| "loss": 2.3779, | |
| "step": 16640 | |
| }, | |
| { | |
| "epoch": 46.4066852367688, | |
| "grad_norm": 1.438873266798883, | |
| "learning_rate": 2.1079395521068736e-05, | |
| "loss": 2.4125, | |
| "step": 16660 | |
| }, | |
| { | |
| "epoch": 46.46239554317549, | |
| "grad_norm": 1.5684362505574445, | |
| "learning_rate": 2.099871927650181e-05, | |
| "loss": 2.4172, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 46.51810584958217, | |
| "grad_norm": 1.219039146798224, | |
| "learning_rate": 2.091819101526001e-05, | |
| "loss": 2.3556, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 46.57381615598886, | |
| "grad_norm": 1.290744476185817, | |
| "learning_rate": 2.083781129158196e-05, | |
| "loss": 2.3915, | |
| "step": 16720 | |
| }, | |
| { | |
| "epoch": 46.629526462395546, | |
| "grad_norm": 1.2783924627018073, | |
| "learning_rate": 2.075758065868394e-05, | |
| "loss": 2.4021, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 46.68523676880223, | |
| "grad_norm": 1.4723296505015109, | |
| "learning_rate": 2.0677499668756148e-05, | |
| "loss": 2.4076, | |
| "step": 16760 | |
| }, | |
| { | |
| "epoch": 46.740947075208915, | |
| "grad_norm": 1.4361000627178464, | |
| "learning_rate": 2.0597568872958793e-05, | |
| "loss": 2.3704, | |
| "step": 16780 | |
| }, | |
| { | |
| "epoch": 46.796657381615596, | |
| "grad_norm": 1.3406172199532498, | |
| "learning_rate": 2.051778882141842e-05, | |
| "loss": 2.4095, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 46.852367688022284, | |
| "grad_norm": 1.2728857237580478, | |
| "learning_rate": 2.0438160063224055e-05, | |
| "loss": 2.3951, | |
| "step": 16820 | |
| }, | |
| { | |
| "epoch": 46.90807799442897, | |
| "grad_norm": 1.307601696226092, | |
| "learning_rate": 2.035868314642344e-05, | |
| "loss": 2.3762, | |
| "step": 16840 | |
| }, | |
| { | |
| "epoch": 46.96378830083565, | |
| "grad_norm": 1.341375077755919, | |
| "learning_rate": 2.0279358618019277e-05, | |
| "loss": 2.3747, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 47.01949860724234, | |
| "grad_norm": 1.3661705880679602, | |
| "learning_rate": 2.0200187023965426e-05, | |
| "loss": 2.3762, | |
| "step": 16880 | |
| }, | |
| { | |
| "epoch": 47.07520891364903, | |
| "grad_norm": 1.498474803717406, | |
| "learning_rate": 2.0121168909163192e-05, | |
| "loss": 2.3757, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 47.13091922005571, | |
| "grad_norm": 1.30345057097787, | |
| "learning_rate": 2.0042304817457542e-05, | |
| "loss": 2.4065, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 47.1866295264624, | |
| "grad_norm": 1.8505180253101237, | |
| "learning_rate": 1.9963595291633392e-05, | |
| "loss": 2.3871, | |
| "step": 16940 | |
| }, | |
| { | |
| "epoch": 47.24233983286908, | |
| "grad_norm": 1.450593355694572, | |
| "learning_rate": 1.9885040873411806e-05, | |
| "loss": 2.3871, | |
| "step": 16960 | |
| }, | |
| { | |
| "epoch": 47.298050139275766, | |
| "grad_norm": 1.3343700388758895, | |
| "learning_rate": 1.980664210344637e-05, | |
| "loss": 2.3649, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 47.353760445682454, | |
| "grad_norm": 1.355886861721978, | |
| "learning_rate": 1.9728399521319373e-05, | |
| "loss": 2.4009, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 47.409470752089135, | |
| "grad_norm": 1.271470013080887, | |
| "learning_rate": 1.9650313665538177e-05, | |
| "loss": 2.3921, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 47.46518105849582, | |
| "grad_norm": 1.3641748668938396, | |
| "learning_rate": 1.957238507353144e-05, | |
| "loss": 2.3785, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 47.5208913649025, | |
| "grad_norm": 1.3453345779522274, | |
| "learning_rate": 1.9494614281645438e-05, | |
| "loss": 2.3535, | |
| "step": 17060 | |
| }, | |
| { | |
| "epoch": 47.57660167130919, | |
| "grad_norm": 1.4507718501211375, | |
| "learning_rate": 1.9417001825140412e-05, | |
| "loss": 2.3866, | |
| "step": 17080 | |
| }, | |
| { | |
| "epoch": 47.63231197771588, | |
| "grad_norm": 1.246940923314926, | |
| "learning_rate": 1.9339548238186828e-05, | |
| "loss": 2.3664, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 47.68802228412256, | |
| "grad_norm": 1.369776524169899, | |
| "learning_rate": 1.9262254053861745e-05, | |
| "loss": 2.4068, | |
| "step": 17120 | |
| }, | |
| { | |
| "epoch": 47.74373259052925, | |
| "grad_norm": 1.4449578327650376, | |
| "learning_rate": 1.9185119804145137e-05, | |
| "loss": 2.386, | |
| "step": 17140 | |
| }, | |
| { | |
| "epoch": 47.799442896935936, | |
| "grad_norm": 1.3430555560772082, | |
| "learning_rate": 1.9108146019916174e-05, | |
| "loss": 2.4116, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 47.85515320334262, | |
| "grad_norm": 1.3639059020256794, | |
| "learning_rate": 1.9031333230949668e-05, | |
| "loss": 2.3732, | |
| "step": 17180 | |
| }, | |
| { | |
| "epoch": 47.910863509749305, | |
| "grad_norm": 1.2408281799864953, | |
| "learning_rate": 1.8954681965912332e-05, | |
| "loss": 2.3787, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 47.966573816155986, | |
| "grad_norm": 1.3061826212099938, | |
| "learning_rate": 1.8878192752359258e-05, | |
| "loss": 2.3728, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 48.022284122562674, | |
| "grad_norm": 1.538238890457239, | |
| "learning_rate": 1.8801866116730123e-05, | |
| "loss": 2.3755, | |
| "step": 17240 | |
| }, | |
| { | |
| "epoch": 48.07799442896936, | |
| "grad_norm": 1.3762558112205403, | |
| "learning_rate": 1.872570258434571e-05, | |
| "loss": 2.3727, | |
| "step": 17260 | |
| }, | |
| { | |
| "epoch": 48.13370473537604, | |
| "grad_norm": 1.2476738434905705, | |
| "learning_rate": 1.8649702679404223e-05, | |
| "loss": 2.3652, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 48.18941504178273, | |
| "grad_norm": 1.278376392904509, | |
| "learning_rate": 1.8573866924977697e-05, | |
| "loss": 2.3867, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 48.24512534818942, | |
| "grad_norm": 1.5831828264230967, | |
| "learning_rate": 1.84981958430084e-05, | |
| "loss": 2.382, | |
| "step": 17320 | |
| }, | |
| { | |
| "epoch": 48.3008356545961, | |
| "grad_norm": 1.2626351145463612, | |
| "learning_rate": 1.842268995430522e-05, | |
| "loss": 2.36, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 48.35654596100279, | |
| "grad_norm": 1.3775126997716187, | |
| "learning_rate": 1.834734977854011e-05, | |
| "loss": 2.3553, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 48.41225626740947, | |
| "grad_norm": 1.2704692987761135, | |
| "learning_rate": 1.8272175834244497e-05, | |
| "loss": 2.3722, | |
| "step": 17380 | |
| }, | |
| { | |
| "epoch": 48.467966573816156, | |
| "grad_norm": 1.2840542491455302, | |
| "learning_rate": 1.8197168638805704e-05, | |
| "loss": 2.3766, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 48.523676880222844, | |
| "grad_norm": 1.2405421296938253, | |
| "learning_rate": 1.812232870846343e-05, | |
| "loss": 2.3796, | |
| "step": 17420 | |
| }, | |
| { | |
| "epoch": 48.579387186629525, | |
| "grad_norm": 1.5352436430288825, | |
| "learning_rate": 1.8047656558306114e-05, | |
| "loss": 2.3297, | |
| "step": 17440 | |
| }, | |
| { | |
| "epoch": 48.63509749303621, | |
| "grad_norm": 1.3152930932933073, | |
| "learning_rate": 1.797315270226748e-05, | |
| "loss": 2.3763, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 48.690807799442894, | |
| "grad_norm": 1.162288641400069, | |
| "learning_rate": 1.789881765312296e-05, | |
| "loss": 2.378, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 48.74651810584958, | |
| "grad_norm": 1.5801279533275956, | |
| "learning_rate": 1.7824651922486156e-05, | |
| "loss": 2.3697, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 48.80222841225627, | |
| "grad_norm": 1.471173585562619, | |
| "learning_rate": 1.7750656020805324e-05, | |
| "loss": 2.3874, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 48.85793871866295, | |
| "grad_norm": 1.4109592484040796, | |
| "learning_rate": 1.767683045735989e-05, | |
| "loss": 2.3444, | |
| "step": 17540 | |
| }, | |
| { | |
| "epoch": 48.91364902506964, | |
| "grad_norm": 1.3726614971919417, | |
| "learning_rate": 1.7603175740256895e-05, | |
| "loss": 2.3635, | |
| "step": 17560 | |
| }, | |
| { | |
| "epoch": 48.969359331476326, | |
| "grad_norm": 1.4758519191463757, | |
| "learning_rate": 1.752969237642755e-05, | |
| "loss": 2.3672, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 49.02506963788301, | |
| "grad_norm": 1.4221040255565391, | |
| "learning_rate": 1.745638087162368e-05, | |
| "loss": 2.3578, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 49.080779944289695, | |
| "grad_norm": 1.3119388898351543, | |
| "learning_rate": 1.7383241730414324e-05, | |
| "loss": 2.3895, | |
| "step": 17620 | |
| }, | |
| { | |
| "epoch": 49.136490250696376, | |
| "grad_norm": 1.3268973264123183, | |
| "learning_rate": 1.7310275456182212e-05, | |
| "loss": 2.3383, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 49.192200557103064, | |
| "grad_norm": 1.3097397298281048, | |
| "learning_rate": 1.72374825511203e-05, | |
| "loss": 2.3623, | |
| "step": 17660 | |
| }, | |
| { | |
| "epoch": 49.24791086350975, | |
| "grad_norm": 1.1921177397839031, | |
| "learning_rate": 1.716486351622835e-05, | |
| "loss": 2.351, | |
| "step": 17680 | |
| }, | |
| { | |
| "epoch": 49.30362116991643, | |
| "grad_norm": 1.3660254674411707, | |
| "learning_rate": 1.709241885130941e-05, | |
| "loss": 2.3643, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 49.35933147632312, | |
| "grad_norm": 1.6307520440880778, | |
| "learning_rate": 1.7020149054966462e-05, | |
| "loss": 2.3624, | |
| "step": 17720 | |
| }, | |
| { | |
| "epoch": 49.41504178272981, | |
| "grad_norm": 1.4329708911636774, | |
| "learning_rate": 1.694805462459894e-05, | |
| "loss": 2.3278, | |
| "step": 17740 | |
| }, | |
| { | |
| "epoch": 49.47075208913649, | |
| "grad_norm": 1.516855939329352, | |
| "learning_rate": 1.6876136056399307e-05, | |
| "loss": 2.3734, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 49.52646239554318, | |
| "grad_norm": 1.2457026011328314, | |
| "learning_rate": 1.6804393845349665e-05, | |
| "loss": 2.3626, | |
| "step": 17780 | |
| }, | |
| { | |
| "epoch": 49.58217270194986, | |
| "grad_norm": 1.7297834390596316, | |
| "learning_rate": 1.6732828485218297e-05, | |
| "loss": 2.3713, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 49.637883008356546, | |
| "grad_norm": 1.429662136545568, | |
| "learning_rate": 1.6661440468556335e-05, | |
| "loss": 2.3455, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 49.693593314763234, | |
| "grad_norm": 1.26740617213478, | |
| "learning_rate": 1.6590230286694328e-05, | |
| "loss": 2.3659, | |
| "step": 17840 | |
| }, | |
| { | |
| "epoch": 49.749303621169915, | |
| "grad_norm": 1.681389697921109, | |
| "learning_rate": 1.651919842973888e-05, | |
| "loss": 2.3445, | |
| "step": 17860 | |
| }, | |
| { | |
| "epoch": 49.8050139275766, | |
| "grad_norm": 1.441881294284973, | |
| "learning_rate": 1.6448345386569248e-05, | |
| "loss": 2.3834, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 49.860724233983284, | |
| "grad_norm": 1.341854199720449, | |
| "learning_rate": 1.637767164483401e-05, | |
| "loss": 2.3699, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 49.91643454038997, | |
| "grad_norm": 1.252618865244937, | |
| "learning_rate": 1.6307177690947698e-05, | |
| "loss": 2.3635, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 49.97214484679666, | |
| "grad_norm": 1.226307845622635, | |
| "learning_rate": 1.6236864010087446e-05, | |
| "loss": 2.3485, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 50.02785515320334, | |
| "grad_norm": 1.3356553843028374, | |
| "learning_rate": 1.616673108618965e-05, | |
| "loss": 2.3578, | |
| "step": 17960 | |
| }, | |
| { | |
| "epoch": 50.08356545961003, | |
| "grad_norm": 1.3848996607905597, | |
| "learning_rate": 1.6096779401946624e-05, | |
| "loss": 2.3504, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 50.139275766016716, | |
| "grad_norm": 1.498723321509667, | |
| "learning_rate": 1.6027009438803323e-05, | |
| "loss": 2.3496, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 50.1949860724234, | |
| "grad_norm": 1.5191100216493636, | |
| "learning_rate": 1.595742167695398e-05, | |
| "loss": 2.3461, | |
| "step": 18020 | |
| }, | |
| { | |
| "epoch": 50.250696378830085, | |
| "grad_norm": 1.2559077943231471, | |
| "learning_rate": 1.5888016595338836e-05, | |
| "loss": 2.371, | |
| "step": 18040 | |
| }, | |
| { | |
| "epoch": 50.306406685236766, | |
| "grad_norm": 1.3899128057224512, | |
| "learning_rate": 1.5818794671640822e-05, | |
| "loss": 2.349, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 50.362116991643454, | |
| "grad_norm": 1.354712886418284, | |
| "learning_rate": 1.574975638228226e-05, | |
| "loss": 2.3709, | |
| "step": 18080 | |
| }, | |
| { | |
| "epoch": 50.41782729805014, | |
| "grad_norm": 1.3042322626317018, | |
| "learning_rate": 1.5680902202421623e-05, | |
| "loss": 2.3456, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 50.47353760445682, | |
| "grad_norm": 1.3508893425795137, | |
| "learning_rate": 1.5612232605950247e-05, | |
| "loss": 2.3353, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 50.52924791086351, | |
| "grad_norm": 1.4691874741171684, | |
| "learning_rate": 1.554374806548906e-05, | |
| "loss": 2.3336, | |
| "step": 18140 | |
| }, | |
| { | |
| "epoch": 50.5849582172702, | |
| "grad_norm": 3.0455576787450673, | |
| "learning_rate": 1.5475449052385337e-05, | |
| "loss": 2.3218, | |
| "step": 18160 | |
| }, | |
| { | |
| "epoch": 50.64066852367688, | |
| "grad_norm": 1.4402674578269268, | |
| "learning_rate": 1.540733603670942e-05, | |
| "loss": 2.3372, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 50.69637883008357, | |
| "grad_norm": 1.4795688476423643, | |
| "learning_rate": 1.5339409487251585e-05, | |
| "loss": 2.341, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 50.75208913649025, | |
| "grad_norm": 1.4070357885010039, | |
| "learning_rate": 1.5271669871518705e-05, | |
| "loss": 2.3241, | |
| "step": 18220 | |
| }, | |
| { | |
| "epoch": 50.807799442896936, | |
| "grad_norm": 1.3031362800514608, | |
| "learning_rate": 1.5204117655731085e-05, | |
| "loss": 2.3621, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 50.863509749303624, | |
| "grad_norm": 1.434766758940882, | |
| "learning_rate": 1.5136753304819218e-05, | |
| "loss": 2.3302, | |
| "step": 18260 | |
| }, | |
| { | |
| "epoch": 50.919220055710305, | |
| "grad_norm": 1.274818587770635, | |
| "learning_rate": 1.5069577282420647e-05, | |
| "loss": 2.3465, | |
| "step": 18280 | |
| }, | |
| { | |
| "epoch": 50.97493036211699, | |
| "grad_norm": 1.2730001227993584, | |
| "learning_rate": 1.500259005087672e-05, | |
| "loss": 2.3294, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 51.030640668523674, | |
| "grad_norm": 1.3369943012249967, | |
| "learning_rate": 1.493579207122943e-05, | |
| "loss": 2.3577, | |
| "step": 18320 | |
| }, | |
| { | |
| "epoch": 51.08635097493036, | |
| "grad_norm": 1.255851793002455, | |
| "learning_rate": 1.4869183803218242e-05, | |
| "loss": 2.3442, | |
| "step": 18340 | |
| }, | |
| { | |
| "epoch": 51.14206128133705, | |
| "grad_norm": 1.465379611467944, | |
| "learning_rate": 1.4802765705276894e-05, | |
| "loss": 2.3361, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 51.19777158774373, | |
| "grad_norm": 1.598753189094513, | |
| "learning_rate": 1.4736538234530309e-05, | |
| "loss": 2.3488, | |
| "step": 18380 | |
| }, | |
| { | |
| "epoch": 51.25348189415042, | |
| "grad_norm": 1.3758579974135776, | |
| "learning_rate": 1.4670501846791401e-05, | |
| "loss": 2.341, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 51.309192200557106, | |
| "grad_norm": 1.3023167947311844, | |
| "learning_rate": 1.4604656996557936e-05, | |
| "loss": 2.3496, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 51.36490250696379, | |
| "grad_norm": 1.2931564385198864, | |
| "learning_rate": 1.4539004137009436e-05, | |
| "loss": 2.3394, | |
| "step": 18440 | |
| }, | |
| { | |
| "epoch": 51.420612813370475, | |
| "grad_norm": 1.6620919275534023, | |
| "learning_rate": 1.4473543720004015e-05, | |
| "loss": 2.3285, | |
| "step": 18460 | |
| }, | |
| { | |
| "epoch": 51.476323119777156, | |
| "grad_norm": 1.3917496152973825, | |
| "learning_rate": 1.4408276196075313e-05, | |
| "loss": 2.3486, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 51.532033426183844, | |
| "grad_norm": 1.3562772472225384, | |
| "learning_rate": 1.4343202014429376e-05, | |
| "loss": 2.3323, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 51.58774373259053, | |
| "grad_norm": 1.2319819791798092, | |
| "learning_rate": 1.4278321622941556e-05, | |
| "loss": 2.3439, | |
| "step": 18520 | |
| }, | |
| { | |
| "epoch": 51.64345403899721, | |
| "grad_norm": 1.5358845964171328, | |
| "learning_rate": 1.4213635468153446e-05, | |
| "loss": 2.3307, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 51.6991643454039, | |
| "grad_norm": 1.292648527837981, | |
| "learning_rate": 1.4149143995269799e-05, | |
| "loss": 2.3303, | |
| "step": 18560 | |
| }, | |
| { | |
| "epoch": 51.75487465181058, | |
| "grad_norm": 1.9631233257274625, | |
| "learning_rate": 1.4084847648155449e-05, | |
| "loss": 2.3382, | |
| "step": 18580 | |
| }, | |
| { | |
| "epoch": 51.81058495821727, | |
| "grad_norm": 1.6354418847695984, | |
| "learning_rate": 1.4020746869332296e-05, | |
| "loss": 2.3761, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 51.86629526462396, | |
| "grad_norm": 1.4408996452028136, | |
| "learning_rate": 1.3956842099976191e-05, | |
| "loss": 2.3899, | |
| "step": 18620 | |
| }, | |
| { | |
| "epoch": 51.92200557103064, | |
| "grad_norm": 1.3154420223017438, | |
| "learning_rate": 1.3893133779913992e-05, | |
| "loss": 2.3267, | |
| "step": 18640 | |
| }, | |
| { | |
| "epoch": 51.977715877437326, | |
| "grad_norm": 1.2664206876617758, | |
| "learning_rate": 1.382962234762045e-05, | |
| "loss": 2.3145, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 52.033426183844014, | |
| "grad_norm": 1.461002841812497, | |
| "learning_rate": 1.3766308240215257e-05, | |
| "loss": 2.337, | |
| "step": 18680 | |
| }, | |
| { | |
| "epoch": 52.089136490250695, | |
| "grad_norm": 1.3350308045413666, | |
| "learning_rate": 1.3703191893460002e-05, | |
| "loss": 2.3553, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 52.14484679665738, | |
| "grad_norm": 1.4095843649708175, | |
| "learning_rate": 1.364027374175515e-05, | |
| "loss": 2.3408, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 52.200557103064064, | |
| "grad_norm": 1.8553789055534144, | |
| "learning_rate": 1.357755421813712e-05, | |
| "loss": 2.3513, | |
| "step": 18740 | |
| }, | |
| { | |
| "epoch": 52.25626740947075, | |
| "grad_norm": 1.517906600566457, | |
| "learning_rate": 1.3515033754275249e-05, | |
| "loss": 2.3512, | |
| "step": 18760 | |
| }, | |
| { | |
| "epoch": 52.31197771587744, | |
| "grad_norm": 1.3004637489061956, | |
| "learning_rate": 1.3452712780468846e-05, | |
| "loss": 2.3344, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 52.36768802228412, | |
| "grad_norm": 1.6081005585159005, | |
| "learning_rate": 1.3390591725644231e-05, | |
| "loss": 2.3714, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 52.42339832869081, | |
| "grad_norm": 1.3820776705600462, | |
| "learning_rate": 1.3328671017351728e-05, | |
| "loss": 2.3472, | |
| "step": 18820 | |
| }, | |
| { | |
| "epoch": 52.4791086350975, | |
| "grad_norm": 1.741399862442912, | |
| "learning_rate": 1.3266951081762823e-05, | |
| "loss": 2.3318, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 52.53481894150418, | |
| "grad_norm": 1.6610982616432777, | |
| "learning_rate": 1.320543234366714e-05, | |
| "loss": 2.3564, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 52.590529247910865, | |
| "grad_norm": 1.534678472008335, | |
| "learning_rate": 1.3144115226469601e-05, | |
| "loss": 2.3453, | |
| "step": 18880 | |
| }, | |
| { | |
| "epoch": 52.646239554317546, | |
| "grad_norm": 1.6155419457685751, | |
| "learning_rate": 1.3083000152187406e-05, | |
| "loss": 2.3193, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 52.701949860724234, | |
| "grad_norm": 1.3933017087549446, | |
| "learning_rate": 1.3022087541447226e-05, | |
| "loss": 2.3263, | |
| "step": 18920 | |
| }, | |
| { | |
| "epoch": 52.75766016713092, | |
| "grad_norm": 1.4752643448356435, | |
| "learning_rate": 1.2961377813482258e-05, | |
| "loss": 2.3198, | |
| "step": 18940 | |
| }, | |
| { | |
| "epoch": 52.8133704735376, | |
| "grad_norm": 1.4046488544858395, | |
| "learning_rate": 1.2900871386129355e-05, | |
| "loss": 2.3076, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 52.86908077994429, | |
| "grad_norm": 1.3496360542086223, | |
| "learning_rate": 1.2840568675826145e-05, | |
| "loss": 2.3298, | |
| "step": 18980 | |
| }, | |
| { | |
| "epoch": 52.92479108635097, | |
| "grad_norm": 1.441744999480469, | |
| "learning_rate": 1.2780470097608155e-05, | |
| "loss": 2.3579, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 52.98050139275766, | |
| "grad_norm": 1.441039976490415, | |
| "learning_rate": 1.272057606510598e-05, | |
| "loss": 2.3408, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 53.03621169916435, | |
| "grad_norm": 1.394791214620375, | |
| "learning_rate": 1.2660886990542415e-05, | |
| "loss": 2.3151, | |
| "step": 19040 | |
| }, | |
| { | |
| "epoch": 53.09192200557103, | |
| "grad_norm": 1.315868085708281, | |
| "learning_rate": 1.2601403284729635e-05, | |
| "loss": 2.3304, | |
| "step": 19060 | |
| }, | |
| { | |
| "epoch": 53.147632311977716, | |
| "grad_norm": 1.3975934028921921, | |
| "learning_rate": 1.2542125357066354e-05, | |
| "loss": 2.3314, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 53.203342618384404, | |
| "grad_norm": 1.3231737653504019, | |
| "learning_rate": 1.2483053615534986e-05, | |
| "loss": 2.3419, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 53.259052924791085, | |
| "grad_norm": 1.4696480235764173, | |
| "learning_rate": 1.2424188466698894e-05, | |
| "loss": 2.327, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 53.31476323119777, | |
| "grad_norm": 1.369796074017674, | |
| "learning_rate": 1.2365530315699543e-05, | |
| "loss": 2.2887, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 53.370473537604454, | |
| "grad_norm": 1.318991041965293, | |
| "learning_rate": 1.2307079566253733e-05, | |
| "loss": 2.3359, | |
| "step": 19160 | |
| }, | |
| { | |
| "epoch": 53.42618384401114, | |
| "grad_norm": 1.3653752917448936, | |
| "learning_rate": 1.2248836620650818e-05, | |
| "loss": 2.3091, | |
| "step": 19180 | |
| }, | |
| { | |
| "epoch": 53.48189415041783, | |
| "grad_norm": 1.3620553290863062, | |
| "learning_rate": 1.219080187974993e-05, | |
| "loss": 2.333, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 53.53760445682451, | |
| "grad_norm": 1.521530969044834, | |
| "learning_rate": 1.2132975742977222e-05, | |
| "loss": 2.3477, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 53.5933147632312, | |
| "grad_norm": 1.3604837849834415, | |
| "learning_rate": 1.2075358608323133e-05, | |
| "loss": 2.3018, | |
| "step": 19240 | |
| }, | |
| { | |
| "epoch": 53.64902506963789, | |
| "grad_norm": 1.3342516336240242, | |
| "learning_rate": 1.2017950872339636e-05, | |
| "loss": 2.3189, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 53.70473537604457, | |
| "grad_norm": 1.388890930917694, | |
| "learning_rate": 1.1960752930137489e-05, | |
| "loss": 2.3289, | |
| "step": 19280 | |
| }, | |
| { | |
| "epoch": 53.760445682451255, | |
| "grad_norm": 1.3001131964873058, | |
| "learning_rate": 1.1903765175383552e-05, | |
| "loss": 2.2918, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 53.816155988857936, | |
| "grad_norm": 1.3807429463362486, | |
| "learning_rate": 1.1846988000298073e-05, | |
| "loss": 2.2947, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 53.871866295264624, | |
| "grad_norm": 1.3500489069634025, | |
| "learning_rate": 1.1790421795651973e-05, | |
| "loss": 2.3193, | |
| "step": 19340 | |
| }, | |
| { | |
| "epoch": 53.92757660167131, | |
| "grad_norm": 1.440888239581446, | |
| "learning_rate": 1.1734066950764138e-05, | |
| "loss": 2.3342, | |
| "step": 19360 | |
| }, | |
| { | |
| "epoch": 53.98328690807799, | |
| "grad_norm": 1.5457618066269658, | |
| "learning_rate": 1.1677923853498792e-05, | |
| "loss": 2.2843, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 54.03899721448468, | |
| "grad_norm": 1.4026171429295824, | |
| "learning_rate": 1.162199289026279e-05, | |
| "loss": 2.2993, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 54.09470752089136, | |
| "grad_norm": 1.5830085773681513, | |
| "learning_rate": 1.156627444600296e-05, | |
| "loss": 2.3154, | |
| "step": 19420 | |
| }, | |
| { | |
| "epoch": 54.15041782729805, | |
| "grad_norm": 1.4881826390608948, | |
| "learning_rate": 1.151076890420348e-05, | |
| "loss": 2.3147, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 54.20612813370474, | |
| "grad_norm": 1.3551091744705666, | |
| "learning_rate": 1.1455476646883177e-05, | |
| "loss": 2.3427, | |
| "step": 19460 | |
| }, | |
| { | |
| "epoch": 54.26183844011142, | |
| "grad_norm": 1.4419537324222909, | |
| "learning_rate": 1.1400398054592988e-05, | |
| "loss": 2.3253, | |
| "step": 19480 | |
| }, | |
| { | |
| "epoch": 54.317548746518106, | |
| "grad_norm": 1.3708788026201257, | |
| "learning_rate": 1.1345533506413266e-05, | |
| "loss": 2.2869, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 54.373259052924794, | |
| "grad_norm": 1.474440189127514, | |
| "learning_rate": 1.1290883379951205e-05, | |
| "loss": 2.3095, | |
| "step": 19520 | |
| }, | |
| { | |
| "epoch": 54.428969359331475, | |
| "grad_norm": 1.3454680397423404, | |
| "learning_rate": 1.1236448051338234e-05, | |
| "loss": 2.2888, | |
| "step": 19540 | |
| }, | |
| { | |
| "epoch": 54.48467966573816, | |
| "grad_norm": 1.429931802787514, | |
| "learning_rate": 1.1182227895227435e-05, | |
| "loss": 2.3356, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 54.540389972144844, | |
| "grad_norm": 1.5782912526289399, | |
| "learning_rate": 1.112822328479094e-05, | |
| "loss": 2.3116, | |
| "step": 19580 | |
| }, | |
| { | |
| "epoch": 54.59610027855153, | |
| "grad_norm": 1.4640262618715514, | |
| "learning_rate": 1.1074434591717396e-05, | |
| "loss": 2.3333, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 54.65181058495822, | |
| "grad_norm": 1.7289128171607941, | |
| "learning_rate": 1.102086218620939e-05, | |
| "loss": 2.2807, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 54.7075208913649, | |
| "grad_norm": 1.4358145943314486, | |
| "learning_rate": 1.0967506436980888e-05, | |
| "loss": 2.3362, | |
| "step": 19640 | |
| }, | |
| { | |
| "epoch": 54.76323119777159, | |
| "grad_norm": 1.2782981684370716, | |
| "learning_rate": 1.0914367711254726e-05, | |
| "loss": 2.3087, | |
| "step": 19660 | |
| }, | |
| { | |
| "epoch": 54.81894150417827, | |
| "grad_norm": 1.4574671055158, | |
| "learning_rate": 1.0861446374760058e-05, | |
| "loss": 2.329, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 54.87465181058496, | |
| "grad_norm": 1.6398070121291626, | |
| "learning_rate": 1.0808742791729863e-05, | |
| "loss": 2.3005, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 54.930362116991645, | |
| "grad_norm": 1.3682990675605438, | |
| "learning_rate": 1.075625732489842e-05, | |
| "loss": 2.3105, | |
| "step": 19720 | |
| }, | |
| { | |
| "epoch": 54.986072423398326, | |
| "grad_norm": 1.4113101451622823, | |
| "learning_rate": 1.0703990335498795e-05, | |
| "loss": 2.3004, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 55.041782729805014, | |
| "grad_norm": 1.3747716130043024, | |
| "learning_rate": 1.0651942183260405e-05, | |
| "loss": 2.3123, | |
| "step": 19760 | |
| }, | |
| { | |
| "epoch": 55.0974930362117, | |
| "grad_norm": 1.5773531144976136, | |
| "learning_rate": 1.0600113226406483e-05, | |
| "loss": 2.31, | |
| "step": 19780 | |
| }, | |
| { | |
| "epoch": 55.15320334261838, | |
| "grad_norm": 1.636015923365525, | |
| "learning_rate": 1.0548503821651675e-05, | |
| "loss": 2.2963, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 55.20891364902507, | |
| "grad_norm": 1.9126127423976698, | |
| "learning_rate": 1.0497114324199536e-05, | |
| "loss": 2.3125, | |
| "step": 19820 | |
| }, | |
| { | |
| "epoch": 55.26462395543175, | |
| "grad_norm": 1.3810199319505396, | |
| "learning_rate": 1.0445945087740083e-05, | |
| "loss": 2.2836, | |
| "step": 19840 | |
| }, | |
| { | |
| "epoch": 55.32033426183844, | |
| "grad_norm": 1.655152735076459, | |
| "learning_rate": 1.0394996464447398e-05, | |
| "loss": 2.3183, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 55.37604456824513, | |
| "grad_norm": 1.7280728341318472, | |
| "learning_rate": 1.0344268804977195e-05, | |
| "loss": 2.3056, | |
| "step": 19880 | |
| }, | |
| { | |
| "epoch": 55.43175487465181, | |
| "grad_norm": 1.3354142969390423, | |
| "learning_rate": 1.029376245846439e-05, | |
| "loss": 2.2894, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 55.4874651810585, | |
| "grad_norm": 1.6222487674242974, | |
| "learning_rate": 1.024347777252068e-05, | |
| "loss": 2.3073, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 55.543175487465184, | |
| "grad_norm": 1.454409209087223, | |
| "learning_rate": 1.0193415093232206e-05, | |
| "loss": 2.3023, | |
| "step": 19940 | |
| }, | |
| { | |
| "epoch": 55.598885793871865, | |
| "grad_norm": 1.5315652454207556, | |
| "learning_rate": 1.0143574765157128e-05, | |
| "loss": 2.3427, | |
| "step": 19960 | |
| }, | |
| { | |
| "epoch": 55.65459610027855, | |
| "grad_norm": 1.441290570573882, | |
| "learning_rate": 1.0093957131323262e-05, | |
| "loss": 2.3211, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 55.710306406685234, | |
| "grad_norm": 1.6552345609147763, | |
| "learning_rate": 1.004456253322574e-05, | |
| "loss": 2.3032, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 55.76601671309192, | |
| "grad_norm": 1.4344844434843587, | |
| "learning_rate": 9.995391310824615e-06, | |
| "loss": 2.32, | |
| "step": 20020 | |
| }, | |
| { | |
| "epoch": 55.82172701949861, | |
| "grad_norm": 1.5149262550896996, | |
| "learning_rate": 9.946443802542573e-06, | |
| "loss": 2.3054, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 55.87743732590529, | |
| "grad_norm": 1.4983617276276844, | |
| "learning_rate": 9.89772034526257e-06, | |
| "loss": 2.2887, | |
| "step": 20060 | |
| }, | |
| { | |
| "epoch": 55.93314763231198, | |
| "grad_norm": 1.3214738473117364, | |
| "learning_rate": 9.849221274325526e-06, | |
| "loss": 2.3222, | |
| "step": 20080 | |
| }, | |
| { | |
| "epoch": 55.98885793871866, | |
| "grad_norm": 1.477089337511352, | |
| "learning_rate": 9.800946923528015e-06, | |
| "loss": 2.2982, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 56.04456824512535, | |
| "grad_norm": 1.4526733288769058, | |
| "learning_rate": 9.752897625119957e-06, | |
| "loss": 2.2978, | |
| "step": 20120 | |
| }, | |
| { | |
| "epoch": 56.100278551532035, | |
| "grad_norm": 1.4020841003024251, | |
| "learning_rate": 9.705073709802343e-06, | |
| "loss": 2.2945, | |
| "step": 20140 | |
| }, | |
| { | |
| "epoch": 56.155988857938716, | |
| "grad_norm": 1.5600664300784186, | |
| "learning_rate": 9.657475506724974e-06, | |
| "loss": 2.2782, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 56.211699164345404, | |
| "grad_norm": 1.8810092843791293, | |
| "learning_rate": 9.610103343484164e-06, | |
| "loss": 2.3072, | |
| "step": 20180 | |
| }, | |
| { | |
| "epoch": 56.26740947075209, | |
| "grad_norm": 1.5355656388936216, | |
| "learning_rate": 9.562957546120497e-06, | |
| "loss": 2.2978, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 56.32311977715877, | |
| "grad_norm": 1.49909865084026, | |
| "learning_rate": 9.51603843911659e-06, | |
| "loss": 2.3092, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 56.37883008356546, | |
| "grad_norm": 1.5161221850342854, | |
| "learning_rate": 9.469346345394869e-06, | |
| "loss": 2.2818, | |
| "step": 20240 | |
| }, | |
| { | |
| "epoch": 56.43454038997214, | |
| "grad_norm": 1.7615731834241355, | |
| "learning_rate": 9.422881586315314e-06, | |
| "loss": 2.3084, | |
| "step": 20260 | |
| }, | |
| { | |
| "epoch": 56.49025069637883, | |
| "grad_norm": 1.329887631910666, | |
| "learning_rate": 9.376644481673266e-06, | |
| "loss": 2.3056, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 56.54596100278552, | |
| "grad_norm": 1.4720910620951293, | |
| "learning_rate": 9.33063534969724e-06, | |
| "loss": 2.3108, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 56.6016713091922, | |
| "grad_norm": 1.3281512249998089, | |
| "learning_rate": 9.284854507046706e-06, | |
| "loss": 2.2901, | |
| "step": 20320 | |
| }, | |
| { | |
| "epoch": 56.65738161559889, | |
| "grad_norm": 1.3508495829729492, | |
| "learning_rate": 9.239302268809946e-06, | |
| "loss": 2.3169, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 56.713091922005574, | |
| "grad_norm": 1.4459681060448604, | |
| "learning_rate": 9.19397894850185e-06, | |
| "loss": 2.2935, | |
| "step": 20360 | |
| }, | |
| { | |
| "epoch": 56.768802228412255, | |
| "grad_norm": 1.435466464580322, | |
| "learning_rate": 9.148884858061761e-06, | |
| "loss": 2.297, | |
| "step": 20380 | |
| }, | |
| { | |
| "epoch": 56.82451253481894, | |
| "grad_norm": 1.4747023153570098, | |
| "learning_rate": 9.10402030785136e-06, | |
| "loss": 2.2758, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 56.880222841225624, | |
| "grad_norm": 1.732004184834518, | |
| "learning_rate": 9.059385606652494e-06, | |
| "loss": 2.2663, | |
| "step": 20420 | |
| }, | |
| { | |
| "epoch": 56.93593314763231, | |
| "grad_norm": 1.627581542112412, | |
| "learning_rate": 9.014981061665082e-06, | |
| "loss": 2.3057, | |
| "step": 20440 | |
| }, | |
| { | |
| "epoch": 56.991643454039, | |
| "grad_norm": 1.557984274560907, | |
| "learning_rate": 8.970806978504978e-06, | |
| "loss": 2.3203, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 57.04735376044568, | |
| "grad_norm": 1.3370492439725272, | |
| "learning_rate": 8.926863661201858e-06, | |
| "loss": 2.2901, | |
| "step": 20480 | |
| }, | |
| { | |
| "epoch": 57.10306406685237, | |
| "grad_norm": 1.5834661112813444, | |
| "learning_rate": 8.883151412197163e-06, | |
| "loss": 2.3148, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 57.15877437325905, | |
| "grad_norm": 1.444140494560892, | |
| "learning_rate": 8.839670532341993e-06, | |
| "loss": 2.2811, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 57.21448467966574, | |
| "grad_norm": 1.2924659150251059, | |
| "learning_rate": 8.796421320895056e-06, | |
| "loss": 2.2812, | |
| "step": 20540 | |
| }, | |
| { | |
| "epoch": 57.270194986072426, | |
| "grad_norm": 1.278167875022471, | |
| "learning_rate": 8.753404075520562e-06, | |
| "loss": 2.2695, | |
| "step": 20560 | |
| }, | |
| { | |
| "epoch": 57.325905292479106, | |
| "grad_norm": 1.489794984401024, | |
| "learning_rate": 8.710619092286228e-06, | |
| "loss": 2.2812, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 57.381615598885794, | |
| "grad_norm": 1.4707110829209729, | |
| "learning_rate": 8.668066665661217e-06, | |
| "loss": 2.2903, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 57.43732590529248, | |
| "grad_norm": 1.4687558384788093, | |
| "learning_rate": 8.625747088514107e-06, | |
| "loss": 2.306, | |
| "step": 20620 | |
| }, | |
| { | |
| "epoch": 57.49303621169916, | |
| "grad_norm": 1.4060321766361261, | |
| "learning_rate": 8.583660652110897e-06, | |
| "loss": 2.3054, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 57.54874651810585, | |
| "grad_norm": 1.416314142016587, | |
| "learning_rate": 8.541807646112959e-06, | |
| "loss": 2.2776, | |
| "step": 20660 | |
| }, | |
| { | |
| "epoch": 57.60445682451253, | |
| "grad_norm": 1.528612849317557, | |
| "learning_rate": 8.50018835857509e-06, | |
| "loss": 2.2615, | |
| "step": 20680 | |
| }, | |
| { | |
| "epoch": 57.66016713091922, | |
| "grad_norm": 1.5253351674209896, | |
| "learning_rate": 8.45880307594351e-06, | |
| "loss": 2.2641, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 57.71587743732591, | |
| "grad_norm": 1.4358983735975828, | |
| "learning_rate": 8.417652083053896e-06, | |
| "loss": 2.2722, | |
| "step": 20720 | |
| }, | |
| { | |
| "epoch": 57.77158774373259, | |
| "grad_norm": 1.7734798200078705, | |
| "learning_rate": 8.376735663129412e-06, | |
| "loss": 2.3084, | |
| "step": 20740 | |
| }, | |
| { | |
| "epoch": 57.82729805013928, | |
| "grad_norm": 1.5869547851700487, | |
| "learning_rate": 8.336054097778755e-06, | |
| "loss": 2.2899, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 57.88300835654596, | |
| "grad_norm": 1.4546940753793316, | |
| "learning_rate": 8.295607666994244e-06, | |
| "loss": 2.3095, | |
| "step": 20780 | |
| }, | |
| { | |
| "epoch": 57.938718662952645, | |
| "grad_norm": 2.102508648107544, | |
| "learning_rate": 8.255396649149872e-06, | |
| "loss": 2.2591, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 57.99442896935933, | |
| "grad_norm": 1.7539480811989963, | |
| "learning_rate": 8.215421320999385e-06, | |
| "loss": 2.2713, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 58.050139275766014, | |
| "grad_norm": 2.230779439808303, | |
| "learning_rate": 8.175681957674403e-06, | |
| "loss": 2.3016, | |
| "step": 20840 | |
| }, | |
| { | |
| "epoch": 58.1058495821727, | |
| "grad_norm": 1.382319191320228, | |
| "learning_rate": 8.136178832682491e-06, | |
| "loss": 2.3041, | |
| "step": 20860 | |
| }, | |
| { | |
| "epoch": 58.16155988857939, | |
| "grad_norm": 1.5265747955874778, | |
| "learning_rate": 8.096912217905309e-06, | |
| "loss": 2.2702, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 58.21727019498607, | |
| "grad_norm": 1.4460542045577416, | |
| "learning_rate": 8.057882383596717e-06, | |
| "loss": 2.3015, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 58.27298050139276, | |
| "grad_norm": 1.4566734252474305, | |
| "learning_rate": 8.019089598380943e-06, | |
| "loss": 2.2889, | |
| "step": 20920 | |
| }, | |
| { | |
| "epoch": 58.32869080779944, | |
| "grad_norm": 1.2969700785128098, | |
| "learning_rate": 7.98053412925069e-06, | |
| "loss": 2.3123, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 58.38440111420613, | |
| "grad_norm": 1.4381747219027274, | |
| "learning_rate": 7.942216241565335e-06, | |
| "loss": 2.2903, | |
| "step": 20960 | |
| }, | |
| { | |
| "epoch": 58.440111420612816, | |
| "grad_norm": 1.41845772591463, | |
| "learning_rate": 7.904136199049108e-06, | |
| "loss": 2.2915, | |
| "step": 20980 | |
| }, | |
| { | |
| "epoch": 58.4958217270195, | |
| "grad_norm": 1.7118356239586723, | |
| "learning_rate": 7.866294263789243e-06, | |
| "loss": 2.272, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 58.551532033426184, | |
| "grad_norm": 1.6802721584331735, | |
| "learning_rate": 7.828690696234207e-06, | |
| "loss": 2.2831, | |
| "step": 21020 | |
| }, | |
| { | |
| "epoch": 58.60724233983287, | |
| "grad_norm": 1.3312453916466178, | |
| "learning_rate": 7.791325755191866e-06, | |
| "loss": 2.3042, | |
| "step": 21040 | |
| }, | |
| { | |
| "epoch": 58.66295264623955, | |
| "grad_norm": 1.4400016262848356, | |
| "learning_rate": 7.754199697827755e-06, | |
| "loss": 2.2708, | |
| "step": 21060 | |
| }, | |
| { | |
| "epoch": 58.71866295264624, | |
| "grad_norm": 1.4653785109530788, | |
| "learning_rate": 7.717312779663285e-06, | |
| "loss": 2.298, | |
| "step": 21080 | |
| }, | |
| { | |
| "epoch": 58.77437325905292, | |
| "grad_norm": 1.4617048241574984, | |
| "learning_rate": 7.680665254573972e-06, | |
| "loss": 2.295, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 58.83008356545961, | |
| "grad_norm": 1.5216063506001387, | |
| "learning_rate": 7.644257374787696e-06, | |
| "loss": 2.276, | |
| "step": 21120 | |
| }, | |
| { | |
| "epoch": 58.8857938718663, | |
| "grad_norm": 1.3935611767924123, | |
| "learning_rate": 7.6080893908829835e-06, | |
| "loss": 2.2758, | |
| "step": 21140 | |
| }, | |
| { | |
| "epoch": 58.94150417827298, | |
| "grad_norm": 1.4853858842912901, | |
| "learning_rate": 7.572161551787261e-06, | |
| "loss": 2.2871, | |
| "step": 21160 | |
| }, | |
| { | |
| "epoch": 58.99721448467967, | |
| "grad_norm": 1.5467142471984074, | |
| "learning_rate": 7.536474104775158e-06, | |
| "loss": 2.2848, | |
| "step": 21180 | |
| }, | |
| { | |
| "epoch": 59.05292479108635, | |
| "grad_norm": 1.5612739193627336, | |
| "learning_rate": 7.501027295466781e-06, | |
| "loss": 2.2918, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 59.108635097493035, | |
| "grad_norm": 1.5107647532729618, | |
| "learning_rate": 7.4658213678260586e-06, | |
| "loss": 2.2938, | |
| "step": 21220 | |
| }, | |
| { | |
| "epoch": 59.16434540389972, | |
| "grad_norm": 1.613803688667171, | |
| "learning_rate": 7.430856564159026e-06, | |
| "loss": 2.2624, | |
| "step": 21240 | |
| }, | |
| { | |
| "epoch": 59.220055710306404, | |
| "grad_norm": 1.4075510840449976, | |
| "learning_rate": 7.396133125112186e-06, | |
| "loss": 2.2882, | |
| "step": 21260 | |
| }, | |
| { | |
| "epoch": 59.27576601671309, | |
| "grad_norm": 1.4680721335227742, | |
| "learning_rate": 7.361651289670837e-06, | |
| "loss": 2.2772, | |
| "step": 21280 | |
| }, | |
| { | |
| "epoch": 59.33147632311978, | |
| "grad_norm": 1.9100448192464394, | |
| "learning_rate": 7.327411295157427e-06, | |
| "loss": 2.2552, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 59.38718662952646, | |
| "grad_norm": 1.6058890472494596, | |
| "learning_rate": 7.293413377229926e-06, | |
| "loss": 2.2458, | |
| "step": 21320 | |
| }, | |
| { | |
| "epoch": 59.44289693593315, | |
| "grad_norm": 1.760861350098991, | |
| "learning_rate": 7.259657769880218e-06, | |
| "loss": 2.2921, | |
| "step": 21340 | |
| }, | |
| { | |
| "epoch": 59.49860724233983, | |
| "grad_norm": 1.4588818247613144, | |
| "learning_rate": 7.226144705432453e-06, | |
| "loss": 2.2647, | |
| "step": 21360 | |
| }, | |
| { | |
| "epoch": 59.55431754874652, | |
| "grad_norm": 1.347496064491126, | |
| "learning_rate": 7.192874414541492e-06, | |
| "loss": 2.3212, | |
| "step": 21380 | |
| }, | |
| { | |
| "epoch": 59.610027855153206, | |
| "grad_norm": 1.4180100417862518, | |
| "learning_rate": 7.159847126191279e-06, | |
| "loss": 2.2922, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 59.66573816155989, | |
| "grad_norm": 1.3383217284211308, | |
| "learning_rate": 7.127063067693305e-06, | |
| "loss": 2.2642, | |
| "step": 21420 | |
| }, | |
| { | |
| "epoch": 59.721448467966574, | |
| "grad_norm": 1.5431150296701466, | |
| "learning_rate": 7.094522464685003e-06, | |
| "loss": 2.2798, | |
| "step": 21440 | |
| }, | |
| { | |
| "epoch": 59.77715877437326, | |
| "grad_norm": 1.453049719160441, | |
| "learning_rate": 7.062225541128232e-06, | |
| "loss": 2.2882, | |
| "step": 21460 | |
| }, | |
| { | |
| "epoch": 59.83286908077994, | |
| "grad_norm": 1.5504386381902358, | |
| "learning_rate": 7.030172519307708e-06, | |
| "loss": 2.2702, | |
| "step": 21480 | |
| }, | |
| { | |
| "epoch": 59.88857938718663, | |
| "grad_norm": 1.3068595652128718, | |
| "learning_rate": 6.998363619829485e-06, | |
| "loss": 2.2867, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 59.94428969359331, | |
| "grad_norm": 1.5921797096923227, | |
| "learning_rate": 6.966799061619429e-06, | |
| "loss": 2.3073, | |
| "step": 21520 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 1.4377189989333592, | |
| "learning_rate": 6.935479061921752e-06, | |
| "loss": 2.2524, | |
| "step": 21540 | |
| }, | |
| { | |
| "epoch": 60.05571030640669, | |
| "grad_norm": 1.4965660553834361, | |
| "learning_rate": 6.904403836297449e-06, | |
| "loss": 2.2908, | |
| "step": 21560 | |
| }, | |
| { | |
| "epoch": 60.11142061281337, | |
| "grad_norm": 1.5433152079814891, | |
| "learning_rate": 6.873573598622855e-06, | |
| "loss": 2.3, | |
| "step": 21580 | |
| }, | |
| { | |
| "epoch": 60.16713091922006, | |
| "grad_norm": 1.532824089241608, | |
| "learning_rate": 6.842988561088175e-06, | |
| "loss": 2.2503, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 60.22284122562674, | |
| "grad_norm": 1.404540197652475, | |
| "learning_rate": 6.81264893419601e-06, | |
| "loss": 2.2671, | |
| "step": 21620 | |
| }, | |
| { | |
| "epoch": 60.278551532033426, | |
| "grad_norm": 2.3304693566638277, | |
| "learning_rate": 6.782554926759919e-06, | |
| "loss": 2.272, | |
| "step": 21640 | |
| }, | |
| { | |
| "epoch": 60.33426183844011, | |
| "grad_norm": 1.4431191588807148, | |
| "learning_rate": 6.752706745902972e-06, | |
| "loss": 2.2741, | |
| "step": 21660 | |
| }, | |
| { | |
| "epoch": 60.389972144846794, | |
| "grad_norm": 1.7541038462058614, | |
| "learning_rate": 6.723104597056326e-06, | |
| "loss": 2.2679, | |
| "step": 21680 | |
| }, | |
| { | |
| "epoch": 60.44568245125348, | |
| "grad_norm": 1.5107298093918222, | |
| "learning_rate": 6.693748683957818e-06, | |
| "loss": 2.2439, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 60.50139275766017, | |
| "grad_norm": 1.8531551454644686, | |
| "learning_rate": 6.664639208650558e-06, | |
| "loss": 2.3011, | |
| "step": 21720 | |
| }, | |
| { | |
| "epoch": 60.55710306406685, | |
| "grad_norm": 1.7982354405211904, | |
| "learning_rate": 6.635776371481545e-06, | |
| "loss": 2.2564, | |
| "step": 21740 | |
| }, | |
| { | |
| "epoch": 60.61281337047354, | |
| "grad_norm": 1.4660071270340647, | |
| "learning_rate": 6.607160371100274e-06, | |
| "loss": 2.2991, | |
| "step": 21760 | |
| }, | |
| { | |
| "epoch": 60.66852367688022, | |
| "grad_norm": 1.6108738101591895, | |
| "learning_rate": 6.578791404457377e-06, | |
| "loss": 2.2712, | |
| "step": 21780 | |
| }, | |
| { | |
| "epoch": 60.72423398328691, | |
| "grad_norm": 1.7280681944263292, | |
| "learning_rate": 6.550669666803269e-06, | |
| "loss": 2.2645, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 60.779944289693596, | |
| "grad_norm": 1.634845479160207, | |
| "learning_rate": 6.522795351686807e-06, | |
| "loss": 2.264, | |
| "step": 21820 | |
| }, | |
| { | |
| "epoch": 60.83565459610028, | |
| "grad_norm": 1.4425486098001479, | |
| "learning_rate": 6.495168650953954e-06, | |
| "loss": 2.2848, | |
| "step": 21840 | |
| }, | |
| { | |
| "epoch": 60.891364902506965, | |
| "grad_norm": 1.4822789289824396, | |
| "learning_rate": 6.467789754746452e-06, | |
| "loss": 2.2683, | |
| "step": 21860 | |
| }, | |
| { | |
| "epoch": 60.94707520891365, | |
| "grad_norm": 1.732381751649639, | |
| "learning_rate": 6.440658851500523e-06, | |
| "loss": 2.2965, | |
| "step": 21880 | |
| }, | |
| { | |
| "epoch": 61.00278551532033, | |
| "grad_norm": 1.5491944453547546, | |
| "learning_rate": 6.413776127945568e-06, | |
| "loss": 2.2874, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 61.05849582172702, | |
| "grad_norm": 1.3257669016068976, | |
| "learning_rate": 6.3871417691028895e-06, | |
| "loss": 2.2499, | |
| "step": 21920 | |
| }, | |
| { | |
| "epoch": 61.1142061281337, | |
| "grad_norm": 1.9195662310637283, | |
| "learning_rate": 6.360755958284388e-06, | |
| "loss": 2.2535, | |
| "step": 21940 | |
| }, | |
| { | |
| "epoch": 61.16991643454039, | |
| "grad_norm": 1.5972372792438843, | |
| "learning_rate": 6.334618877091354e-06, | |
| "loss": 2.2632, | |
| "step": 21960 | |
| }, | |
| { | |
| "epoch": 61.22562674094708, | |
| "grad_norm": 1.3893966806690632, | |
| "learning_rate": 6.308730705413165e-06, | |
| "loss": 2.2583, | |
| "step": 21980 | |
| }, | |
| { | |
| "epoch": 61.28133704735376, | |
| "grad_norm": 1.3230497193349502, | |
| "learning_rate": 6.283091621426083e-06, | |
| "loss": 2.2836, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 61.33704735376045, | |
| "grad_norm": 1.3952354521448391, | |
| "learning_rate": 6.257701801592015e-06, | |
| "loss": 2.257, | |
| "step": 22020 | |
| }, | |
| { | |
| "epoch": 61.39275766016713, | |
| "grad_norm": 1.554543762365429, | |
| "learning_rate": 6.232561420657287e-06, | |
| "loss": 2.2712, | |
| "step": 22040 | |
| }, | |
| { | |
| "epoch": 61.448467966573816, | |
| "grad_norm": 1.5605932348002485, | |
| "learning_rate": 6.207670651651461e-06, | |
| "loss": 2.2724, | |
| "step": 22060 | |
| }, | |
| { | |
| "epoch": 61.5041782729805, | |
| "grad_norm": 1.3976211106358032, | |
| "learning_rate": 6.183029665886133e-06, | |
| "loss": 2.2473, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 61.559888579387184, | |
| "grad_norm": 1.8546230069148926, | |
| "learning_rate": 6.158638632953763e-06, | |
| "loss": 2.2717, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 61.61559888579387, | |
| "grad_norm": 1.89636102470963, | |
| "learning_rate": 6.134497720726502e-06, | |
| "loss": 2.2812, | |
| "step": 22120 | |
| }, | |
| { | |
| "epoch": 61.67130919220056, | |
| "grad_norm": 1.5811862192223516, | |
| "learning_rate": 6.110607095355023e-06, | |
| "loss": 2.2526, | |
| "step": 22140 | |
| }, | |
| { | |
| "epoch": 61.72701949860724, | |
| "grad_norm": 1.4824595544381087, | |
| "learning_rate": 6.0869669212674075e-06, | |
| "loss": 2.2745, | |
| "step": 22160 | |
| }, | |
| { | |
| "epoch": 61.78272980501393, | |
| "grad_norm": 1.457324859716249, | |
| "learning_rate": 6.063577361167978e-06, | |
| "loss": 2.2999, | |
| "step": 22180 | |
| }, | |
| { | |
| "epoch": 61.83844011142061, | |
| "grad_norm": 1.7057172055075098, | |
| "learning_rate": 6.040438576036232e-06, | |
| "loss": 2.2332, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 61.8941504178273, | |
| "grad_norm": 1.4146229574598475, | |
| "learning_rate": 6.0175507251256545e-06, | |
| "loss": 2.2701, | |
| "step": 22220 | |
| }, | |
| { | |
| "epoch": 61.949860724233986, | |
| "grad_norm": 1.583083000184335, | |
| "learning_rate": 5.994913965962701e-06, | |
| "loss": 2.2528, | |
| "step": 22240 | |
| }, | |
| { | |
| "epoch": 62.00557103064067, | |
| "grad_norm": 1.4267458097429977, | |
| "learning_rate": 5.972528454345661e-06, | |
| "loss": 2.2459, | |
| "step": 22260 | |
| }, | |
| { | |
| "epoch": 62.061281337047355, | |
| "grad_norm": 1.868289697809984, | |
| "learning_rate": 5.950394344343613e-06, | |
| "loss": 2.2553, | |
| "step": 22280 | |
| }, | |
| { | |
| "epoch": 62.116991643454035, | |
| "grad_norm": 1.6100749427479117, | |
| "learning_rate": 5.928511788295353e-06, | |
| "loss": 2.258, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 62.17270194986072, | |
| "grad_norm": 2.033401841218128, | |
| "learning_rate": 5.906880936808346e-06, | |
| "loss": 2.2656, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 62.22841225626741, | |
| "grad_norm": 1.5336132691384432, | |
| "learning_rate": 5.8855019387576895e-06, | |
| "loss": 2.2713, | |
| "step": 22340 | |
| }, | |
| { | |
| "epoch": 62.28412256267409, | |
| "grad_norm": 1.4006290924081595, | |
| "learning_rate": 5.864374941285097e-06, | |
| "loss": 2.273, | |
| "step": 22360 | |
| }, | |
| { | |
| "epoch": 62.33983286908078, | |
| "grad_norm": 1.5727786325700963, | |
| "learning_rate": 5.843500089797875e-06, | |
| "loss": 2.2698, | |
| "step": 22380 | |
| }, | |
| { | |
| "epoch": 62.39554317548747, | |
| "grad_norm": 1.5471505670773764, | |
| "learning_rate": 5.822877527967931e-06, | |
| "loss": 2.2366, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 62.45125348189415, | |
| "grad_norm": 1.7387583369728248, | |
| "learning_rate": 5.802507397730769e-06, | |
| "loss": 2.2517, | |
| "step": 22420 | |
| }, | |
| { | |
| "epoch": 62.50696378830084, | |
| "grad_norm": 1.34648038991986, | |
| "learning_rate": 5.782389839284539e-06, | |
| "loss": 2.2792, | |
| "step": 22440 | |
| }, | |
| { | |
| "epoch": 62.56267409470752, | |
| "grad_norm": 1.4257642559426869, | |
| "learning_rate": 5.76252499108904e-06, | |
| "loss": 2.2639, | |
| "step": 22460 | |
| }, | |
| { | |
| "epoch": 62.618384401114206, | |
| "grad_norm": 1.4992603072132409, | |
| "learning_rate": 5.7429129898647996e-06, | |
| "loss": 2.2469, | |
| "step": 22480 | |
| }, | |
| { | |
| "epoch": 62.674094707520894, | |
| "grad_norm": 1.3812098236775807, | |
| "learning_rate": 5.723553970592111e-06, | |
| "loss": 2.2778, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 62.729805013927574, | |
| "grad_norm": 1.5792548079682394, | |
| "learning_rate": 5.704448066510095e-06, | |
| "loss": 2.267, | |
| "step": 22520 | |
| }, | |
| { | |
| "epoch": 62.78551532033426, | |
| "grad_norm": 1.3978392074739474, | |
| "learning_rate": 5.6855954091158275e-06, | |
| "loss": 2.2949, | |
| "step": 22540 | |
| }, | |
| { | |
| "epoch": 62.84122562674095, | |
| "grad_norm": 1.323364408912749, | |
| "learning_rate": 5.666996128163389e-06, | |
| "loss": 2.239, | |
| "step": 22560 | |
| }, | |
| { | |
| "epoch": 62.89693593314763, | |
| "grad_norm": 1.47299942786999, | |
| "learning_rate": 5.648650351662984e-06, | |
| "loss": 2.2428, | |
| "step": 22580 | |
| }, | |
| { | |
| "epoch": 62.95264623955432, | |
| "grad_norm": 1.5746917164193233, | |
| "learning_rate": 5.630558205880067e-06, | |
| "loss": 2.2717, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 63.008356545961, | |
| "grad_norm": 1.3453881918350201, | |
| "learning_rate": 5.612719815334472e-06, | |
| "loss": 2.2605, | |
| "step": 22620 | |
| }, | |
| { | |
| "epoch": 63.06406685236769, | |
| "grad_norm": 1.5732476592031857, | |
| "learning_rate": 5.595135302799554e-06, | |
| "loss": 2.2981, | |
| "step": 22640 | |
| }, | |
| { | |
| "epoch": 63.119777158774376, | |
| "grad_norm": 1.4933682090479892, | |
| "learning_rate": 5.577804789301342e-06, | |
| "loss": 2.2629, | |
| "step": 22660 | |
| }, | |
| { | |
| "epoch": 63.17548746518106, | |
| "grad_norm": 2.277146814514644, | |
| "learning_rate": 5.560728394117715e-06, | |
| "loss": 2.2708, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 63.231197771587745, | |
| "grad_norm": 1.272383107286924, | |
| "learning_rate": 5.543906234777552e-06, | |
| "loss": 2.2573, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 63.286908077994426, | |
| "grad_norm": 1.5182549490627633, | |
| "learning_rate": 5.527338427059974e-06, | |
| "loss": 2.2316, | |
| "step": 22720 | |
| }, | |
| { | |
| "epoch": 63.34261838440111, | |
| "grad_norm": 1.7891996636535088, | |
| "learning_rate": 5.511025084993495e-06, | |
| "loss": 2.2441, | |
| "step": 22740 | |
| }, | |
| { | |
| "epoch": 63.3983286908078, | |
| "grad_norm": 1.5802790373457376, | |
| "learning_rate": 5.494966320855273e-06, | |
| "loss": 2.2617, | |
| "step": 22760 | |
| }, | |
| { | |
| "epoch": 63.45403899721448, | |
| "grad_norm": 1.6073072729662374, | |
| "learning_rate": 5.479162245170319e-06, | |
| "loss": 2.2458, | |
| "step": 22780 | |
| }, | |
| { | |
| "epoch": 63.50974930362117, | |
| "grad_norm": 1.5697619339543767, | |
| "learning_rate": 5.4636129667107414e-06, | |
| "loss": 2.2971, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 63.56545961002786, | |
| "grad_norm": 1.3744571764690732, | |
| "learning_rate": 5.448318592495002e-06, | |
| "loss": 2.2844, | |
| "step": 22820 | |
| }, | |
| { | |
| "epoch": 63.62116991643454, | |
| "grad_norm": 1.6689901020657363, | |
| "learning_rate": 5.433279227787173e-06, | |
| "loss": 2.2517, | |
| "step": 22840 | |
| }, | |
| { | |
| "epoch": 63.67688022284123, | |
| "grad_norm": 1.382942198241601, | |
| "learning_rate": 5.418494976096209e-06, | |
| "loss": 2.26, | |
| "step": 22860 | |
| }, | |
| { | |
| "epoch": 63.73259052924791, | |
| "grad_norm": 1.5877846358641807, | |
| "learning_rate": 5.403965939175251e-06, | |
| "loss": 2.2572, | |
| "step": 22880 | |
| }, | |
| { | |
| "epoch": 63.788300835654596, | |
| "grad_norm": 1.816527139540087, | |
| "learning_rate": 5.389692217020904e-06, | |
| "loss": 2.2546, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 63.844011142061284, | |
| "grad_norm": 1.7654697501524792, | |
| "learning_rate": 5.375673907872574e-06, | |
| "loss": 2.2418, | |
| "step": 22920 | |
| }, | |
| { | |
| "epoch": 63.899721448467965, | |
| "grad_norm": 1.5888334127364063, | |
| "learning_rate": 5.36191110821176e-06, | |
| "loss": 2.2664, | |
| "step": 22940 | |
| }, | |
| { | |
| "epoch": 63.95543175487465, | |
| "grad_norm": 1.4670614525113759, | |
| "learning_rate": 5.348403912761424e-06, | |
| "loss": 2.2343, | |
| "step": 22960 | |
| }, | |
| { | |
| "epoch": 64.01114206128133, | |
| "grad_norm": 1.3543942876062933, | |
| "learning_rate": 5.335152414485308e-06, | |
| "loss": 2.2503, | |
| "step": 22980 | |
| }, | |
| { | |
| "epoch": 64.06685236768803, | |
| "grad_norm": 1.4563546735325645, | |
| "learning_rate": 5.32215670458733e-06, | |
| "loss": 2.2304, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 64.12256267409471, | |
| "grad_norm": 1.622565484000199, | |
| "learning_rate": 5.309416872510913e-06, | |
| "loss": 2.2452, | |
| "step": 23020 | |
| }, | |
| { | |
| "epoch": 64.17827298050139, | |
| "grad_norm": 1.4460308265765018, | |
| "learning_rate": 5.296933005938412e-06, | |
| "loss": 2.2938, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 64.23398328690807, | |
| "grad_norm": 1.4440564061553423, | |
| "learning_rate": 5.284705190790466e-06, | |
| "loss": 2.2453, | |
| "step": 23060 | |
| }, | |
| { | |
| "epoch": 64.28969359331477, | |
| "grad_norm": 1.4478148572192913, | |
| "learning_rate": 5.272733511225455e-06, | |
| "loss": 2.2343, | |
| "step": 23080 | |
| }, | |
| { | |
| "epoch": 64.34540389972145, | |
| "grad_norm": 1.4180812216938306, | |
| "learning_rate": 5.261018049638886e-06, | |
| "loss": 2.2665, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 64.40111420612813, | |
| "grad_norm": 1.5831915297512447, | |
| "learning_rate": 5.24955888666284e-06, | |
| "loss": 2.2539, | |
| "step": 23120 | |
| }, | |
| { | |
| "epoch": 64.45682451253482, | |
| "grad_norm": 1.6700085502977315, | |
| "learning_rate": 5.238356101165407e-06, | |
| "loss": 2.2677, | |
| "step": 23140 | |
| }, | |
| { | |
| "epoch": 64.5125348189415, | |
| "grad_norm": 1.4281340861580372, | |
| "learning_rate": 5.227409770250158e-06, | |
| "loss": 2.2693, | |
| "step": 23160 | |
| }, | |
| { | |
| "epoch": 64.56824512534818, | |
| "grad_norm": 1.474080653934136, | |
| "learning_rate": 5.216719969255597e-06, | |
| "loss": 2.2576, | |
| "step": 23180 | |
| }, | |
| { | |
| "epoch": 64.62395543175488, | |
| "grad_norm": 1.5546948660357771, | |
| "learning_rate": 5.206286771754661e-06, | |
| "loss": 2.2718, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 64.67966573816156, | |
| "grad_norm": 1.5619037183872753, | |
| "learning_rate": 5.196110249554205e-06, | |
| "loss": 2.2617, | |
| "step": 23220 | |
| }, | |
| { | |
| "epoch": 64.73537604456824, | |
| "grad_norm": 1.4612049949245505, | |
| "learning_rate": 5.186190472694495e-06, | |
| "loss": 2.2531, | |
| "step": 23240 | |
| }, | |
| { | |
| "epoch": 64.79108635097494, | |
| "grad_norm": 1.4463084271801094, | |
| "learning_rate": 5.176527509448752e-06, | |
| "loss": 2.2492, | |
| "step": 23260 | |
| }, | |
| { | |
| "epoch": 64.84679665738162, | |
| "grad_norm": 1.52054949971835, | |
| "learning_rate": 5.167121426322663e-06, | |
| "loss": 2.265, | |
| "step": 23280 | |
| }, | |
| { | |
| "epoch": 64.9025069637883, | |
| "grad_norm": 1.6137336460770288, | |
| "learning_rate": 5.157972288053923e-06, | |
| "loss": 2.2761, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 64.958217270195, | |
| "grad_norm": 1.5343563001642067, | |
| "learning_rate": 5.1490801576118046e-06, | |
| "loss": 2.2589, | |
| "step": 23320 | |
| }, | |
| { | |
| "epoch": 65.01392757660167, | |
| "grad_norm": 1.4067304900789732, | |
| "learning_rate": 5.140445096196706e-06, | |
| "loss": 2.2344, | |
| "step": 23340 | |
| }, | |
| { | |
| "epoch": 65.06963788300835, | |
| "grad_norm": 1.6266281172023442, | |
| "learning_rate": 5.132067163239744e-06, | |
| "loss": 2.2327, | |
| "step": 23360 | |
| }, | |
| { | |
| "epoch": 65.12534818941504, | |
| "grad_norm": 1.6114614480347964, | |
| "learning_rate": 5.123946416402338e-06, | |
| "loss": 2.2252, | |
| "step": 23380 | |
| }, | |
| { | |
| "epoch": 65.18105849582173, | |
| "grad_norm": 1.4381934112576613, | |
| "learning_rate": 5.116082911575816e-06, | |
| "loss": 2.2376, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 65.23676880222841, | |
| "grad_norm": 1.6265621183224508, | |
| "learning_rate": 5.108476702881032e-06, | |
| "loss": 2.2575, | |
| "step": 23420 | |
| }, | |
| { | |
| "epoch": 65.29247910863509, | |
| "grad_norm": 1.6332251534091453, | |
| "learning_rate": 5.101127842667981e-06, | |
| "loss": 2.2482, | |
| "step": 23440 | |
| }, | |
| { | |
| "epoch": 65.34818941504179, | |
| "grad_norm": 1.4703516792242604, | |
| "learning_rate": 5.094036381515459e-06, | |
| "loss": 2.2636, | |
| "step": 23460 | |
| }, | |
| { | |
| "epoch": 65.40389972144847, | |
| "grad_norm": 1.828744349221896, | |
| "learning_rate": 5.087202368230689e-06, | |
| "loss": 2.2676, | |
| "step": 23480 | |
| }, | |
| { | |
| "epoch": 65.45961002785515, | |
| "grad_norm": 1.5728681116117378, | |
| "learning_rate": 5.080625849849016e-06, | |
| "loss": 2.2408, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 65.51532033426184, | |
| "grad_norm": 1.3646586794266737, | |
| "learning_rate": 5.074306871633561e-06, | |
| "loss": 2.2594, | |
| "step": 23520 | |
| }, | |
| { | |
| "epoch": 65.57103064066852, | |
| "grad_norm": 1.344312658230311, | |
| "learning_rate": 5.068245477074914e-06, | |
| "loss": 2.2548, | |
| "step": 23540 | |
| }, | |
| { | |
| "epoch": 65.6267409470752, | |
| "grad_norm": 2.3009044106769543, | |
| "learning_rate": 5.062441707890833e-06, | |
| "loss": 2.2515, | |
| "step": 23560 | |
| }, | |
| { | |
| "epoch": 65.6824512534819, | |
| "grad_norm": 1.5477394057524316, | |
| "learning_rate": 5.056895604025971e-06, | |
| "loss": 2.2286, | |
| "step": 23580 | |
| }, | |
| { | |
| "epoch": 65.73816155988858, | |
| "grad_norm": 1.473509209538851, | |
| "learning_rate": 5.051607203651582e-06, | |
| "loss": 2.2558, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 65.79387186629526, | |
| "grad_norm": 1.6163262255244608, | |
| "learning_rate": 5.046576543165266e-06, | |
| "loss": 2.2587, | |
| "step": 23620 | |
| }, | |
| { | |
| "epoch": 65.84958217270194, | |
| "grad_norm": 1.7522873848064437, | |
| "learning_rate": 5.041803657190727e-06, | |
| "loss": 2.262, | |
| "step": 23640 | |
| }, | |
| { | |
| "epoch": 65.90529247910864, | |
| "grad_norm": 1.9510070766562828, | |
| "learning_rate": 5.037288578577515e-06, | |
| "loss": 2.2731, | |
| "step": 23660 | |
| }, | |
| { | |
| "epoch": 65.96100278551532, | |
| "grad_norm": 1.3249968235869283, | |
| "learning_rate": 5.033031338400824e-06, | |
| "loss": 2.2357, | |
| "step": 23680 | |
| }, | |
| { | |
| "epoch": 66.016713091922, | |
| "grad_norm": 1.8089068811586904, | |
| "learning_rate": 5.0290319659612565e-06, | |
| "loss": 2.2264, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 66.0724233983287, | |
| "grad_norm": 1.6823052379660255, | |
| "learning_rate": 5.0252904887846365e-06, | |
| "loss": 2.2241, | |
| "step": 23720 | |
| }, | |
| { | |
| "epoch": 66.12813370473538, | |
| "grad_norm": 1.4348819340116656, | |
| "learning_rate": 5.02180693262181e-06, | |
| "loss": 2.2448, | |
| "step": 23740 | |
| }, | |
| { | |
| "epoch": 66.18384401114206, | |
| "grad_norm": 1.48816291038319, | |
| "learning_rate": 5.01858132144848e-06, | |
| "loss": 2.2445, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 66.23955431754875, | |
| "grad_norm": 1.4921612956391412, | |
| "learning_rate": 5.015613677465031e-06, | |
| "loss": 2.2608, | |
| "step": 23780 | |
| }, | |
| { | |
| "epoch": 66.29526462395543, | |
| "grad_norm": 1.8304620275041354, | |
| "learning_rate": 5.0129040210963695e-06, | |
| "loss": 2.2599, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 66.35097493036211, | |
| "grad_norm": 1.3873830650491625, | |
| "learning_rate": 5.010452370991807e-06, | |
| "loss": 2.2506, | |
| "step": 23820 | |
| }, | |
| { | |
| "epoch": 66.40668523676881, | |
| "grad_norm": 1.4260103007082212, | |
| "learning_rate": 5.008258744024913e-06, | |
| "loss": 2.2474, | |
| "step": 23840 | |
| }, | |
| { | |
| "epoch": 66.46239554317549, | |
| "grad_norm": 1.5475790370983857, | |
| "learning_rate": 5.006323155293398e-06, | |
| "loss": 2.2718, | |
| "step": 23860 | |
| }, | |
| { | |
| "epoch": 66.51810584958217, | |
| "grad_norm": 1.6513082696882795, | |
| "learning_rate": 5.004645618119022e-06, | |
| "loss": 2.2305, | |
| "step": 23880 | |
| }, | |
| { | |
| "epoch": 66.57381615598885, | |
| "grad_norm": 1.3518749191666553, | |
| "learning_rate": 5.0032261440475e-06, | |
| "loss": 2.2475, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 66.62952646239555, | |
| "grad_norm": 1.4415217716791036, | |
| "learning_rate": 5.0020647428484e-06, | |
| "loss": 2.2413, | |
| "step": 23920 | |
| }, | |
| { | |
| "epoch": 66.68523676880223, | |
| "grad_norm": 1.463396039264776, | |
| "learning_rate": 5.001161422515119e-06, | |
| "loss": 2.2409, | |
| "step": 23940 | |
| }, | |
| { | |
| "epoch": 66.74094707520891, | |
| "grad_norm": 1.6094576785573045, | |
| "learning_rate": 5.000516189264787e-06, | |
| "loss": 2.2368, | |
| "step": 23960 | |
| }, | |
| { | |
| "epoch": 66.7966573816156, | |
| "grad_norm": 1.8634319644864112, | |
| "learning_rate": 5.000129047538239e-06, | |
| "loss": 2.2534, | |
| "step": 23980 | |
| }, | |
| { | |
| "epoch": 66.85236768802228, | |
| "grad_norm": 1.5147891109103364, | |
| "learning_rate": 5e-06, | |
| "loss": 2.2525, | |
| "step": 24000 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 24000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 67, | |
| "save_steps": 3000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4048509763584000.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |