{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 7470,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004017576898932831,
      "grad_norm": 5.806783098701457,
      "learning_rate": 1.204819277108434e-07,
      "loss": 1.0662,
      "step": 10
    },
    {
      "epoch": 0.008035153797865662,
      "grad_norm": 6.1737495782500815,
      "learning_rate": 2.5435073627844717e-07,
      "loss": 1.0418,
      "step": 20
    },
    {
      "epoch": 0.012052730696798493,
      "grad_norm": 5.869188839514599,
      "learning_rate": 3.8821954484605087e-07,
      "loss": 1.0476,
      "step": 30
    },
    {
      "epoch": 0.016070307595731324,
      "grad_norm": 4.067430650309112,
      "learning_rate": 5.220883534136546e-07,
      "loss": 1.0186,
      "step": 40
    },
    {
      "epoch": 0.020087884494664157,
      "grad_norm": 1.9993360792772212,
      "learning_rate": 6.559571619812584e-07,
      "loss": 0.9457,
      "step": 50
    },
    {
      "epoch": 0.024105461393596987,
      "grad_norm": 1.0897504400503495,
      "learning_rate": 7.898259705488621e-07,
      "loss": 0.8694,
      "step": 60
    },
    {
      "epoch": 0.02812303829252982,
      "grad_norm": 0.8480278075524572,
      "learning_rate": 9.236947791164659e-07,
      "loss": 0.8453,
      "step": 70
    },
    {
      "epoch": 0.03214061519146265,
      "grad_norm": 0.5847985812870466,
      "learning_rate": 1.0575635876840697e-06,
      "loss": 0.8091,
      "step": 80
    },
    {
      "epoch": 0.03615819209039548,
      "grad_norm": 0.5632601125338154,
      "learning_rate": 1.1914323962516733e-06,
      "loss": 0.7981,
      "step": 90
    },
    {
      "epoch": 0.040175768989328314,
      "grad_norm": 0.4998168225005524,
      "learning_rate": 1.3253012048192773e-06,
      "loss": 0.7646,
      "step": 100
    },
    {
      "epoch": 0.04419334588826114,
      "grad_norm": 0.6903822114162349,
      "learning_rate": 1.4591700133868811e-06,
      "loss": 0.7619,
      "step": 110
    },
    {
      "epoch": 0.04821092278719397,
      "grad_norm": 0.4815863955585346,
      "learning_rate": 1.593038821954485e-06,
      "loss": 0.7679,
      "step": 120
    },
    {
      "epoch": 0.052228499686126806,
      "grad_norm": 0.4828754779519579,
      "learning_rate": 1.7269076305220885e-06,
      "loss": 0.7406,
      "step": 130
    },
    {
      "epoch": 0.05624607658505964,
      "grad_norm": 0.5272829904340466,
      "learning_rate": 1.8607764390896923e-06,
      "loss": 0.7451,
      "step": 140
    },
    {
      "epoch": 0.060263653483992465,
      "grad_norm": 0.48100813622874905,
      "learning_rate": 1.994645247657296e-06,
      "loss": 0.7053,
      "step": 150
    },
    {
      "epoch": 0.0642812303829253,
      "grad_norm": 0.49305369454902975,
      "learning_rate": 2.1285140562248997e-06,
      "loss": 0.7217,
      "step": 160
    },
    {
      "epoch": 0.06829880728185812,
      "grad_norm": 0.4711986293293931,
      "learning_rate": 2.2623828647925037e-06,
      "loss": 0.7368,
      "step": 170
    },
    {
      "epoch": 0.07231638418079096,
      "grad_norm": 0.5025185766261334,
      "learning_rate": 2.3962516733601073e-06,
      "loss": 0.7179,
      "step": 180
    },
    {
      "epoch": 0.07633396107972379,
      "grad_norm": 0.5264169172616818,
      "learning_rate": 2.530120481927711e-06,
      "loss": 0.7374,
      "step": 190
    },
    {
      "epoch": 0.08035153797865663,
      "grad_norm": 0.46948548462020134,
      "learning_rate": 2.6639892904953145e-06,
      "loss": 0.72,
      "step": 200
    },
    {
      "epoch": 0.08436911487758945,
      "grad_norm": 0.463248294175632,
      "learning_rate": 2.7978580990629185e-06,
      "loss": 0.7115,
      "step": 210
    },
    {
      "epoch": 0.08838669177652228,
      "grad_norm": 0.45484667965794356,
      "learning_rate": 2.931726907630522e-06,
      "loss": 0.7034,
      "step": 220
    },
    {
      "epoch": 0.09240426867545512,
      "grad_norm": 0.4357030076439862,
      "learning_rate": 3.0655957161981257e-06,
      "loss": 0.7126,
      "step": 230
    },
    {
      "epoch": 0.09642184557438795,
      "grad_norm": 0.6453731431637157,
      "learning_rate": 3.1994645247657297e-06,
      "loss": 0.7097,
      "step": 240
    },
    {
      "epoch": 0.10043942247332077,
      "grad_norm": 0.4849489606416127,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.7023,
      "step": 250
    },
    {
      "epoch": 0.10445699937225361,
      "grad_norm": 0.42549798364358465,
      "learning_rate": 3.4672021419009373e-06,
      "loss": 0.6924,
      "step": 260
    },
    {
      "epoch": 0.10847457627118644,
      "grad_norm": 0.4773000774187341,
      "learning_rate": 3.601070950468541e-06,
      "loss": 0.6968,
      "step": 270
    },
    {
      "epoch": 0.11249215317011928,
      "grad_norm": 0.5240441729923269,
      "learning_rate": 3.7349397590361445e-06,
      "loss": 0.7045,
      "step": 280
    },
    {
      "epoch": 0.1165097300690521,
      "grad_norm": 0.4910140491378646,
      "learning_rate": 3.8688085676037485e-06,
      "loss": 0.6933,
      "step": 290
    },
    {
      "epoch": 0.12052730696798493,
      "grad_norm": 0.4580543272940571,
      "learning_rate": 4.002677376171352e-06,
      "loss": 0.6762,
      "step": 300
    },
    {
      "epoch": 0.12454488386691777,
      "grad_norm": 0.4638793548670153,
      "learning_rate": 4.136546184738956e-06,
      "loss": 0.6807,
      "step": 310
    },
    {
      "epoch": 0.1285624607658506,
      "grad_norm": 0.48226078090248237,
      "learning_rate": 4.270414993306559e-06,
      "loss": 0.6836,
      "step": 320
    },
    {
      "epoch": 0.13258003766478343,
      "grad_norm": 0.5205992514175658,
      "learning_rate": 4.404283801874164e-06,
      "loss": 0.7006,
      "step": 330
    },
    {
      "epoch": 0.13659761456371625,
      "grad_norm": 0.5157658854686866,
      "learning_rate": 4.538152610441767e-06,
      "loss": 0.6777,
      "step": 340
    },
    {
      "epoch": 0.1406151914626491,
      "grad_norm": 0.5186893129183067,
      "learning_rate": 4.672021419009371e-06,
      "loss": 0.6863,
      "step": 350
    },
    {
      "epoch": 0.14463276836158193,
      "grad_norm": 0.48228160242697454,
      "learning_rate": 4.8058902275769745e-06,
      "loss": 0.692,
      "step": 360
    },
    {
      "epoch": 0.14865034526051477,
      "grad_norm": 0.48252742197899023,
      "learning_rate": 4.939759036144578e-06,
      "loss": 0.6866,
      "step": 370
    },
    {
      "epoch": 0.15266792215944758,
      "grad_norm": 0.4457961570378318,
      "learning_rate": 5.0736278447121826e-06,
      "loss": 0.6786,
      "step": 380
    },
    {
      "epoch": 0.15668549905838042,
      "grad_norm": 0.475655993068204,
      "learning_rate": 5.207496653279787e-06,
      "loss": 0.6899,
      "step": 390
    },
    {
      "epoch": 0.16070307595731326,
      "grad_norm": 0.44137190174512886,
      "learning_rate": 5.34136546184739e-06,
      "loss": 0.6673,
      "step": 400
    },
    {
      "epoch": 0.16472065285624607,
      "grad_norm": 0.49163743462647375,
      "learning_rate": 5.475234270414994e-06,
      "loss": 0.6696,
      "step": 410
    },
    {
      "epoch": 0.1687382297551789,
      "grad_norm": 0.5274225683716718,
      "learning_rate": 5.609103078982597e-06,
      "loss": 0.6604,
      "step": 420
    },
    {
      "epoch": 0.17275580665411175,
      "grad_norm": 0.4371201249615473,
      "learning_rate": 5.742971887550201e-06,
      "loss": 0.6623,
      "step": 430
    },
    {
      "epoch": 0.17677338355304456,
      "grad_norm": 0.496866788031066,
      "learning_rate": 5.876840696117805e-06,
      "loss": 0.6733,
      "step": 440
    },
    {
      "epoch": 0.1807909604519774,
      "grad_norm": 0.5029900924311191,
      "learning_rate": 6.010709504685409e-06,
      "loss": 0.6593,
      "step": 450
    },
    {
      "epoch": 0.18480853735091024,
      "grad_norm": 0.5057790072232172,
      "learning_rate": 6.144578313253012e-06,
      "loss": 0.684,
      "step": 460
    },
    {
      "epoch": 0.18882611424984305,
      "grad_norm": 1.210542334820464,
      "learning_rate": 6.2784471218206166e-06,
      "loss": 0.6743,
      "step": 470
    },
    {
      "epoch": 0.1928436911487759,
      "grad_norm": 0.4979782025477802,
      "learning_rate": 6.41231593038822e-06,
      "loss": 0.6645,
      "step": 480
    },
    {
      "epoch": 0.19686126804770873,
      "grad_norm": 0.4671679965356099,
      "learning_rate": 6.546184738955825e-06,
      "loss": 0.6593,
      "step": 490
    },
    {
      "epoch": 0.20087884494664154,
      "grad_norm": 0.47107607146399,
      "learning_rate": 6.680053547523427e-06,
      "loss": 0.653,
      "step": 500
    },
    {
      "epoch": 0.20489642184557438,
      "grad_norm": 0.5355141556215653,
      "learning_rate": 6.813922356091032e-06,
      "loss": 0.6555,
      "step": 510
    },
    {
      "epoch": 0.20891399874450722,
      "grad_norm": 0.4770424360600091,
      "learning_rate": 6.9477911646586345e-06,
      "loss": 0.6426,
      "step": 520
    },
    {
      "epoch": 0.21293157564344006,
      "grad_norm": 0.4799836242336395,
      "learning_rate": 7.081659973226239e-06,
      "loss": 0.6565,
      "step": 530
    },
    {
      "epoch": 0.21694915254237288,
      "grad_norm": 0.5167694867415858,
      "learning_rate": 7.2155287817938426e-06,
      "loss": 0.6609,
      "step": 540
    },
    {
      "epoch": 0.22096672944130571,
      "grad_norm": 0.5086826009179188,
      "learning_rate": 7.349397590361447e-06,
      "loss": 0.6494,
      "step": 550
    },
    {
      "epoch": 0.22498430634023855,
      "grad_norm": 0.4922827776447584,
      "learning_rate": 7.48326639892905e-06,
      "loss": 0.6558,
      "step": 560
    },
    {
      "epoch": 0.22900188323917137,
      "grad_norm": 0.48344938995436526,
      "learning_rate": 7.617135207496654e-06,
      "loss": 0.6481,
      "step": 570
    },
    {
      "epoch": 0.2330194601381042,
      "grad_norm": 0.507502223485786,
      "learning_rate": 7.751004016064258e-06,
      "loss": 0.6529,
      "step": 580
    },
    {
      "epoch": 0.23703703703703705,
      "grad_norm": 0.49027674360195406,
      "learning_rate": 7.884872824631861e-06,
      "loss": 0.6475,
      "step": 590
    },
    {
      "epoch": 0.24105461393596986,
      "grad_norm": 0.47662111002737323,
      "learning_rate": 8.018741633199465e-06,
      "loss": 0.6606,
      "step": 600
    },
    {
      "epoch": 0.2450721908349027,
      "grad_norm": 0.5178759139169029,
      "learning_rate": 8.152610441767069e-06,
      "loss": 0.6507,
      "step": 610
    },
    {
      "epoch": 0.24908976773383554,
      "grad_norm": 0.4913437654692537,
      "learning_rate": 8.286479250334672e-06,
      "loss": 0.6676,
      "step": 620
    },
    {
      "epoch": 0.25310734463276835,
      "grad_norm": 0.4983838425660273,
      "learning_rate": 8.420348058902277e-06,
      "loss": 0.6431,
      "step": 630
    },
    {
      "epoch": 0.2571249215317012,
      "grad_norm": 0.5296861218593591,
      "learning_rate": 8.55421686746988e-06,
      "loss": 0.6469,
      "step": 640
    },
    {
      "epoch": 0.26114249843063403,
      "grad_norm": 0.49470648462227873,
      "learning_rate": 8.688085676037485e-06,
      "loss": 0.653,
      "step": 650
    },
    {
      "epoch": 0.26516007532956687,
      "grad_norm": 0.4977505266791689,
      "learning_rate": 8.821954484605088e-06,
      "loss": 0.6506,
      "step": 660
    },
    {
      "epoch": 0.2691776522284997,
      "grad_norm": 0.5181442610979835,
      "learning_rate": 8.955823293172692e-06,
      "loss": 0.6389,
      "step": 670
    },
    {
      "epoch": 0.2731952291274325,
      "grad_norm": 0.5401626680115751,
      "learning_rate": 9.089692101740295e-06,
      "loss": 0.644,
      "step": 680
    },
    {
      "epoch": 0.27721280602636533,
      "grad_norm": 0.5294876220592581,
      "learning_rate": 9.223560910307899e-06,
      "loss": 0.6476,
      "step": 690
    },
    {
      "epoch": 0.2812303829252982,
      "grad_norm": 0.5001749586085277,
      "learning_rate": 9.357429718875503e-06,
      "loss": 0.6469,
      "step": 700
    },
    {
      "epoch": 0.285247959824231,
      "grad_norm": 0.49514793732187534,
      "learning_rate": 9.491298527443106e-06,
      "loss": 0.6412,
      "step": 710
    },
    {
      "epoch": 0.28926553672316385,
      "grad_norm": 0.5451601544807843,
      "learning_rate": 9.62516733601071e-06,
      "loss": 0.6342,
      "step": 720
    },
    {
      "epoch": 0.2932831136220967,
      "grad_norm": 0.5386128345367395,
      "learning_rate": 9.759036144578315e-06,
      "loss": 0.6422,
      "step": 730
    },
    {
      "epoch": 0.29730069052102953,
      "grad_norm": 0.5295404573044942,
      "learning_rate": 9.892904953145917e-06,
      "loss": 0.6418,
      "step": 740
    },
    {
      "epoch": 0.3013182674199623,
      "grad_norm": 0.48815626124299877,
      "learning_rate": 9.999997816397962e-06,
      "loss": 0.649,
      "step": 750
    },
    {
      "epoch": 0.30533584431889516,
      "grad_norm": 0.5070827630467095,
      "learning_rate": 9.999921390526839e-06,
      "loss": 0.6453,
      "step": 760
    },
    {
      "epoch": 0.309353421217828,
      "grad_norm": 0.5070143645241298,
      "learning_rate": 9.999735786460982e-06,
      "loss": 0.6302,
      "step": 770
    },
    {
      "epoch": 0.31337099811676083,
      "grad_norm": 0.4790968559270035,
      "learning_rate": 9.999441008253238e-06,
      "loss": 0.632,
      "step": 780
    },
    {
      "epoch": 0.3173885750156937,
      "grad_norm": 0.48852106728621275,
      "learning_rate": 9.999037062340376e-06,
      "loss": 0.6436,
      "step": 790
    },
    {
      "epoch": 0.3214061519146265,
      "grad_norm": 0.4972110031536491,
      "learning_rate": 9.998523957542955e-06,
      "loss": 0.6411,
      "step": 800
    },
    {
      "epoch": 0.3254237288135593,
      "grad_norm": 0.533109673228536,
      "learning_rate": 9.997901705065118e-06,
      "loss": 0.6422,
      "step": 810
    },
    {
      "epoch": 0.32944130571249214,
      "grad_norm": 0.528721261444126,
      "learning_rate": 9.997170318494362e-06,
      "loss": 0.6457,
      "step": 820
    },
    {
      "epoch": 0.333458882611425,
      "grad_norm": 0.7706123304017218,
      "learning_rate": 9.996329813801233e-06,
      "loss": 0.6479,
      "step": 830
    },
    {
      "epoch": 0.3374764595103578,
      "grad_norm": 0.6304185980981377,
      "learning_rate": 9.995380209338973e-06,
      "loss": 0.639,
      "step": 840
    },
    {
      "epoch": 0.34149403640929066,
      "grad_norm": 0.47993658132408695,
      "learning_rate": 9.99432152584313e-06,
      "loss": 0.6232,
      "step": 850
    },
    {
      "epoch": 0.3455116133082235,
      "grad_norm": 0.5189489672799563,
      "learning_rate": 9.993153786431098e-06,
      "loss": 0.6457,
      "step": 860
    },
    {
      "epoch": 0.3495291902071563,
      "grad_norm": 0.490298021772909,
      "learning_rate": 9.991877016601612e-06,
      "loss": 0.6489,
      "step": 870
    },
    {
      "epoch": 0.3535467671060891,
      "grad_norm": 0.4733551505574991,
      "learning_rate": 9.990491244234197e-06,
      "loss": 0.6327,
      "step": 880
    },
    {
      "epoch": 0.35756434400502196,
      "grad_norm": 0.4812850798556578,
      "learning_rate": 9.988996499588556e-06,
      "loss": 0.6325,
      "step": 890
    },
    {
      "epoch": 0.3615819209039548,
      "grad_norm": 0.5139202954592238,
      "learning_rate": 9.987392815303903e-06,
      "loss": 0.6302,
      "step": 900
    },
    {
      "epoch": 0.36559949780288764,
      "grad_norm": 0.4950874659702236,
      "learning_rate": 9.985680226398261e-06,
      "loss": 0.641,
      "step": 910
    },
    {
      "epoch": 0.3696170747018205,
      "grad_norm": 0.5079828516156794,
      "learning_rate": 9.98385877026769e-06,
      "loss": 0.6384,
      "step": 920
    },
    {
      "epoch": 0.3736346516007533,
      "grad_norm": 0.49658973438025256,
      "learning_rate": 9.981928486685477e-06,
      "loss": 0.6365,
      "step": 930
    },
    {
      "epoch": 0.3776522284996861,
      "grad_norm": 0.46532869938151694,
      "learning_rate": 9.979889417801257e-06,
      "loss": 0.64,
      "step": 940
    },
    {
      "epoch": 0.38166980539861894,
      "grad_norm": 0.5187436115265832,
      "learning_rate": 9.9777416081401e-06,
      "loss": 0.6268,
      "step": 950
    },
    {
      "epoch": 0.3856873822975518,
      "grad_norm": 0.46664875039431214,
      "learning_rate": 9.975485104601544e-06,
      "loss": 0.6302,
      "step": 960
    },
    {
      "epoch": 0.3897049591964846,
      "grad_norm": 0.47880441558880193,
      "learning_rate": 9.973119956458558e-06,
      "loss": 0.6238,
      "step": 970
    },
    {
      "epoch": 0.39372253609541746,
      "grad_norm": 0.4691906979131477,
      "learning_rate": 9.970646215356477e-06,
      "loss": 0.6422,
      "step": 980
    },
    {
      "epoch": 0.3977401129943503,
      "grad_norm": 0.5000299551978495,
      "learning_rate": 9.968063935311865e-06,
      "loss": 0.6329,
      "step": 990
    },
    {
      "epoch": 0.4017576898932831,
      "grad_norm": 0.5484904742238748,
      "learning_rate": 9.965373172711343e-06,
      "loss": 0.6317,
      "step": 1000
    },
    {
      "epoch": 0.4057752667922159,
      "grad_norm": 0.517325966851464,
      "learning_rate": 9.96257398631036e-06,
      "loss": 0.6404,
      "step": 1010
    },
    {
      "epoch": 0.40979284369114877,
      "grad_norm": 0.4507627813502968,
      "learning_rate": 9.959666437231895e-06,
      "loss": 0.6303,
      "step": 1020
    },
    {
      "epoch": 0.4138104205900816,
      "grad_norm": 0.5071861791885447,
      "learning_rate": 9.95665058896514e-06,
      "loss": 0.6135,
      "step": 1030
    },
    {
      "epoch": 0.41782799748901445,
      "grad_norm": 0.45341474710439855,
      "learning_rate": 9.953526507364106e-06,
      "loss": 0.619,
      "step": 1040
    },
    {
      "epoch": 0.4218455743879473,
      "grad_norm": 0.46797706523025123,
      "learning_rate": 9.95029426064618e-06,
      "loss": 0.6253,
      "step": 1050
    },
    {
      "epoch": 0.4258631512868801,
      "grad_norm": 0.5018403383029635,
      "learning_rate": 9.946953919390648e-06,
      "loss": 0.6363,
      "step": 1060
    },
    {
      "epoch": 0.4298807281858129,
      "grad_norm": 0.4563294653990798,
      "learning_rate": 9.94350555653714e-06,
      "loss": 0.6223,
      "step": 1070
    },
    {
      "epoch": 0.43389830508474575,
      "grad_norm": 0.5142438260172338,
      "learning_rate": 9.939949247384046e-06,
      "loss": 0.636,
      "step": 1080
    },
    {
      "epoch": 0.4379158819836786,
      "grad_norm": 0.4974669445120328,
      "learning_rate": 9.93628506958687e-06,
      "loss": 0.6242,
      "step": 1090
    },
    {
      "epoch": 0.44193345888261143,
      "grad_norm": 0.5980709429827091,
      "learning_rate": 9.932513103156532e-06,
      "loss": 0.6408,
      "step": 1100
    },
    {
      "epoch": 0.44595103578154427,
      "grad_norm": 0.5581732511219715,
      "learning_rate": 9.928633430457628e-06,
      "loss": 0.6139,
      "step": 1110
    },
    {
      "epoch": 0.4499686126804771,
      "grad_norm": 0.5080904502728457,
      "learning_rate": 9.924646136206617e-06,
      "loss": 0.628,
      "step": 1120
    },
    {
      "epoch": 0.4539861895794099,
      "grad_norm": 0.5539298313516415,
      "learning_rate": 9.920551307469987e-06,
      "loss": 0.6212,
      "step": 1130
    },
    {
      "epoch": 0.45800376647834273,
      "grad_norm": 0.5135075682143369,
      "learning_rate": 9.916349033662349e-06,
      "loss": 0.6207,
      "step": 1140
    },
    {
      "epoch": 0.4620213433772756,
      "grad_norm": 0.5431528274267999,
      "learning_rate": 9.912039406544477e-06,
      "loss": 0.6268,
      "step": 1150
    },
    {
      "epoch": 0.4660389202762084,
      "grad_norm": 0.46293456683258105,
      "learning_rate": 9.907622520221312e-06,
      "loss": 0.6168,
      "step": 1160
    },
    {
      "epoch": 0.47005649717514125,
      "grad_norm": 0.5265783067042777,
      "learning_rate": 9.903098471139903e-06,
      "loss": 0.611,
      "step": 1170
    },
    {
      "epoch": 0.4740740740740741,
      "grad_norm": 0.4964165005454209,
      "learning_rate": 9.89846735808731e-06,
      "loss": 0.6209,
      "step": 1180
    },
    {
      "epoch": 0.47809165097300693,
      "grad_norm": 0.44720379185402914,
      "learning_rate": 9.893729282188433e-06,
      "loss": 0.6274,
      "step": 1190
    },
    {
      "epoch": 0.4821092278719397,
      "grad_norm": 0.5160429448873454,
      "learning_rate": 9.888884346903813e-06,
      "loss": 0.618,
      "step": 1200
    },
    {
      "epoch": 0.48612680477087256,
      "grad_norm": 0.483511156492776,
      "learning_rate": 9.883932658027374e-06,
      "loss": 0.621,
      "step": 1210
    },
    {
      "epoch": 0.4901443816698054,
      "grad_norm": 0.5158419978797311,
      "learning_rate": 9.8788743236841e-06,
      "loss": 0.6295,
      "step": 1220
    },
    {
      "epoch": 0.49416195856873824,
      "grad_norm": 0.5784261169598502,
      "learning_rate": 9.873709454327697e-06,
      "loss": 0.6215,
      "step": 1230
    },
    {
      "epoch": 0.4981795354676711,
      "grad_norm": 0.5198565153146563,
      "learning_rate": 9.868438162738154e-06,
      "loss": 0.6264,
      "step": 1240
    },
    {
      "epoch": 0.5021971123666039,
      "grad_norm": 0.510434585062711,
      "learning_rate": 9.863060564019305e-06,
      "loss": 0.6149,
      "step": 1250
    },
    {
      "epoch": 0.5062146892655367,
      "grad_norm": 0.4593962120443787,
      "learning_rate": 9.8575767755963e-06,
      "loss": 0.625,
      "step": 1260
    },
    {
      "epoch": 0.5102322661644696,
      "grad_norm": 0.498172586068187,
      "learning_rate": 9.851986917213044e-06,
      "loss": 0.6143,
      "step": 1270
    },
    {
      "epoch": 0.5142498430634024,
      "grad_norm": 0.4620438331915131,
      "learning_rate": 9.846291110929586e-06,
      "loss": 0.6313,
      "step": 1280
    },
    {
      "epoch": 0.5182674199623352,
      "grad_norm": 0.5343634450415627,
      "learning_rate": 9.840489481119452e-06,
      "loss": 0.6182,
      "step": 1290
    },
    {
      "epoch": 0.5222849968612681,
      "grad_norm": 0.48909140457697287,
      "learning_rate": 9.834582154466927e-06,
      "loss": 0.6325,
      "step": 1300
    },
    {
      "epoch": 0.5263025737602008,
      "grad_norm": 0.4678287985367856,
      "learning_rate": 9.828569259964291e-06,
      "loss": 0.6307,
      "step": 1310
    },
    {
      "epoch": 0.5303201506591337,
      "grad_norm": 0.4920124660634079,
      "learning_rate": 9.822450928909e-06,
      "loss": 0.6108,
      "step": 1320
    },
    {
      "epoch": 0.5343377275580665,
      "grad_norm": 1.4526222205367336,
      "learning_rate": 9.816227294900822e-06,
      "loss": 0.6213,
      "step": 1330
    },
    {
      "epoch": 0.5383553044569994,
      "grad_norm": 0.471616985395006,
      "learning_rate": 9.809898493838923e-06,
      "loss": 0.6169,
      "step": 1340
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 0.4421867425241258,
      "learning_rate": 9.803464663918886e-06,
      "loss": 0.6093,
      "step": 1350
    },
    {
      "epoch": 0.546390458254865,
      "grad_norm": 0.5351347284551249,
      "learning_rate": 9.796925945629711e-06,
      "loss": 0.6143,
      "step": 1360
    },
    {
      "epoch": 0.5504080351537979,
      "grad_norm": 0.4877248363865589,
      "learning_rate": 9.79028248175073e-06,
      "loss": 0.6192,
      "step": 1370
    },
    {
      "epoch": 0.5544256120527307,
      "grad_norm": 0.5009708952846684,
      "learning_rate": 9.783534417348507e-06,
      "loss": 0.6143,
      "step": 1380
    },
    {
      "epoch": 0.5584431889516636,
      "grad_norm": 0.5178945086806888,
      "learning_rate": 9.776681899773652e-06,
      "loss": 0.6205,
      "step": 1390
    },
    {
      "epoch": 0.5624607658505963,
      "grad_norm": 0.4762670817917884,
      "learning_rate": 9.769725078657622e-06,
      "loss": 0.6173,
      "step": 1400
    },
    {
      "epoch": 0.5664783427495292,
      "grad_norm": 0.4613547006315256,
      "learning_rate": 9.762664105909434e-06,
      "loss": 0.6251,
      "step": 1410
    },
    {
      "epoch": 0.570495919648462,
      "grad_norm": 0.4702754249747577,
      "learning_rate": 9.755499135712368e-06,
      "loss": 0.6183,
      "step": 1420
    },
    {
      "epoch": 0.5745134965473948,
      "grad_norm": 0.5373232333646776,
      "learning_rate": 9.748230324520585e-06,
      "loss": 0.6132,
      "step": 1430
    },
    {
      "epoch": 0.5785310734463277,
      "grad_norm": 0.5626349604991109,
      "learning_rate": 9.740857831055715e-06,
      "loss": 0.621,
      "step": 1440
    },
    {
      "epoch": 0.5825486503452605,
      "grad_norm": 0.5658213472247587,
      "learning_rate": 9.733381816303395e-06,
      "loss": 0.6138,
      "step": 1450
    },
    {
      "epoch": 0.5865662272441934,
      "grad_norm": 0.45081662608826584,
      "learning_rate": 9.725802443509753e-06,
      "loss": 0.616,
      "step": 1460
    },
    {
      "epoch": 0.5905838041431262,
      "grad_norm": 0.44740848559688695,
      "learning_rate": 9.718119878177837e-06,
      "loss": 0.6129,
      "step": 1470
    },
    {
      "epoch": 0.5946013810420591,
      "grad_norm": 0.5237403744825302,
      "learning_rate": 9.710334288064007e-06,
      "loss": 0.6136,
      "step": 1480
    },
    {
      "epoch": 0.5986189579409918,
      "grad_norm": 0.48924900162227497,
      "learning_rate": 9.702445843174274e-06,
      "loss": 0.6196,
      "step": 1490
    },
    {
      "epoch": 0.6026365348399246,
      "grad_norm": 0.49368923400575526,
      "learning_rate": 9.694454715760573e-06,
      "loss": 0.6187,
      "step": 1500
    },
    {
      "epoch": 0.6066541117388575,
      "grad_norm": 0.487126510015843,
      "learning_rate": 9.686361080317029e-06,
      "loss": 0.6172,
      "step": 1510
    },
    {
      "epoch": 0.6106716886377903,
      "grad_norm": 0.45477094460777745,
      "learning_rate": 9.678165113576114e-06,
      "loss": 0.6056,
      "step": 1520
    },
    {
      "epoch": 0.6146892655367232,
      "grad_norm": 0.49141680431275075,
      "learning_rate": 9.669866994504818e-06,
      "loss": 0.6043,
      "step": 1530
    },
    {
      "epoch": 0.618706842435656,
      "grad_norm": 0.7376167208477882,
      "learning_rate": 9.66146690430072e-06,
      "loss": 0.6208,
      "step": 1540
    },
    {
      "epoch": 0.6227244193345888,
      "grad_norm": 0.4931059951127201,
      "learning_rate": 9.652965026388039e-06,
      "loss": 0.6097,
      "step": 1550
    },
    {
      "epoch": 0.6267419962335217,
      "grad_norm": 0.48305824703353156,
      "learning_rate": 9.644361546413635e-06,
      "loss": 0.6081,
      "step": 1560
    },
    {
      "epoch": 0.6307595731324545,
      "grad_norm": 0.46866086349351754,
      "learning_rate": 9.635656652242938e-06,
      "loss": 0.6187,
      "step": 1570
    },
    {
      "epoch": 0.6347771500313873,
      "grad_norm": 0.48653033771244636,
      "learning_rate": 9.626850533955864e-06,
      "loss": 0.6039,
      "step": 1580
    },
    {
      "epoch": 0.6387947269303201,
      "grad_norm": 0.4839769721956582,
      "learning_rate": 9.617943383842659e-06,
      "loss": 0.617,
      "step": 1590
    },
    {
      "epoch": 0.642812303829253,
      "grad_norm": 0.4887686378157136,
      "learning_rate": 9.608935396399692e-06,
      "loss": 0.6043,
      "step": 1600
    },
    {
      "epoch": 0.6468298807281858,
      "grad_norm": 0.46040947244346264,
      "learning_rate": 9.599826768325218e-06,
      "loss": 0.6088,
      "step": 1610
    },
    {
      "epoch": 0.6508474576271186,
      "grad_norm": 0.4882314027569112,
      "learning_rate": 9.590617698515077e-06,
      "loss": 0.6084,
      "step": 1620
    },
    {
      "epoch": 0.6548650345260515,
      "grad_norm": 0.46985181649935615,
      "learning_rate": 9.581308388058354e-06,
      "loss": 0.6029,
      "step": 1630
    },
    {
      "epoch": 0.6588826114249843,
      "grad_norm": 0.48794194189773543,
      "learning_rate": 9.571899040232989e-06,
      "loss": 0.6088,
      "step": 1640
    },
    {
      "epoch": 0.6629001883239172,
      "grad_norm": 0.46766933522748133,
      "learning_rate": 9.56238986050133e-06,
      "loss": 0.6149,
      "step": 1650
    },
    {
      "epoch": 0.66691776522285,
      "grad_norm": 0.48282374325088395,
      "learning_rate": 9.552781056505662e-06,
      "loss": 0.6101,
      "step": 1660
    },
    {
      "epoch": 0.6709353421217829,
      "grad_norm": 0.5292742570379373,
      "learning_rate": 9.543072838063655e-06,
      "loss": 0.6128,
      "step": 1670
    },
    {
      "epoch": 0.6749529190207156,
      "grad_norm": 0.5260262320491007,
      "learning_rate": 9.533265417163793e-06,
      "loss": 0.6234,
      "step": 1680
    },
    {
      "epoch": 0.6789704959196484,
      "grad_norm": 0.48584608333045604,
      "learning_rate": 9.523359007960748e-06,
      "loss": 0.6116,
      "step": 1690
    },
    {
      "epoch": 0.6829880728185813,
      "grad_norm": 0.4743066347549614,
      "learning_rate": 9.513353826770695e-06,
      "loss": 0.5959,
      "step": 1700
    },
    {
      "epoch": 0.6870056497175141,
      "grad_norm": 0.4856299177120577,
      "learning_rate": 9.503250092066592e-06,
      "loss": 0.6204,
      "step": 1710
    },
    {
      "epoch": 0.691023226616447,
      "grad_norm": 0.5046231694012181,
      "learning_rate": 9.493048024473413e-06,
      "loss": 0.6126,
      "step": 1720
    },
    {
      "epoch": 0.6950408035153798,
      "grad_norm": 0.4928237045269149,
      "learning_rate": 9.48274784676332e-06,
      "loss": 0.6089,
      "step": 1730
    },
    {
      "epoch": 0.6990583804143126,
      "grad_norm": 0.4766754598686215,
      "learning_rate": 9.472349783850815e-06,
      "loss": 0.6061,
      "step": 1740
    },
    {
      "epoch": 0.7030759573132455,
      "grad_norm": 0.4631337703939171,
      "learning_rate": 9.461854062787812e-06,
      "loss": 0.6121,
      "step": 1750
    },
    {
      "epoch": 0.7070935342121782,
      "grad_norm": 0.4961256195106074,
      "learning_rate": 9.451260912758695e-06,
      "loss": 0.6037,
      "step": 1760
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.511885819703456,
      "learning_rate": 9.440570565075295e-06,
      "loss": 0.6145,
      "step": 1770
    },
    {
      "epoch": 0.7151286880100439,
      "grad_norm": 0.48629463779046733,
      "learning_rate": 9.429783253171855e-06,
      "loss": 0.5966,
      "step": 1780
    },
    {
      "epoch": 0.7191462649089768,
      "grad_norm": 0.4792036473525835,
      "learning_rate": 9.418899212599928e-06,
      "loss": 0.608,
      "step": 1790
    },
    {
      "epoch": 0.7231638418079096,
      "grad_norm": 0.5083453945346657,
      "learning_rate": 9.407918681023229e-06,
      "loss": 0.6095,
      "step": 1800
    },
    {
      "epoch": 0.7271814187068424,
      "grad_norm": 0.4685060218692666,
      "learning_rate": 9.396841898212452e-06,
      "loss": 0.6111,
      "step": 1810
    },
    {
      "epoch": 0.7311989956057753,
      "grad_norm": 0.5007535253551997,
      "learning_rate": 9.38566910604003e-06,
      "loss": 0.6175,
      "step": 1820
    },
    {
      "epoch": 0.7352165725047081,
      "grad_norm": 0.4721707017133051,
      "learning_rate": 9.374400548474853e-06,
      "loss": 0.6105,
      "step": 1830
    },
    {
      "epoch": 0.739234149403641,
      "grad_norm": 0.45194793890207013,
      "learning_rate": 9.363036471576945e-06,
      "loss": 0.5976,
      "step": 1840
    },
    {
      "epoch": 0.7432517263025737,
      "grad_norm": 0.48023710070315084,
      "learning_rate": 9.351577123492087e-06,
      "loss": 0.606,
      "step": 1850
    },
    {
      "epoch": 0.7472693032015066,
      "grad_norm": 0.4562292402628347,
      "learning_rate": 9.3400227544464e-06,
      "loss": 0.6204,
      "step": 1860
    },
    {
      "epoch": 0.7512868801004394,
      "grad_norm": 0.5199018316758173,
      "learning_rate": 9.328373616740884e-06,
      "loss": 0.6061,
      "step": 1870
    },
    {
      "epoch": 0.7553044569993722,
      "grad_norm": 0.45403478079118165,
      "learning_rate": 9.3166299647459e-06,
      "loss": 0.5977,
      "step": 1880
    },
    {
      "epoch": 0.7593220338983051,
      "grad_norm": 0.48026121094218754,
      "learning_rate": 9.304792054895627e-06,
      "loss": 0.6046,
      "step": 1890
    },
    {
      "epoch": 0.7633396107972379,
      "grad_norm": 0.5273730174734627,
      "learning_rate": 9.292860145682451e-06,
      "loss": 0.6016,
      "step": 1900
    },
    {
      "epoch": 0.7673571876961708,
      "grad_norm": 0.45635254897692096,
      "learning_rate": 9.280834497651334e-06,
      "loss": 0.6049,
      "step": 1910
    },
    {
      "epoch": 0.7713747645951036,
      "grad_norm": 0.4952893916325239,
      "learning_rate": 9.26871537339411e-06,
      "loss": 0.6108,
      "step": 1920
    },
    {
      "epoch": 0.7753923414940365,
      "grad_norm": 0.5020795358241874,
      "learning_rate": 9.25650303754376e-06,
      "loss": 0.6066,
      "step": 1930
    },
    {
      "epoch": 0.7794099183929692,
      "grad_norm": 0.4519568302601775,
      "learning_rate": 9.244197756768638e-06,
      "loss": 0.6048,
      "step": 1940
    },
    {
      "epoch": 0.783427495291902,
      "grad_norm": 0.4730934017356477,
      "learning_rate": 9.231799799766633e-06,
      "loss": 0.6205,
      "step": 1950
    },
    {
      "epoch": 0.7874450721908349,
      "grad_norm": 0.43205824907881557,
      "learning_rate": 9.219309437259312e-06,
      "loss": 0.6094,
      "step": 1960
    },
    {
      "epoch": 0.7914626490897677,
      "grad_norm": 0.49712695410471086,
      "learning_rate": 9.206726941986012e-06,
      "loss": 0.6177,
      "step": 1970
    },
    {
      "epoch": 0.7954802259887006,
      "grad_norm": 0.5220660443409905,
      "learning_rate": 9.194052588697877e-06,
      "loss": 0.6101,
      "step": 1980
    },
    {
      "epoch": 0.7994978028876334,
      "grad_norm": 0.5154062082113726,
      "learning_rate": 9.18128665415186e-06,
      "loss": 0.5928,
      "step": 1990
    },
    {
      "epoch": 0.8035153797865662,
      "grad_norm": 0.466497275783982,
      "learning_rate": 9.16842941710468e-06,
      "loss": 0.5976,
      "step": 2000
    },
    {
      "epoch": 0.8075329566854991,
      "grad_norm": 0.4710841642629808,
      "learning_rate": 9.155481158306736e-06,
      "loss": 0.5989,
      "step": 2010
    },
    {
      "epoch": 0.8115505335844319,
      "grad_norm": 0.5011683049021086,
      "learning_rate": 9.142442160495981e-06,
      "loss": 0.602,
      "step": 2020
    },
    {
      "epoch": 0.8155681104833647,
      "grad_norm": 0.48222917749993743,
      "learning_rate": 9.129312708391735e-06,
      "loss": 0.5991,
      "step": 2030
    },
    {
      "epoch": 0.8195856873822975,
      "grad_norm": 0.5023833093181953,
      "learning_rate": 9.116093088688486e-06,
      "loss": 0.603,
      "step": 2040
    },
    {
      "epoch": 0.8236032642812304,
      "grad_norm": 0.5057506213682262,
      "learning_rate": 9.102783590049613e-06,
      "loss": 0.6074,
      "step": 2050
    },
    {
      "epoch": 0.8276208411801632,
      "grad_norm": 0.4465714041839266,
      "learning_rate": 9.08938450310109e-06,
      "loss": 0.6117,
      "step": 2060
    },
    {
      "epoch": 0.831638418079096,
      "grad_norm": 0.45774188769330276,
      "learning_rate": 9.075896120425144e-06,
      "loss": 0.5982,
      "step": 2070
    },
    {
      "epoch": 0.8356559949780289,
      "grad_norm": 0.4956125400496556,
      "learning_rate": 9.06231873655386e-06,
      "loss": 0.6131,
      "step": 2080
    },
    {
      "epoch": 0.8396735718769617,
      "grad_norm": 0.503936504338912,
      "learning_rate": 9.04865264796275e-06,
      "loss": 0.6067,
      "step": 2090
    },
    {
      "epoch": 0.8436911487758946,
      "grad_norm": 0.4966401695744208,
      "learning_rate": 9.034898153064281e-06,
      "loss": 0.5982,
      "step": 2100
    },
    {
      "epoch": 0.8477087256748274,
      "grad_norm": 0.48606930633171735,
      "learning_rate": 9.021055552201364e-06,
      "loss": 0.6015,
      "step": 2110
    },
    {
      "epoch": 0.8517263025737603,
      "grad_norm": 0.5102880215310355,
      "learning_rate": 9.00712514764078e-06,
      "loss": 0.6084,
      "step": 2120
    },
    {
      "epoch": 0.855743879472693,
      "grad_norm": 0.5874497994476533,
      "learning_rate": 8.993107243566599e-06,
      "loss": 0.6014,
      "step": 2130
    },
    {
      "epoch": 0.8597614563716258,
      "grad_norm": 0.45526275583074516,
      "learning_rate": 8.979002146073526e-06,
      "loss": 0.6047,
      "step": 2140
    },
    {
      "epoch": 0.8637790332705587,
      "grad_norm": 0.44057367739611536,
      "learning_rate": 8.964810163160218e-06,
      "loss": 0.6023,
      "step": 2150
    },
    {
      "epoch": 0.8677966101694915,
      "grad_norm": 0.47276989533109426,
      "learning_rate": 8.95053160472256e-06,
      "loss": 0.5996,
      "step": 2160
    },
    {
      "epoch": 0.8718141870684244,
      "grad_norm": 0.47310493449075497,
      "learning_rate": 8.936166782546907e-06,
      "loss": 0.6053,
      "step": 2170
    },
    {
      "epoch": 0.8758317639673572,
      "grad_norm": 0.4417798231937385,
      "learning_rate": 8.921716010303255e-06,
      "loss": 0.6075,
      "step": 2180
    },
    {
      "epoch": 0.87984934086629,
      "grad_norm": 0.47514268261185605,
      "learning_rate": 8.907179603538411e-06,
      "loss": 0.5892,
      "step": 2190
    },
    {
      "epoch": 0.8838669177652229,
      "grad_norm": 0.4830403204501155,
      "learning_rate": 8.892557879669097e-06,
      "loss": 0.5962,
      "step": 2200
    },
    {
      "epoch": 0.8878844946641556,
      "grad_norm": 0.46322055953346064,
      "learning_rate": 8.877851157975017e-06,
      "loss": 0.6027,
      "step": 2210
    },
    {
      "epoch": 0.8919020715630885,
      "grad_norm": 0.4722416444611542,
      "learning_rate": 8.86305975959188e-06,
      "loss": 0.5949,
      "step": 2220
    },
    {
      "epoch": 0.8959196484620213,
      "grad_norm": 0.5039262176010645,
      "learning_rate": 8.848184007504404e-06,
      "loss": 0.5983,
      "step": 2230
    },
    {
      "epoch": 0.8999372253609542,
      "grad_norm": 0.5011682351198766,
      "learning_rate": 8.833224226539246e-06,
      "loss": 0.5902,
      "step": 2240
    },
    {
      "epoch": 0.903954802259887,
      "grad_norm": 0.46884988549973994,
      "learning_rate": 8.818180743357915e-06,
      "loss": 0.6043,
      "step": 2250
    },
    {
      "epoch": 0.9079723791588198,
      "grad_norm": 0.45627355824791144,
      "learning_rate": 8.803053886449644e-06,
      "loss": 0.609,
      "step": 2260
    },
    {
      "epoch": 0.9119899560577527,
      "grad_norm": 0.4489855391892701,
      "learning_rate": 8.787843986124214e-06,
      "loss": 0.5945,
      "step": 2270
    },
    {
      "epoch": 0.9160075329566855,
      "grad_norm": 0.5195815180128369,
      "learning_rate": 8.772551374504736e-06,
      "loss": 0.6032,
      "step": 2280
    },
    {
      "epoch": 0.9200251098556184,
      "grad_norm": 0.4547182251787525,
      "learning_rate": 8.757176385520406e-06,
      "loss": 0.6071,
      "step": 2290
    },
    {
      "epoch": 0.9240426867545511,
      "grad_norm": 0.4672155983783131,
      "learning_rate": 8.741719354899214e-06,
      "loss": 0.6026,
      "step": 2300
    },
    {
      "epoch": 0.928060263653484,
      "grad_norm": 0.44811195796882736,
      "learning_rate": 8.7261806201606e-06,
      "loss": 0.5903,
      "step": 2310
    },
    {
      "epoch": 0.9320778405524168,
      "grad_norm": 0.4609033648332187,
      "learning_rate": 8.710560520608106e-06,
      "loss": 0.5954,
      "step": 2320
    },
    {
      "epoch": 0.9360954174513496,
      "grad_norm": 0.5031025381027067,
      "learning_rate": 8.694859397321947e-06,
      "loss": 0.5971,
      "step": 2330
    },
    {
      "epoch": 0.9401129943502825,
      "grad_norm": 0.45508717131932036,
      "learning_rate": 8.67907759315157e-06,
      "loss": 0.6009,
      "step": 2340
    },
    {
      "epoch": 0.9441305712492153,
      "grad_norm": 0.46492655160451346,
      "learning_rate": 8.663215452708173e-06,
      "loss": 0.5971,
      "step": 2350
    },
    {
      "epoch": 0.9481481481481482,
      "grad_norm": 0.4891914193609098,
      "learning_rate": 8.647273322357174e-06,
      "loss": 0.5854,
      "step": 2360
    },
    {
      "epoch": 0.952165725047081,
      "grad_norm": 0.45300916670077845,
      "learning_rate": 8.631251550210645e-06,
      "loss": 0.6073,
      "step": 2370
    },
    {
      "epoch": 0.9561833019460139,
      "grad_norm": 0.46574627999413143,
      "learning_rate": 8.61515048611972e-06,
      "loss": 0.5973,
      "step": 2380
    },
    {
      "epoch": 0.9602008788449466,
      "grad_norm": 0.46780579054245386,
      "learning_rate": 8.598970481666949e-06,
      "loss": 0.5903,
      "step": 2390
    },
    {
      "epoch": 0.9642184557438794,
      "grad_norm": 0.49368155945672554,
      "learning_rate": 8.582711890158622e-06,
      "loss": 0.5918,
      "step": 2400
    },
    {
      "epoch": 0.9682360326428123,
      "grad_norm": 0.4981441902973779,
      "learning_rate": 8.566375066617056e-06,
      "loss": 0.5849,
      "step": 2410
    },
    {
      "epoch": 0.9722536095417451,
      "grad_norm": 0.4940426996715437,
      "learning_rate": 8.549960367772836e-06,
      "loss": 0.5983,
      "step": 2420
    },
    {
      "epoch": 0.976271186440678,
      "grad_norm": 0.4785729651530905,
      "learning_rate": 8.533468152057037e-06,
      "loss": 0.5886,
      "step": 2430
    },
    {
      "epoch": 0.9802887633396108,
      "grad_norm": 0.47987909811753693,
      "learning_rate": 8.51689877959339e-06,
      "loss": 0.5934,
      "step": 2440
    },
    {
      "epoch": 0.9843063402385436,
      "grad_norm": 0.5163558607429957,
      "learning_rate": 8.500252612190416e-06,
      "loss": 0.5996,
      "step": 2450
    },
    {
      "epoch": 0.9883239171374765,
      "grad_norm": 0.51101675304883,
      "learning_rate": 8.48353001333353e-06,
      "loss": 0.5914,
      "step": 2460
    },
    {
      "epoch": 0.9923414940364093,
      "grad_norm": 0.5029778228075064,
      "learning_rate": 8.466731348177106e-06,
      "loss": 0.5941,
      "step": 2470
    },
    {
      "epoch": 0.9963590709353422,
      "grad_norm": 0.4522455217876264,
      "learning_rate": 8.4498569835365e-06,
      "loss": 0.597,
      "step": 2480
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.4817425446921033,
      "learning_rate": 8.432907287880033e-06,
      "loss": 0.6044,
      "step": 2490
    },
    {
      "epoch": 1.0040175768989328,
      "grad_norm": 0.5586942369932902,
      "learning_rate": 8.415882631320963e-06,
      "loss": 0.5356,
      "step": 2500
    },
    {
      "epoch": 1.0080351537978656,
      "grad_norm": 0.4841445380910929,
      "learning_rate": 8.398783385609386e-06,
      "loss": 0.5324,
      "step": 2510
    },
    {
      "epoch": 1.0120527306967986,
      "grad_norm": 0.5199870071061111,
      "learning_rate": 8.38160992412413e-06,
      "loss": 0.5408,
      "step": 2520
    },
    {
      "epoch": 1.0160703075957314,
      "grad_norm": 0.4803423902226154,
      "learning_rate": 8.364362621864595e-06,
      "loss": 0.5454,
      "step": 2530
    },
    {
      "epoch": 1.0200878844946641,
      "grad_norm": 0.5170354608787521,
      "learning_rate": 8.347041855442565e-06,
      "loss": 0.5438,
      "step": 2540
    },
    {
      "epoch": 1.024105461393597,
      "grad_norm": 0.48879934808926573,
      "learning_rate": 8.329648003073991e-06,
      "loss": 0.5409,
      "step": 2550
    },
    {
      "epoch": 1.0281230382925297,
      "grad_norm": 0.5352031093199895,
      "learning_rate": 8.312181444570722e-06,
      "loss": 0.5379,
      "step": 2560
    },
    {
      "epoch": 1.0321406151914627,
      "grad_norm": 0.503404555579561,
      "learning_rate": 8.29464256133222e-06,
      "loss": 0.5339,
      "step": 2570
    },
    {
      "epoch": 1.0361581920903955,
      "grad_norm": 0.5412849211631844,
      "learning_rate": 8.277031736337229e-06,
      "loss": 0.537,
      "step": 2580
    },
    {
      "epoch": 1.0401757689893283,
      "grad_norm": 0.49624211963806875,
      "learning_rate": 8.259349354135408e-06,
      "loss": 0.5365,
      "step": 2590
    },
    {
      "epoch": 1.044193345888261,
      "grad_norm": 0.47456260927212846,
      "learning_rate": 8.241595800838945e-06,
      "loss": 0.5331,
      "step": 2600
    },
    {
      "epoch": 1.048210922787194,
      "grad_norm": 0.5253188216147088,
      "learning_rate": 8.223771464114114e-06,
      "loss": 0.5407,
      "step": 2610
    },
    {
      "epoch": 1.0522284996861269,
      "grad_norm": 0.48560865787460045,
      "learning_rate": 8.205876733172813e-06,
      "loss": 0.5358,
      "step": 2620
    },
    {
      "epoch": 1.0562460765850596,
      "grad_norm": 0.4552759788335426,
      "learning_rate": 8.187911998764073e-06,
      "loss": 0.5383,
      "step": 2630
    },
    {
      "epoch": 1.0602636534839924,
      "grad_norm": 0.5238408302939632,
      "learning_rate": 8.169877653165512e-06,
      "loss": 0.5432,
      "step": 2640
    },
    {
      "epoch": 1.0642812303829252,
      "grad_norm": 0.5143828197718291,
      "learning_rate": 8.15177409017478e-06,
      "loss": 0.5449,
      "step": 2650
    },
    {
      "epoch": 1.0682988072818582,
      "grad_norm": 0.5003350510607426,
      "learning_rate": 8.13360170510096e-06,
      "loss": 0.5379,
      "step": 2660
    },
    {
      "epoch": 1.072316384180791,
      "grad_norm": 0.4862183359830462,
      "learning_rate": 8.115360894755928e-06,
      "loss": 0.5313,
      "step": 2670
    },
    {
      "epoch": 1.0763339610797238,
      "grad_norm": 0.525472490342403,
      "learning_rate": 8.097052057445696e-06,
      "loss": 0.5324,
      "step": 2680
    },
    {
      "epoch": 1.0803515379786566,
      "grad_norm": 0.4609085774775871,
      "learning_rate": 8.07867559296171e-06,
      "loss": 0.5339,
      "step": 2690
    },
    {
      "epoch": 1.0843691148775894,
      "grad_norm": 0.47498380053763667,
      "learning_rate": 8.060231902572123e-06,
      "loss": 0.5416,
      "step": 2700
    },
    {
      "epoch": 1.0883866917765224,
      "grad_norm": 0.4774259144620562,
      "learning_rate": 8.041721389013029e-06,
      "loss": 0.5315,
      "step": 2710
    },
    {
      "epoch": 1.0924042686754551,
      "grad_norm": 0.4535031254083697,
      "learning_rate": 8.023144456479677e-06,
      "loss": 0.5337,
      "step": 2720
    },
    {
      "epoch": 1.096421845574388,
      "grad_norm": 0.45369844007905547,
      "learning_rate": 8.004501510617631e-06,
      "loss": 0.5286,
      "step": 2730
    },
    {
      "epoch": 1.1004394224733207,
      "grad_norm": 0.47676753007555456,
      "learning_rate": 7.985792958513932e-06,
      "loss": 0.5316,
      "step": 2740
    },
    {
      "epoch": 1.1044569993722537,
      "grad_norm": 0.49166946653996263,
      "learning_rate": 7.967019208688187e-06,
      "loss": 0.534,
      "step": 2750
    },
    {
      "epoch": 1.1084745762711865,
      "grad_norm": 0.47818247256990665,
      "learning_rate": 7.948180671083665e-06,
      "loss": 0.5372,
      "step": 2760
    },
    {
      "epoch": 1.1124921531701193,
      "grad_norm": 0.49991506616495146,
      "learning_rate": 7.92927775705834e-06,
      "loss": 0.5497,
      "step": 2770
    },
    {
      "epoch": 1.116509730069052,
      "grad_norm": 0.5015448494254134,
      "learning_rate": 7.910310879375906e-06,
      "loss": 0.5335,
      "step": 2780
    },
    {
      "epoch": 1.1205273069679849,
      "grad_norm": 0.5009679314406517,
      "learning_rate": 7.891280452196767e-06,
      "loss": 0.5349,
      "step": 2790
    },
    {
      "epoch": 1.1245448838669179,
      "grad_norm": 0.46077740198691347,
      "learning_rate": 7.872186891068997e-06,
      "loss": 0.5474,
      "step": 2800
    },
    {
      "epoch": 1.1285624607658507,
      "grad_norm": 0.47629174007493424,
      "learning_rate": 7.85303061291925e-06,
      "loss": 0.5352,
      "step": 2810
    },
    {
      "epoch": 1.1325800376647834,
      "grad_norm": 0.5001826254949262,
      "learning_rate": 7.833812036043684e-06,
      "loss": 0.5253,
      "step": 2820
    },
    {
      "epoch": 1.1365976145637162,
      "grad_norm": 0.48877881868647444,
      "learning_rate": 7.814531580098799e-06,
      "loss": 0.5405,
      "step": 2830
    },
    {
      "epoch": 1.140615191462649,
      "grad_norm": 0.4525660243228381,
      "learning_rate": 7.795189666092286e-06,
      "loss": 0.5392,
      "step": 2840
    },
    {
      "epoch": 1.144632768361582,
      "grad_norm": 0.5016062315339999,
      "learning_rate": 7.77578671637384e-06,
      "loss": 0.5392,
      "step": 2850
    },
    {
      "epoch": 1.1486503452605148,
      "grad_norm": 0.46664182486781586,
      "learning_rate": 7.756323154625927e-06,
      "loss": 0.5307,
      "step": 2860
    },
    {
      "epoch": 1.1526679221594476,
      "grad_norm": 0.4851667799865368,
      "learning_rate": 7.736799405854531e-06,
      "loss": 0.5249,
      "step": 2870
    },
    {
      "epoch": 1.1566854990583804,
      "grad_norm": 0.44756010484495995,
      "learning_rate": 7.71721589637989e-06,
      "loss": 0.5423,
      "step": 2880
    },
    {
      "epoch": 1.1607030759573131,
      "grad_norm": 0.4810612674475816,
      "learning_rate": 7.697573053827163e-06,
      "loss": 0.5346,
      "step": 2890
    },
    {
      "epoch": 1.1647206528562462,
      "grad_norm": 0.5005415716085619,
      "learning_rate": 7.677871307117117e-06,
      "loss": 0.5277,
      "step": 2900
    },
    {
      "epoch": 1.168738229755179,
      "grad_norm": 0.48046892345205033,
      "learning_rate": 7.658111086456738e-06,
      "loss": 0.5372,
      "step": 2910
    },
    {
      "epoch": 1.1727558066541117,
      "grad_norm": 0.5231466496543029,
      "learning_rate": 7.638292823329861e-06,
      "loss": 0.5349,
      "step": 2920
    },
    {
      "epoch": 1.1767733835530445,
      "grad_norm": 0.47426409377347806,
      "learning_rate": 7.6184169504877195e-06,
      "loss": 0.5335,
      "step": 2930
    },
    {
      "epoch": 1.1807909604519775,
      "grad_norm": 0.4644152310984778,
      "learning_rate": 7.598483901939525e-06,
      "loss": 0.5375,
      "step": 2940
    },
    {
      "epoch": 1.1848085373509103,
      "grad_norm": 0.5016059422510154,
      "learning_rate": 7.5784941129429715e-06,
      "loss": 0.5336,
      "step": 2950
    },
    {
      "epoch": 1.188826114249843,
      "grad_norm": 0.4893646800410941,
      "learning_rate": 7.558448019994733e-06,
      "loss": 0.5427,
      "step": 2960
    },
    {
      "epoch": 1.1928436911487759,
      "grad_norm": 0.4964262727258161,
      "learning_rate": 7.5383460608209444e-06,
      "loss": 0.5362,
      "step": 2970
    },
    {
      "epoch": 1.1968612680477086,
      "grad_norm": 0.542942008787974,
      "learning_rate": 7.518188674367628e-06,
      "loss": 0.5474,
      "step": 2980
    },
    {
      "epoch": 1.2008788449466414,
      "grad_norm": 0.5299626906544336,
      "learning_rate": 7.497976300791114e-06,
      "loss": 0.5431,
      "step": 2990
    },
    {
      "epoch": 1.2048964218455744,
      "grad_norm": 0.45657822017276745,
      "learning_rate": 7.477709381448436e-06,
      "loss": 0.5207,
      "step": 3000
    },
    {
      "epoch": 1.2089139987445072,
      "grad_norm": 0.5192739282525728,
      "learning_rate": 7.457388358887682e-06,
      "loss": 0.5389,
      "step": 3010
    },
    {
      "epoch": 1.21293157564344,
      "grad_norm": 0.5108336845381785,
      "learning_rate": 7.437013676838345e-06,
      "loss": 0.5427,
      "step": 3020
    },
    {
      "epoch": 1.2169491525423728,
      "grad_norm": 0.4490173262151658,
      "learning_rate": 7.416585780201615e-06,
      "loss": 0.541,
      "step": 3030
    },
    {
      "epoch": 1.2209667294413058,
      "grad_norm": 0.4823167719289656,
      "learning_rate": 7.396105115040684e-06,
      "loss": 0.5396,
      "step": 3040
    },
    {
      "epoch": 1.2249843063402386,
      "grad_norm": 0.4926557447124054,
      "learning_rate": 7.37557212857099e-06,
      "loss": 0.5413,
      "step": 3050
    },
    {
      "epoch": 1.2290018832391714,
      "grad_norm": 0.4846891664452503,
      "learning_rate": 7.3549872691504646e-06,
      "loss": 0.5448,
      "step": 3060
    },
    {
      "epoch": 1.2330194601381042,
      "grad_norm": 0.5250489652016472,
      "learning_rate": 7.3343509862697295e-06,
      "loss": 0.5402,
      "step": 3070
    },
    {
      "epoch": 1.237037037037037,
      "grad_norm": 0.4588507155594117,
      "learning_rate": 7.313663730542295e-06,
      "loss": 0.5404,
      "step": 3080
    },
    {
      "epoch": 1.24105461393597,
      "grad_norm": 0.5009891938060933,
      "learning_rate": 7.292925953694705e-06,
      "loss": 0.5363,
      "step": 3090
    },
    {
      "epoch": 1.2450721908349027,
      "grad_norm": 0.5090944716207311,
      "learning_rate": 7.272138108556691e-06,
      "loss": 0.5284,
      "step": 3100
    },
    {
      "epoch": 1.2490897677338355,
      "grad_norm": 0.449393206268348,
      "learning_rate": 7.25130064905127e-06,
      "loss": 0.5296,
      "step": 3110
    },
    {
      "epoch": 1.2531073446327683,
      "grad_norm": 0.5002200548002171,
      "learning_rate": 7.230414030184835e-06,
      "loss": 0.531,
      "step": 3120
    },
    {
      "epoch": 1.2571249215317013,
      "grad_norm": 0.5114004159834465,
      "learning_rate": 7.209478708037225e-06,
      "loss": 0.5458,
      "step": 3130
    },
    {
      "epoch": 1.261142498430634,
      "grad_norm": 0.5166427993736156,
      "learning_rate": 7.1884951397517664e-06,
      "loss": 0.5309,
      "step": 3140
    },
    {
      "epoch": 1.2651600753295669,
      "grad_norm": 0.4851353630236827,
      "learning_rate": 7.167463783525282e-06,
      "loss": 0.5375,
      "step": 3150
    },
    {
      "epoch": 1.2691776522284997,
      "grad_norm": 0.508201816800996,
      "learning_rate": 7.146385098598092e-06,
      "loss": 0.5356,
      "step": 3160
    },
    {
      "epoch": 1.2731952291274324,
      "grad_norm": 0.47326774384605347,
      "learning_rate": 7.12525954524399e-06,
      "loss": 0.5281,
      "step": 3170
    },
    {
      "epoch": 1.2772128060263652,
      "grad_norm": 0.4964394626079393,
      "learning_rate": 7.1040875847601775e-06,
      "loss": 0.5339,
      "step": 3180
    },
    {
      "epoch": 1.2812303829252982,
      "grad_norm": 0.4965199644569434,
      "learning_rate": 7.082869679457214e-06,
      "loss": 0.5373,
      "step": 3190
    },
    {
      "epoch": 1.285247959824231,
      "grad_norm": 0.48647921670810396,
      "learning_rate": 7.061606292648899e-06,
      "loss": 0.5368,
      "step": 3200
    },
    {
      "epoch": 1.2892655367231638,
      "grad_norm": 0.4709672064019513,
      "learning_rate": 7.040297888642172e-06,
      "loss": 0.5401,
      "step": 3210
    },
    {
      "epoch": 1.2932831136220968,
      "grad_norm": 0.4710864047305743,
      "learning_rate": 7.018944932726963e-06,
      "loss": 0.538,
      "step": 3220
    },
    {
      "epoch": 1.2973006905210296,
      "grad_norm": 0.507624692636882,
      "learning_rate": 6.997547891166041e-06,
      "loss": 0.5333,
      "step": 3230
    },
    {
      "epoch": 1.3013182674199624,
      "grad_norm": 0.4692990883467198,
      "learning_rate": 6.976107231184823e-06,
      "loss": 0.5412,
      "step": 3240
    },
    {
      "epoch": 1.3053358443188952,
      "grad_norm": 0.5133848969883468,
      "learning_rate": 6.954623420961179e-06,
      "loss": 0.5254,
      "step": 3250
    },
    {
      "epoch": 1.309353421217828,
      "grad_norm": 0.5377405201588225,
      "learning_rate": 6.933096929615211e-06,
      "loss": 0.5304,
      "step": 3260
    },
    {
      "epoch": 1.3133709981167607,
      "grad_norm": 0.4449757913251056,
      "learning_rate": 6.911528227199e-06,
      "loss": 0.5345,
      "step": 3270
    },
    {
      "epoch": 1.3173885750156937,
      "grad_norm": 0.48594757234752356,
      "learning_rate": 6.88991778468635e-06,
      "loss": 0.5313,
      "step": 3280
    },
    {
      "epoch": 1.3214061519146265,
      "grad_norm": 0.44216301043487133,
      "learning_rate": 6.868266073962497e-06,
      "loss": 0.5301,
      "step": 3290
    },
    {
      "epoch": 1.3254237288135593,
      "grad_norm": 0.46671237233856316,
      "learning_rate": 6.846573567813819e-06,
      "loss": 0.5414,
      "step": 3300
    },
    {
      "epoch": 1.329441305712492,
      "grad_norm": 0.4812496937883229,
      "learning_rate": 6.8248407399174865e-06,
      "loss": 0.5364,
      "step": 3310
    },
    {
      "epoch": 1.333458882611425,
      "grad_norm": 0.5038329328787501,
      "learning_rate": 6.803068064831149e-06,
      "loss": 0.5425,
      "step": 3320
    },
    {
      "epoch": 1.3374764595103579,
      "grad_norm": 0.45630407505785825,
      "learning_rate": 6.781256017982555e-06,
      "loss": 0.5367,
      "step": 3330
    },
    {
      "epoch": 1.3414940364092907,
      "grad_norm": 0.49858693203976323,
      "learning_rate": 6.759405075659165e-06,
      "loss": 0.539,
      "step": 3340
    },
    {
      "epoch": 1.3455116133082234,
      "grad_norm": 0.4890924664342059,
      "learning_rate": 6.7375157149977755e-06,
      "loss": 0.5206,
      "step": 3350
    },
    {
      "epoch": 1.3495291902071562,
      "grad_norm": 0.4844858973506766,
      "learning_rate": 6.715588413974073e-06,
      "loss": 0.533,
      "step": 3360
    },
    {
      "epoch": 1.353546767106089,
      "grad_norm": 0.456108199064706,
      "learning_rate": 6.693623651392216e-06,
      "loss": 0.54,
      "step": 3370
    },
    {
      "epoch": 1.357564344005022,
      "grad_norm": 0.44821643945703865,
      "learning_rate": 6.671621906874366e-06,
      "loss": 0.5313,
      "step": 3380
    },
    {
      "epoch": 1.3615819209039548,
      "grad_norm": 0.44366483367693854,
      "learning_rate": 6.649583660850232e-06,
| "loss": 0.5445, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.3655994978028876, | |
| "grad_norm": 0.5007318603356307, | |
| "learning_rate": 6.627509394546558e-06, | |
| "loss": 0.5253, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.3696170747018206, | |
| "grad_norm": 0.47701484999347654, | |
| "learning_rate": 6.605399589976631e-06, | |
| "loss": 0.5432, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.3736346516007534, | |
| "grad_norm": 0.533932151873456, | |
| "learning_rate": 6.583254729929756e-06, | |
| "loss": 0.5362, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.3776522284996862, | |
| "grad_norm": 0.4885239761359488, | |
| "learning_rate": 6.5610752979607e-06, | |
| "loss": 0.5393, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.381669805398619, | |
| "grad_norm": 0.534453189520691, | |
| "learning_rate": 6.538861778379147e-06, | |
| "loss": 0.538, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.3856873822975517, | |
| "grad_norm": 0.55879808060491, | |
| "learning_rate": 6.516614656239115e-06, | |
| "loss": 0.5379, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.3897049591964845, | |
| "grad_norm": 0.5105051602697208, | |
| "learning_rate": 6.49433441732837e-06, | |
| "loss": 0.5434, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.3937225360954175, | |
| "grad_norm": 0.5849851114309839, | |
| "learning_rate": 6.472021548157812e-06, | |
| "loss": 0.5309, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.3977401129943503, | |
| "grad_norm": 0.4862484792197367, | |
| "learning_rate": 6.4496765359508575e-06, | |
| "loss": 0.5403, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.401757689893283, | |
| "grad_norm": 0.4785944624308064, | |
| "learning_rate": 6.427299868632795e-06, | |
| "loss": 0.5315, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.4057752667922159, | |
| "grad_norm": 0.5206432193262734, | |
| "learning_rate": 6.404892034820134e-06, | |
| "loss": 0.5363, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4097928436911489, | |
| "grad_norm": 0.5223412310453764, | |
| "learning_rate": 6.382453523809939e-06, | |
| "loss": 0.5409, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.4138104205900817, | |
| "grad_norm": 0.5251307070269818, | |
| "learning_rate": 6.359984825569138e-06, | |
| "loss": 0.5286, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.4178279974890144, | |
| "grad_norm": 0.512721201250328, | |
| "learning_rate": 6.3374864307238235e-06, | |
| "loss": 0.5261, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.4218455743879472, | |
| "grad_norm": 0.5026376332917479, | |
| "learning_rate": 6.3149588305485475e-06, | |
| "loss": 0.5208, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.42586315128688, | |
| "grad_norm": 0.4829796384358809, | |
| "learning_rate": 6.2924025169555916e-06, | |
| "loss": 0.5433, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.4298807281858128, | |
| "grad_norm": 0.4957733273389861, | |
| "learning_rate": 6.269817982484212e-06, | |
| "loss": 0.529, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.4338983050847458, | |
| "grad_norm": 0.45279662345918176, | |
| "learning_rate": 6.247205720289907e-06, | |
| "loss": 0.5292, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.4379158819836786, | |
| "grad_norm": 0.45766889598494964, | |
| "learning_rate": 6.224566224133632e-06, | |
| "loss": 0.5358, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.4419334588826114, | |
| "grad_norm": 0.4809643743051477, | |
| "learning_rate": 6.201899988371022e-06, | |
| "loss": 0.544, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.4459510357815444, | |
| "grad_norm": 0.5019231578335686, | |
| "learning_rate": 6.1792075079416e-06, | |
| "loss": 0.5357, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.4499686126804772, | |
| "grad_norm": 0.4966841368294494, | |
| "learning_rate": 6.156489278357967e-06, | |
| "loss": 0.5315, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.45398618957941, | |
| "grad_norm": 0.5111737253668558, | |
| "learning_rate": 6.1337457956949774e-06, | |
| "loss": 0.5231, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.4580037664783427, | |
| "grad_norm": 0.5050504504365466, | |
| "learning_rate": 6.1109775565789164e-06, | |
| "loss": 0.5354, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.4620213433772755, | |
| "grad_norm": 0.49236484658873236, | |
| "learning_rate": 6.0881850581766515e-06, | |
| "loss": 0.5243, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.4660389202762083, | |
| "grad_norm": 0.4748265818153898, | |
| "learning_rate": 6.065368798184771e-06, | |
| "loss": 0.5391, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.4700564971751413, | |
| "grad_norm": 0.5182967229817997, | |
| "learning_rate": 6.042529274818724e-06, | |
| "loss": 0.5294, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.474074074074074, | |
| "grad_norm": 0.4752424911930453, | |
| "learning_rate": 6.019666986801936e-06, | |
| "loss": 0.5281, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.4780916509730069, | |
| "grad_norm": 0.47120824092970764, | |
| "learning_rate": 5.996782433354923e-06, | |
| "loss": 0.5253, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.4821092278719397, | |
| "grad_norm": 0.46585234442500195, | |
| "learning_rate": 5.973876114184388e-06, | |
| "loss": 0.5202, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.4861268047708727, | |
| "grad_norm": 0.47339586038582876, | |
| "learning_rate": 5.95094852947231e-06, | |
| "loss": 0.5288, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.4901443816698055, | |
| "grad_norm": 0.44170633595442677, | |
| "learning_rate": 5.928000179865024e-06, | |
| "loss": 0.531, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.4941619585687382, | |
| "grad_norm": 0.509631075940108, | |
| "learning_rate": 5.905031566462279e-06, | |
| "loss": 0.5371, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.498179535467671, | |
| "grad_norm": 0.4730072164920337, | |
| "learning_rate": 5.882043190806314e-06, | |
| "loss": 0.5275, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.5021971123666038, | |
| "grad_norm": 0.4821028115884746, | |
| "learning_rate": 5.859035554870893e-06, | |
| "loss": 0.5337, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.5062146892655366, | |
| "grad_norm": 0.4834994282281791, | |
| "learning_rate": 5.836009161050342e-06, | |
| "loss": 0.5289, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.5102322661644696, | |
| "grad_norm": 0.45785972034921696, | |
| "learning_rate": 5.812964512148589e-06, | |
| "loss": 0.5399, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.5142498430634024, | |
| "grad_norm": 0.4766787260315672, | |
| "learning_rate": 5.78990211136818e-06, | |
| "loss": 0.538, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.5182674199623352, | |
| "grad_norm": 0.4892932237467062, | |
| "learning_rate": 5.766822462299286e-06, | |
| "loss": 0.5393, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.5222849968612682, | |
| "grad_norm": 0.4837638271264737, | |
| "learning_rate": 5.743726068908717e-06, | |
| "loss": 0.5229, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.526302573760201, | |
| "grad_norm": 0.4868620820757227, | |
| "learning_rate": 5.72061343552891e-06, | |
| "loss": 0.5353, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.5303201506591337, | |
| "grad_norm": 0.49287861664200744, | |
| "learning_rate": 5.697485066846914e-06, | |
| "loss": 0.5407, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.5343377275580665, | |
| "grad_norm": 0.5070216858712217, | |
| "learning_rate": 5.674341467893378e-06, | |
| "loss": 0.5322, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.5383553044569993, | |
| "grad_norm": 0.48075109583598735, | |
| "learning_rate": 5.6511831440315215e-06, | |
| "loss": 0.5318, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.542372881355932, | |
| "grad_norm": 0.487828149268802, | |
| "learning_rate": 5.628010600946088e-06, | |
| "loss": 0.5367, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.5463904582548649, | |
| "grad_norm": 0.4434100223228771, | |
| "learning_rate": 5.604824344632319e-06, | |
| "loss": 0.5413, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.5504080351537979, | |
| "grad_norm": 0.46224887215867433, | |
| "learning_rate": 5.581624881384897e-06, | |
| "loss": 0.5287, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.5544256120527307, | |
| "grad_norm": 0.5122729795251854, | |
| "learning_rate": 5.55841271778689e-06, | |
| "loss": 0.5365, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.5584431889516637, | |
| "grad_norm": 0.690970740866929, | |
| "learning_rate": 5.535188360698687e-06, | |
| "loss": 0.5467, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.5624607658505965, | |
| "grad_norm": 0.4794120185813089, | |
| "learning_rate": 5.511952317246941e-06, | |
| "loss": 0.5348, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.5664783427495292, | |
| "grad_norm": 0.4818371908690834, | |
| "learning_rate": 5.4887050948134825e-06, | |
| "loss": 0.5412, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.570495919648462, | |
| "grad_norm": 0.486538488375387, | |
| "learning_rate": 5.465447201024248e-06, | |
| "loss": 0.5362, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.5745134965473948, | |
| "grad_norm": 0.5061137169976885, | |
| "learning_rate": 5.442179143738193e-06, | |
| "loss": 0.5363, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.5785310734463276, | |
| "grad_norm": 0.46226895825091646, | |
| "learning_rate": 5.418901431036205e-06, | |
| "loss": 0.5277, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.5825486503452604, | |
| "grad_norm": 0.49850901564672195, | |
| "learning_rate": 5.395614571210004e-06, | |
| "loss": 0.5253, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.5865662272441934, | |
| "grad_norm": 0.49839262038652726, | |
| "learning_rate": 5.372319072751046e-06, | |
| "loss": 0.5217, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.5905838041431262, | |
| "grad_norm": 0.4540519023429122, | |
| "learning_rate": 5.349015444339429e-06, | |
| "loss": 0.5174, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.5946013810420592, | |
| "grad_norm": 0.4615246890403801, | |
| "learning_rate": 5.325704194832759e-06, | |
| "loss": 0.5399, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.598618957940992, | |
| "grad_norm": 0.5069766547949516, | |
| "learning_rate": 5.302385833255076e-06, | |
| "loss": 0.5377, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.6026365348399247, | |
| "grad_norm": 0.5034072911043822, | |
| "learning_rate": 5.2790608687857034e-06, | |
| "loss": 0.5312, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.6066541117388575, | |
| "grad_norm": 0.478063165462391, | |
| "learning_rate": 5.2557298107481536e-06, | |
| "loss": 0.5235, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6106716886377903, | |
| "grad_norm": 0.5051927530264109, | |
| "learning_rate": 5.2323931685989945e-06, | |
| "loss": 0.5282, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.614689265536723, | |
| "grad_norm": 0.449944668715227, | |
| "learning_rate": 5.209051451916733e-06, | |
| "loss": 0.5391, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.6187068424356559, | |
| "grad_norm": 0.4987609704482517, | |
| "learning_rate": 5.185705170390677e-06, | |
| "loss": 0.5401, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.6227244193345887, | |
| "grad_norm": 0.5129818470578882, | |
| "learning_rate": 5.162354833809815e-06, | |
| "loss": 0.5389, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.6267419962335217, | |
| "grad_norm": 0.46834889576653455, | |
| "learning_rate": 5.139000952051686e-06, | |
| "loss": 0.551, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.6307595731324545, | |
| "grad_norm": 0.5100548420871484, | |
| "learning_rate": 5.115644035071234e-06, | |
| "loss": 0.5353, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.6347771500313875, | |
| "grad_norm": 0.5091440448579789, | |
| "learning_rate": 5.0922845928896865e-06, | |
| "loss": 0.5312, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.6387947269303202, | |
| "grad_norm": 0.5011348467216399, | |
| "learning_rate": 5.068923135583405e-06, | |
| "loss": 0.5379, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.642812303829253, | |
| "grad_norm": 0.4879211850299191, | |
| "learning_rate": 5.04556017327276e-06, | |
| "loss": 0.5259, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.6468298807281858, | |
| "grad_norm": 0.47580521291496164, | |
| "learning_rate": 5.022196216110978e-06, | |
| "loss": 0.5264, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.6508474576271186, | |
| "grad_norm": 0.4836039036319484, | |
| "learning_rate": 4.998831774273016e-06, | |
| "loss": 0.5245, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.6548650345260514, | |
| "grad_norm": 0.45734991734522173, | |
| "learning_rate": 4.975467357944412e-06, | |
| "loss": 0.5347, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.6588826114249842, | |
| "grad_norm": 0.45580879464211926, | |
| "learning_rate": 4.9521034773101405e-06, | |
| "loss": 0.5281, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.6629001883239172, | |
| "grad_norm": 0.5088749078327436, | |
| "learning_rate": 4.928740642543491e-06, | |
| "loss": 0.5203, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.66691776522285, | |
| "grad_norm": 0.5023597172357365, | |
| "learning_rate": 4.905379363794907e-06, | |
| "loss": 0.5323, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.670935342121783, | |
| "grad_norm": 0.5160005322831623, | |
| "learning_rate": 4.882020151180852e-06, | |
| "loss": 0.5354, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.6749529190207157, | |
| "grad_norm": 0.48241576610764997, | |
| "learning_rate": 4.858663514772684e-06, | |
| "loss": 0.5256, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.6789704959196485, | |
| "grad_norm": 0.4350265055622632, | |
| "learning_rate": 4.8353099645855e-06, | |
| "loss": 0.5343, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.6829880728185813, | |
| "grad_norm": 0.5156247900477684, | |
| "learning_rate": 4.811960010567005e-06, | |
| "loss": 0.5235, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.687005649717514, | |
| "grad_norm": 0.49845097220709156, | |
| "learning_rate": 4.788614162586379e-06, | |
| "loss": 0.5311, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.6910232266164469, | |
| "grad_norm": 0.4684264084420117, | |
| "learning_rate": 4.76527293042315e-06, | |
| "loss": 0.5361, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.6950408035153797, | |
| "grad_norm": 0.5161852081555511, | |
| "learning_rate": 4.741936823756046e-06, | |
| "loss": 0.5207, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.6990583804143125, | |
| "grad_norm": 0.6119074989682534, | |
| "learning_rate": 4.718606352151874e-06, | |
| "loss": 0.5221, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.7030759573132455, | |
| "grad_norm": 0.4419977523098354, | |
| "learning_rate": 4.695282025054406e-06, | |
| "loss": 0.5336, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.7070935342121782, | |
| "grad_norm": 0.4776310585207038, | |
| "learning_rate": 4.671964351773229e-06, | |
| "loss": 0.5254, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.7111111111111112, | |
| "grad_norm": 0.4362671430805064, | |
| "learning_rate": 4.648653841472643e-06, | |
| "loss": 0.5368, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.715128688010044, | |
| "grad_norm": 0.4926985303907698, | |
| "learning_rate": 4.625351003160539e-06, | |
| "loss": 0.529, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.7191462649089768, | |
| "grad_norm": 0.5037843279607946, | |
| "learning_rate": 4.60205634567728e-06, | |
| "loss": 0.5266, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.7231638418079096, | |
| "grad_norm": 0.48300010587173975, | |
| "learning_rate": 4.578770377684593e-06, | |
| "loss": 0.5308, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.7271814187068424, | |
| "grad_norm": 0.4952905848146038, | |
| "learning_rate": 4.555493607654463e-06, | |
| "loss": 0.5348, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.7311989956057752, | |
| "grad_norm": 0.509232593416316, | |
| "learning_rate": 4.532226543858025e-06, | |
| "loss": 0.5363, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.735216572504708, | |
| "grad_norm": 0.5186412413734403, | |
| "learning_rate": 4.508969694354472e-06, | |
| "loss": 0.5158, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.739234149403641, | |
| "grad_norm": 0.4648730018824965, | |
| "learning_rate": 4.485723566979959e-06, | |
| "loss": 0.5205, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.7432517263025737, | |
| "grad_norm": 0.487919260567548, | |
| "learning_rate": 4.462488669336507e-06, | |
| "loss": 0.5292, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.7472693032015068, | |
| "grad_norm": 0.4741644363249272, | |
| "learning_rate": 4.439265508780932e-06, | |
| "loss": 0.5283, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.7512868801004395, | |
| "grad_norm": 0.49035056338707034, | |
| "learning_rate": 4.416054592413755e-06, | |
| "loss": 0.538, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.7553044569993723, | |
| "grad_norm": 0.4755513975974018, | |
| "learning_rate": 4.392856427068132e-06, | |
| "loss": 0.5297, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.759322033898305, | |
| "grad_norm": 0.46435677929151326, | |
| "learning_rate": 4.3696715192987904e-06, | |
| "loss": 0.5247, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.7633396107972379, | |
| "grad_norm": 0.48979753506487095, | |
| "learning_rate": 4.346500375370966e-06, | |
| "loss": 0.5165, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.7673571876961707, | |
| "grad_norm": 0.4487673109128978, | |
| "learning_rate": 4.323343501249346e-06, | |
| "loss": 0.5317, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.7713747645951035, | |
| "grad_norm": 0.5113864337117118, | |
| "learning_rate": 4.300201402587019e-06, | |
| "loss": 0.5382, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.7753923414940365, | |
| "grad_norm": 0.483652205814584, | |
| "learning_rate": 4.277074584714447e-06, | |
| "loss": 0.5311, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.7794099183929692, | |
| "grad_norm": 0.4759761657301343, | |
| "learning_rate": 4.253963552628411e-06, | |
| "loss": 0.5351, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.783427495291902, | |
| "grad_norm": 0.4821299331080685, | |
| "learning_rate": 4.230868810980997e-06, | |
| "loss": 0.5342, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.787445072190835, | |
| "grad_norm": 0.5245032360028585, | |
| "learning_rate": 4.207790864068573e-06, | |
| "loss": 0.5237, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.7914626490897678, | |
| "grad_norm": 0.49328875619112256, | |
| "learning_rate": 4.184730215820782e-06, | |
| "loss": 0.5317, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.7954802259887006, | |
| "grad_norm": 0.5008680438944126, | |
| "learning_rate": 4.161687369789526e-06, | |
| "loss": 0.517, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.7994978028876334, | |
| "grad_norm": 0.47287999035048983, | |
| "learning_rate": 4.138662829137984e-06, | |
| "loss": 0.5327, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.8035153797865662, | |
| "grad_norm": 0.49099568575427033, | |
| "learning_rate": 4.115657096629615e-06, | |
| "loss": 0.5302, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.807532956685499, | |
| "grad_norm": 0.4518440123720032, | |
| "learning_rate": 4.092670674617187e-06, | |
| "loss": 0.5153, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8115505335844317, | |
| "grad_norm": 0.4893643353710452, | |
| "learning_rate": 4.069704065031804e-06, | |
| "loss": 0.5354, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.8155681104833647, | |
| "grad_norm": 0.4938753269976269, | |
| "learning_rate": 4.0467577693719436e-06, | |
| "loss": 0.5304, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.8195856873822975, | |
| "grad_norm": 0.48759036445701953, | |
| "learning_rate": 4.023832288692512e-06, | |
| "loss": 0.5333, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.8236032642812305, | |
| "grad_norm": 0.48751563543723775, | |
| "learning_rate": 4.000928123593898e-06, | |
| "loss": 0.5385, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.8276208411801633, | |
| "grad_norm": 0.512128536559346, | |
| "learning_rate": 3.978045774211043e-06, | |
| "loss": 0.5438, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.831638418079096, | |
| "grad_norm": 0.48148117588240913, | |
| "learning_rate": 3.9551857402025215e-06, | |
| "loss": 0.5321, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.835655994978029, | |
| "grad_norm": 0.5036763909061966, | |
| "learning_rate": 3.932348520739633e-06, | |
| "loss": 0.5321, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.8396735718769617, | |
| "grad_norm": 0.4850093548850179, | |
| "learning_rate": 3.909534614495495e-06, | |
| "loss": 0.5212, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.8436911487758945, | |
| "grad_norm": 0.5089724945679945, | |
| "learning_rate": 3.886744519634157e-06, | |
| "loss": 0.526, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.8477087256748272, | |
| "grad_norm": 0.4736889376864286, | |
| "learning_rate": 3.86397873379973e-06, | |
| "loss": 0.5355, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.8517263025737603, | |
| "grad_norm": 0.48275206618553307, | |
| "learning_rate": 3.841237754105508e-06, | |
| "loss": 0.5375, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.855743879472693, | |
| "grad_norm": 0.5070260134880437, | |
| "learning_rate": 3.818522077123119e-06, | |
| "loss": 0.5256, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.8597614563716258, | |
| "grad_norm": 0.48382223029891325, | |
| "learning_rate": 3.795832198871682e-06, | |
| "loss": 0.5272, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.8637790332705588, | |
| "grad_norm": 0.4533070548630681, | |
| "learning_rate": 3.7731686148069768e-06, | |
| "loss": 0.529, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.8677966101694916, | |
| "grad_norm": 0.5136994231340827, | |
| "learning_rate": 3.7505318198106226e-06, | |
| "loss": 0.5259, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.8718141870684244, | |
| "grad_norm": 0.4891813686506932, | |
| "learning_rate": 3.727922308179275e-06, | |
| "loss": 0.528, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.8758317639673572, | |
| "grad_norm": 0.4784464468836338, | |
| "learning_rate": 3.7053405736138228e-06, | |
| "loss": 0.5239, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.87984934086629, | |
| "grad_norm": 0.4439894514511087, | |
| "learning_rate": 3.6827871092086283e-06, | |
| "loss": 0.5278, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.8838669177652227, | |
| "grad_norm": 0.42133586965144204, | |
| "learning_rate": 3.6602624074407354e-06, | |
| "loss": 0.525, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.8878844946641555, | |
| "grad_norm": 0.4452525569153129, | |
| "learning_rate": 3.6377669601591314e-06, | |
| "loss": 0.5271, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.8919020715630885, | |
| "grad_norm": 0.4781861294441116, | |
| "learning_rate": 3.615301258574009e-06, | |
| "loss": 0.5244, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.8959196484620213, | |
| "grad_norm": 0.442730967082675, | |
| "learning_rate": 3.5928657932460252e-06, | |
| "loss": 0.5245, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.8999372253609543, | |
| "grad_norm": 0.45356827607495354, | |
| "learning_rate": 3.5704610540756035e-06, | |
| "loss": 0.5226, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.9039548022598871, | |
| "grad_norm": 0.44691195704566716, | |
| "learning_rate": 3.5480875302922296e-06, | |
| "loss": 0.5383, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.90797237915882, | |
| "grad_norm": 0.48059730824092567, | |
| "learning_rate": 3.525745710443774e-06, | |
| "loss": 0.5224, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.9119899560577527, | |
| "grad_norm": 0.447518168596057, | |
| "learning_rate": 3.503436082385817e-06, | |
| "loss": 0.529, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.9160075329566855, | |
| "grad_norm": 0.45980480004977614, | |
| "learning_rate": 3.4811591332710003e-06, | |
| "loss": 0.5283, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.9200251098556183, | |
| "grad_norm": 0.4641653971039642, | |
| "learning_rate": 3.4589153495383916e-06, | |
| "loss": 0.524, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.924042686754551, | |
| "grad_norm": 0.4608734211286836, | |
| "learning_rate": 3.4367052169028557e-06, | |
| "loss": 0.5154, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.928060263653484, | |
| "grad_norm": 0.45628159677386826, | |
| "learning_rate": 3.414529220344455e-06, | |
| "loss": 0.5246, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.9320778405524168, | |
| "grad_norm": 0.4571910097492241, | |
| "learning_rate": 3.3923878440978563e-06, | |
| "loss": 0.5355, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.9360954174513496, | |
| "grad_norm": 0.4701204963184072, | |
| "learning_rate": 3.370281571641759e-06, | |
| "loss": 0.519, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.9401129943502826, | |
| "grad_norm": 0.4644809253665488, | |
| "learning_rate": 3.348210885688337e-06, | |
| "loss": 0.5444, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.9441305712492154, | |
| "grad_norm": 0.4505954165735258, | |
| "learning_rate": 3.3261762681726955e-06, | |
| "loss": 0.5288, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.9481481481481482, | |
| "grad_norm": 0.5016011741209488, | |
| "learning_rate": 3.304178200242351e-06, | |
| "loss": 0.5279, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.952165725047081, | |
| "grad_norm": 0.48172105905267193, | |
| "learning_rate": 3.282217162246726e-06, | |
| "loss": 0.5331, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.9561833019460138, | |
| "grad_norm": 0.45055441596952966, | |
| "learning_rate": 3.260293633726656e-06, | |
| "loss": 0.5312, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.9602008788449465, | |
| "grad_norm": 0.4553856152933495, | |
| "learning_rate": 3.2384080934039193e-06, | |
| "loss": 0.5301, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.9642184557438793, | |
| "grad_norm": 0.468059642341407, | |
| "learning_rate": 3.2165610191707872e-06, | |
| "loss": 0.5265, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.9682360326428123, | |
| "grad_norm": 0.42050027272205776, | |
| "learning_rate": 3.194752888079585e-06, | |
| "loss": 0.5212, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.9722536095417451, | |
| "grad_norm": 0.49679149221807994, | |
| "learning_rate": 3.1729841763322776e-06, | |
| "loss": 0.5298, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.9762711864406781, | |
| "grad_norm": 0.4716285847340891, | |
| "learning_rate": 3.1512553592700622e-06, | |
| "loss": 0.5203, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.980288763339611, | |
| "grad_norm": 0.4740722369561679, | |
| "learning_rate": 3.129566911363009e-06, | |
| "loss": 0.5208, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.9843063402385437, | |
| "grad_norm": 0.4553213513296392, | |
| "learning_rate": 3.1079193061996803e-06, | |
| "loss": 0.5241, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.9883239171374765, | |
| "grad_norm": 0.48169953080880973, | |
| "learning_rate": 3.086313016476794e-06, | |
| "loss": 0.5418, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.9923414940364093, | |
| "grad_norm": 0.5764706666287279, | |
| "learning_rate": 3.0647485139889145e-06, | |
| "loss": 0.5259, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.996359070935342, | |
| "grad_norm": 0.43220459247826987, | |
| "learning_rate": 3.0432262696181336e-06, | |
| "loss": 0.522, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.4710113996508926, | |
| "learning_rate": 3.0217467533237956e-06, | |
| "loss": 0.5142, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.004017576898933, | |
| "grad_norm": 0.5202233442297398, | |
| "learning_rate": 3.000310434132237e-06, | |
| "loss": 0.4811, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.0080351537978656, | |
| "grad_norm": 0.5384401881895943, | |
| "learning_rate": 2.9789177801265455e-06, | |
| "loss": 0.4769, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.0120527306967984, | |
| "grad_norm": 0.4756295523176075, | |
| "learning_rate": 2.9575692584363337e-06, | |
| "loss": 0.4755, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.016070307595731, | |
| "grad_norm": 0.5172206673937627, | |
| "learning_rate": 2.9362653352275405e-06, | |
| "loss": 0.4813, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.0200878844946644, | |
| "grad_norm": 0.49483001522371817, | |
| "learning_rate": 2.915006475692256e-06, | |
| "loss": 0.472, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.024105461393597, | |
| "grad_norm": 0.4704791978578869, | |
| "learning_rate": 2.89379314403856e-06, | |
| "loss": 0.4747, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.02812303829253, | |
| "grad_norm": 0.5050782711725152, | |
| "learning_rate": 2.8726258034803866e-06, | |
| "loss": 0.4794, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.0321406151914627, | |
| "grad_norm": 0.4712269294430662, | |
| "learning_rate": 2.8515049162274057e-06, | |
| "loss": 0.4722, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.0361581920903955, | |
| "grad_norm": 0.48881220535426967, | |
| "learning_rate": 2.83043094347494e-06, | |
| "loss": 0.4678, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.0401757689893283, | |
| "grad_norm": 0.48771842977610164, | |
| "learning_rate": 2.8094043453938844e-06, | |
| "loss": 0.4665, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.044193345888261, | |
| "grad_norm": 0.5202925253687711, | |
| "learning_rate": 2.7884255811206584e-06, | |
| "loss": 0.4763, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.048210922787194, | |
| "grad_norm": 0.46874669055521, | |
| "learning_rate": 2.7674951087471858e-06, | |
| "loss": 0.4833, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.0522284996861266, | |
| "grad_norm": 0.47453927342332336, | |
| "learning_rate": 2.7466133853108935e-06, | |
| "loss": 0.4598, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.0562460765850594, | |
| "grad_norm": 0.45817909400058926, | |
| "learning_rate": 2.725780866784722e-06, | |
| "loss": 0.4719, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.0602636534839927, | |
| "grad_norm": 0.5261764803454092, | |
| "learning_rate": 2.704998008067177e-06, | |
| "loss": 0.4634, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.0642812303829254, | |
| "grad_norm": 0.47433900887045005, | |
| "learning_rate": 2.6842652629723907e-06, | |
| "loss": 0.4785, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.068298807281858, | |
| "grad_norm": 0.4711156322673265, | |
| "learning_rate": 2.6635830842202182e-06, | |
| "loss": 0.4625, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.072316384180791, | |
| "grad_norm": 0.4620663705804988, | |
| "learning_rate": 2.642951923426348e-06, | |
| "loss": 0.4775, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.076333961079724, | |
| "grad_norm": 0.4617355126174571, | |
| "learning_rate": 2.622372231092437e-06, | |
| "loss": 0.4817, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.0803515379786566, | |
| "grad_norm": 0.5411857216699084, | |
| "learning_rate": 2.6018444565962885e-06, | |
| "loss": 0.4731, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.0843691148775894, | |
| "grad_norm": 0.4943277164316023, | |
| "learning_rate": 2.5813690481820184e-06, | |
| "loss": 0.4693, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.088386691776522, | |
| "grad_norm": 0.5102055113637535, | |
| "learning_rate": 2.5609464529502815e-06, | |
| "loss": 0.4805, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.092404268675455, | |
| "grad_norm": 0.49132645013613546, | |
| "learning_rate": 2.540577116848505e-06, | |
| "loss": 0.4694, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.096421845574388, | |
| "grad_norm": 0.49577954933166757, | |
| "learning_rate": 2.52026148466115e-06, | |
| "loss": 0.4825, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.100439422473321, | |
| "grad_norm": 0.4840304988230105, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.4775, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.1044569993722537, | |
| "grad_norm": 0.5036218210876587, | |
| "learning_rate": 2.4797931052944755e-06, | |
| "loss": 0.472, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.1084745762711865, | |
| "grad_norm": 0.4502304265079634, | |
| "learning_rate": 2.4596412417819708e-06, | |
| "loss": 0.4685, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.1124921531701193, | |
| "grad_norm": 0.5218870877367079, | |
| "learning_rate": 2.4395448494982198e-06, | |
| "loss": 0.4817, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.116509730069052, | |
| "grad_norm": 0.49197993587084365, | |
| "learning_rate": 2.419504367267689e-06, | |
| "loss": 0.4744, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.120527306967985, | |
| "grad_norm": 0.5349529463351869, | |
| "learning_rate": 2.3995202326939866e-06, | |
| "loss": 0.4872, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.1245448838669176, | |
| "grad_norm": 0.48741903300666545, | |
| "learning_rate": 2.3795928821503275e-06, | |
| "loss": 0.4688, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.1285624607658504, | |
| "grad_norm": 0.4999227078942744, | |
| "learning_rate": 2.359722750769981e-06, | |
| "loss": 0.4793, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.132580037664783, | |
| "grad_norm": 0.45470893654183736, | |
| "learning_rate": 2.339910272436782e-06, | |
| "loss": 0.4755, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.1365976145637164, | |
| "grad_norm": 0.4824984736023203, | |
| "learning_rate": 2.3201558797756602e-06, | |
| "loss": 0.472, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.1406151914626492, | |
| "grad_norm": 0.47042980851352273, | |
| "learning_rate": 2.300460004143182e-06, | |
| "loss": 0.477, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.144632768361582, | |
| "grad_norm": 0.47770309689595364, | |
| "learning_rate": 2.2808230756181344e-06, | |
| "loss": 0.4678, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.148650345260515, | |
| "grad_norm": 0.5137315970001541, | |
| "learning_rate": 2.261245522992141e-06, | |
| "loss": 0.4718, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.1526679221594476, | |
| "grad_norm": 0.47959163673834015, | |
| "learning_rate": 2.2417277737602967e-06, | |
| "loss": 0.4777, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.1566854990583804, | |
| "grad_norm": 0.5938128736434867, | |
| "learning_rate": 2.222270254111825e-06, | |
| "loss": 0.4573, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.160703075957313, | |
| "grad_norm": 0.4891138485781613, | |
| "learning_rate": 2.2028733889207787e-06, | |
| "loss": 0.4767, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.164720652856246, | |
| "grad_norm": 0.501211033879534, | |
| "learning_rate": 2.1835376017367665e-06, | |
| "loss": 0.4735, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.1687382297551787, | |
| "grad_norm": 0.5297335059398043, | |
| "learning_rate": 2.1642633147756894e-06, | |
| "loss": 0.4824, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.172755806654112, | |
| "grad_norm": 0.4658949552573407, | |
| "learning_rate": 2.145050948910536e-06, | |
| "loss": 0.4757, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.1767733835530447, | |
| "grad_norm": 0.5130444470120785, | |
| "learning_rate": 2.1259009236621857e-06, | |
| "loss": 0.4804, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.1807909604519775, | |
| "grad_norm": 0.5328006329035961, | |
| "learning_rate": 2.1068136571902527e-06, | |
| "loss": 0.4714, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.1848085373509103, | |
| "grad_norm": 0.49974994460658273, | |
| "learning_rate": 2.0877895662839477e-06, | |
| "loss": 0.4661, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.188826114249843, | |
| "grad_norm": 0.504181506688872, | |
| "learning_rate": 2.0688290663529813e-06, | |
| "loss": 0.469, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.192843691148776, | |
| "grad_norm": 0.5249615841875294, | |
| "learning_rate": 2.049932571418494e-06, | |
| "loss": 0.4784, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.1968612680477086, | |
| "grad_norm": 0.4718284348814741, | |
| "learning_rate": 2.031100494104014e-06, | |
| "loss": 0.4784, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.2008788449466414, | |
| "grad_norm": 0.5193890352090972, | |
| "learning_rate": 2.0123332456264473e-06, | |
| "loss": 0.4818, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.204896421845574, | |
| "grad_norm": 0.48046763032522966, | |
| "learning_rate": 1.9936312357870962e-06, | |
| "loss": 0.4802, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.2089139987445074, | |
| "grad_norm": 0.47752926213638963, | |
| "learning_rate": 1.9749948729627188e-06, | |
| "loss": 0.4686, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.2129315756434402, | |
| "grad_norm": 0.4800555208436017, | |
| "learning_rate": 1.956424564096602e-06, | |
| "loss": 0.482, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.216949152542373, | |
| "grad_norm": 0.443284090953342, | |
| "learning_rate": 1.9379207146896827e-06, | |
| "loss": 0.4733, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.220966729441306, | |
| "grad_norm": 0.5002931907484188, | |
| "learning_rate": 1.9194837287916817e-06, | |
| "loss": 0.4776, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.2249843063402386, | |
| "grad_norm": 0.45028976156882144, | |
| "learning_rate": 1.9011140089923013e-06, | |
| "loss": 0.4785, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.2290018832391714, | |
| "grad_norm": 0.4619774988789297, | |
| "learning_rate": 1.8828119564124159e-06, | |
| "loss": 0.475, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.233019460138104, | |
| "grad_norm": 0.47661490970644393, | |
| "learning_rate": 1.8645779706953188e-06, | |
| "loss": 0.4824, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.237037037037037, | |
| "grad_norm": 0.49398577636680097, | |
| "learning_rate": 1.8464124499980013e-06, | |
| "loss": 0.4719, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.2410546139359697, | |
| "grad_norm": 0.4799156749603329, | |
| "learning_rate": 1.8283157909824517e-06, | |
| "loss": 0.4787, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.2450721908349025, | |
| "grad_norm": 0.4903571200855578, | |
| "learning_rate": 1.8102883888069917e-06, | |
| "loss": 0.481, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.2490897677338357, | |
| "grad_norm": 0.4595319743138882, | |
| "learning_rate": 1.7923306371176542e-06, | |
| "loss": 0.4722, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.2531073446327685, | |
| "grad_norm": 0.4815148560520921, | |
| "learning_rate": 1.7744429280395903e-06, | |
| "loss": 0.4804, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.2571249215317013, | |
| "grad_norm": 0.49411596599772084, | |
| "learning_rate": 1.7566256521684966e-06, | |
| "loss": 0.4837, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.261142498430634, | |
| "grad_norm": 0.5331117323575773, | |
| "learning_rate": 1.7388791985620922e-06, | |
| "loss": 0.4705, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.265160075329567, | |
| "grad_norm": 0.52376875529828, | |
| "learning_rate": 1.721203954731624e-06, | |
| "loss": 0.4723, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.2691776522284997, | |
| "grad_norm": 0.48551462166212467, | |
| "learning_rate": 1.7036003066334012e-06, | |
| "loss": 0.4853, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.2731952291274324, | |
| "grad_norm": 0.5014457542958235, | |
| "learning_rate": 1.6860686386603719e-06, | |
| "loss": 0.4733, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.277212806026365, | |
| "grad_norm": 0.4996867895329777, | |
| "learning_rate": 1.6686093336337256e-06, | |
| "loss": 0.4741, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.281230382925298, | |
| "grad_norm": 0.48719109638855057, | |
| "learning_rate": 1.6512227727945391e-06, | |
| "loss": 0.4831, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.285247959824231, | |
| "grad_norm": 0.4856082508478335, | |
| "learning_rate": 1.6339093357954455e-06, | |
| "loss": 0.4833, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.289265536723164, | |
| "grad_norm": 0.5118289081317942, | |
| "learning_rate": 1.6166694006923479e-06, | |
| "loss": 0.4845, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.293283113622097, | |
| "grad_norm": 0.5338262164693043, | |
| "learning_rate": 1.5995033439361623e-06, | |
| "loss": 0.4725, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.2973006905210296, | |
| "grad_norm": 0.5147815344933698, | |
| "learning_rate": 1.5824115403646e-06, | |
| "loss": 0.471, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.3013182674199624, | |
| "grad_norm": 0.4925335147001341, | |
| "learning_rate": 1.5653943631939806e-06, | |
| "loss": 0.4748, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.305335844318895, | |
| "grad_norm": 0.5223852201900643, | |
| "learning_rate": 1.5484521840110812e-06, | |
| "loss": 0.4799, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.309353421217828, | |
| "grad_norm": 0.5102348798882654, | |
| "learning_rate": 1.5315853727650283e-06, | |
| "loss": 0.4734, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.3133709981167607, | |
| "grad_norm": 0.4806877999022709, | |
| "learning_rate": 1.5147942977592111e-06, | |
| "loss": 0.4793, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.3173885750156935, | |
| "grad_norm": 0.47394343796609417, | |
| "learning_rate": 1.4980793256432474e-06, | |
| "loss": 0.4778, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.3214061519146263, | |
| "grad_norm": 0.5169128981837072, | |
| "learning_rate": 1.4814408214049674e-06, | |
| "loss": 0.4756, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.3254237288135595, | |
| "grad_norm": 0.4653905202296831, | |
| "learning_rate": 1.4648791483624586e-06, | |
| "loss": 0.476, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.3294413057124923, | |
| "grad_norm": 0.49306875398820615, | |
| "learning_rate": 1.4483946681561178e-06, | |
| "loss": 0.4686, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.333458882611425, | |
| "grad_norm": 0.48126484624398314, | |
| "learning_rate": 1.4319877407407623e-06, | |
| "loss": 0.4757, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.337476459510358, | |
| "grad_norm": 0.4857946344608446, | |
| "learning_rate": 1.415658724377767e-06, | |
| "loss": 0.4707, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.3414940364092907, | |
| "grad_norm": 0.508812865037136, | |
| "learning_rate": 1.3994079756272467e-06, | |
| "loss": 0.4716, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.3455116133082234, | |
| "grad_norm": 0.4924478822718041, | |
| "learning_rate": 1.3832358493402591e-06, | |
| "loss": 0.4788, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.3495291902071562, | |
| "grad_norm": 0.5636558365101663, | |
| "learning_rate": 1.3671426986510667e-06, | |
| "loss": 0.4791, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.353546767106089, | |
| "grad_norm": 0.4787221048450779, | |
| "learning_rate": 1.3511288749694245e-06, | |
| "loss": 0.4774, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.357564344005022, | |
| "grad_norm": 0.509596234617302, | |
| "learning_rate": 1.3351947279729016e-06, | |
| "loss": 0.4738, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.361581920903955, | |
| "grad_norm": 0.48376004159048386, | |
| "learning_rate": 1.3193406055992485e-06, | |
| "loss": 0.4828, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.365599497802888, | |
| "grad_norm": 0.5038494271293078, | |
| "learning_rate": 1.3035668540388002e-06, | |
| "loss": 0.4864, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.3696170747018206, | |
| "grad_norm": 0.5316770270179247, | |
| "learning_rate": 1.2878738177269156e-06, | |
| "loss": 0.4785, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.3736346516007534, | |
| "grad_norm": 0.5142512043455278, | |
| "learning_rate": 1.2722618393364572e-06, | |
| "loss": 0.4817, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.377652228499686, | |
| "grad_norm": 0.468789477239248, | |
| "learning_rate": 1.2567312597703063e-06, | |
| "loss": 0.4735, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.381669805398619, | |
| "grad_norm": 0.4736300406705453, | |
| "learning_rate": 1.2412824181539256e-06, | |
| "loss": 0.467, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.3856873822975517, | |
| "grad_norm": 0.49508031018022314, | |
| "learning_rate": 1.2259156518279452e-06, | |
| "loss": 0.4854, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.3897049591964845, | |
| "grad_norm": 0.4889357358622553, | |
| "learning_rate": 1.2106312963408024e-06, | |
| "loss": 0.4683, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.3937225360954173, | |
| "grad_norm": 0.5186581775849328, | |
| "learning_rate": 1.1954296854414111e-06, | |
| "loss": 0.4743, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.3977401129943505, | |
| "grad_norm": 0.534928567712559, | |
| "learning_rate": 1.1803111510718774e-06, | |
| "loss": 0.4713, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.401757689893283, | |
| "grad_norm": 0.46937598704982375, | |
| "learning_rate": 1.1652760233602495e-06, | |
| "loss": 0.4773, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.405775266792216, | |
| "grad_norm": 0.49187943461524203, | |
| "learning_rate": 1.1503246306133099e-06, | |
| "loss": 0.48, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.409792843691149, | |
| "grad_norm": 0.4699512441083907, | |
| "learning_rate": 1.1354572993094031e-06, | |
| "loss": 0.4752, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.4138104205900817, | |
| "grad_norm": 0.514478387098126, | |
| "learning_rate": 1.1206743540913144e-06, | |
| "loss": 0.4735, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.4178279974890144, | |
| "grad_norm": 0.4912678185053618, | |
| "learning_rate": 1.1059761177591727e-06, | |
| "loss": 0.4738, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.4218455743879472, | |
| "grad_norm": 0.45184357921612245, | |
| "learning_rate": 1.0913629112634045e-06, | |
| "loss": 0.4764, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.42586315128688, | |
| "grad_norm": 0.45539851634790796, | |
| "learning_rate": 1.076835053697728e-06, | |
| "loss": 0.4758, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.429880728185813, | |
| "grad_norm": 0.5152318496750359, | |
| "learning_rate": 1.0623928622921825e-06, | |
| "loss": 0.4732, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.4338983050847456, | |
| "grad_norm": 0.5016552767543895, | |
| "learning_rate": 1.0480366524062041e-06, | |
| "loss": 0.483, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.4379158819836784, | |
| "grad_norm": 0.47698174699672474, | |
| "learning_rate": 1.0337667375217353e-06, | |
| "loss": 0.4737, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.4419334588826116, | |
| "grad_norm": 0.5215110019193456, | |
| "learning_rate": 1.0195834292363881e-06, | |
| "loss": 0.4717, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.4459510357815444, | |
| "grad_norm": 0.5003135573061556, | |
| "learning_rate": 1.0054870372566273e-06, | |
| "loss": 0.4711, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.449968612680477, | |
| "grad_norm": 0.5154412726359013, | |
| "learning_rate": 9.914778693910165e-07, | |
| "loss": 0.4738, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.45398618957941, | |
| "grad_norm": 0.5155157663249713, | |
| "learning_rate": 9.775562315435005e-07, | |
| "loss": 0.481, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.4580037664783427, | |
| "grad_norm": 0.4896545010247639, | |
| "learning_rate": 9.637224277067142e-07, | |
| "loss": 0.4869, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.4620213433772755, | |
| "grad_norm": 0.4603343227039726, | |
| "learning_rate": 9.499767599553528e-07, | |
| "loss": 0.4817, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.4660389202762083, | |
| "grad_norm": 0.4600261678419497, | |
| "learning_rate": 9.363195284395732e-07, | |
| "loss": 0.4679, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.470056497175141, | |
| "grad_norm": 0.4989876951619158, | |
| "learning_rate": 9.227510313784405e-07, | |
| "loss": 0.4805, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.474074074074074, | |
| "grad_norm": 0.49701651082881143, | |
| "learning_rate": 9.092715650534162e-07, | |
| "loss": 0.474, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.478091650973007, | |
| "grad_norm": 0.47689581346361015, | |
| "learning_rate": 8.958814238018864e-07, | |
| "loss": 0.4735, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.48210922787194, | |
| "grad_norm": 0.48466102861465504, | |
| "learning_rate": 8.825809000107382e-07, | |
| "loss": 0.4823, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.4861268047708727, | |
| "grad_norm": 0.43949823990630094, | |
| "learning_rate": 8.693702841099744e-07, | |
| "loss": 0.468, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.4901443816698055, | |
| "grad_norm": 0.4734599690753599, | |
| "learning_rate": 8.56249864566368e-07, | |
| "loss": 0.4716, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.4941619585687382, | |
| "grad_norm": 0.4837246488118121, | |
| "learning_rate": 8.432199278771679e-07, | |
| "loss": 0.4727, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.498179535467671, | |
| "grad_norm": 0.5212860772248176, | |
| "learning_rate": 8.302807585638401e-07, | |
| "loss": 0.4781, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.502197112366604, | |
| "grad_norm": 0.4952468033104653, | |
| "learning_rate": 8.174326391658561e-07, | |
| "loss": 0.4742, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.5062146892655366, | |
| "grad_norm": 0.4867979964745898, | |
| "learning_rate": 8.04675850234523e-07, | |
| "loss": 0.4731, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.5102322661644694, | |
| "grad_norm": 0.48350910576661243, | |
| "learning_rate": 7.92010670326856e-07, | |
| "loss": 0.4793, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.5142498430634026, | |
| "grad_norm": 0.48063080647615003, | |
| "learning_rate": 7.794373759995017e-07, | |
| "loss": 0.4814, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.518267419962335, | |
| "grad_norm": 0.5071850927755938, | |
| "learning_rate": 7.669562418026905e-07, | |
| "loss": 0.4726, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.522284996861268, | |
| "grad_norm": 0.5007552952463317, | |
| "learning_rate": 7.545675402742464e-07, | |
| "loss": 0.4701, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.526302573760201, | |
| "grad_norm": 0.49193921591367956, | |
| "learning_rate": 7.422715419336374e-07, | |
| "loss": 0.4798, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.5303201506591337, | |
| "grad_norm": 0.4631014183740996, | |
| "learning_rate": 7.30068515276064e-07, | |
| "loss": 0.4783, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.5343377275580665, | |
| "grad_norm": 0.4461109021704027, | |
| "learning_rate": 7.179587267665999e-07, | |
| "loss": 0.4807, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.5383553044569993, | |
| "grad_norm": 0.5615597716996266, | |
| "learning_rate": 7.059424408343713e-07, | |
| "loss": 0.476, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.542372881355932, | |
| "grad_norm": 0.4681517030493916, | |
| "learning_rate": 6.940199198667863e-07, | |
| "loss": 0.4746, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.546390458254865, | |
| "grad_norm": 0.4270578942801569, | |
| "learning_rate": 6.821914242038013e-07, | |
| "loss": 0.47, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.550408035153798, | |
| "grad_norm": 0.5120970629852971, | |
| "learning_rate": 6.704572121322356e-07, | |
| "loss": 0.4661, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.5544256120527304, | |
| "grad_norm": 0.4577342016167032, | |
| "learning_rate": 6.588175398801356e-07, | |
| "loss": 0.4778, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.5584431889516637, | |
| "grad_norm": 0.5112843701769388, | |
| "learning_rate": 6.472726616111797e-07, | |
| "loss": 0.4774, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.5624607658505965, | |
| "grad_norm": 0.5002523546947816, | |
| "learning_rate": 6.358228294191248e-07, | |
| "loss": 0.4745, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.5664783427495292, | |
| "grad_norm": 0.4844209522259677, | |
| "learning_rate": 6.244682933223023e-07, | |
| "loss": 0.4743, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.570495919648462, | |
| "grad_norm": 0.5054412286540757, | |
| "learning_rate": 6.13209301258162e-07, | |
| "loss": 0.4689, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.574513496547395, | |
| "grad_norm": 0.4613555569290718, | |
| "learning_rate": 6.020460990778537e-07, | |
| "loss": 0.4711, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.5785310734463276, | |
| "grad_norm": 0.47785054223407225, | |
| "learning_rate": 5.909789305408631e-07, | |
| "loss": 0.476, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.5825486503452604, | |
| "grad_norm": 0.5097922646263033, | |
| "learning_rate": 5.800080373096839e-07, | |
| "loss": 0.4628, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.5865662272441936, | |
| "grad_norm": 0.460211806295128, | |
| "learning_rate": 5.691336589445485e-07, | |
| "loss": 0.4693, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.590583804143126, | |
| "grad_norm": 0.47963475329302363, | |
| "learning_rate": 5.583560328981885e-07, | |
| "loss": 0.4741, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.594601381042059, | |
| "grad_norm": 0.521520019445044, | |
| "learning_rate": 5.476753945106556e-07, | |
| "loss": 0.4763, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.598618957940992, | |
| "grad_norm": 0.4716378386067206, | |
| "learning_rate": 5.370919770041799e-07, | |
| "loss": 0.4742, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.6026365348399247, | |
| "grad_norm": 0.5095512360791891, | |
| "learning_rate": 5.266060114780774e-07, | |
| "loss": 0.4769, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.6066541117388575, | |
| "grad_norm": 0.49008584458884724, | |
| "learning_rate": 5.162177269037061e-07, | |
| "loss": 0.4695, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.6106716886377903, | |
| "grad_norm": 0.4430564461046631, | |
| "learning_rate": 5.059273501194622e-07, | |
| "loss": 0.4738, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.614689265536723, | |
| "grad_norm": 0.48041503000167013, | |
| "learning_rate": 4.95735105825833e-07, | |
| "loss": 0.4671, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.618706842435656, | |
| "grad_norm": 0.47058785476417425, | |
| "learning_rate": 4.856412165804824e-07, | |
| "loss": 0.4656, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.6227244193345887, | |
| "grad_norm": 0.5186436231128471, | |
| "learning_rate": 4.756459027933974e-07, | |
| "loss": 0.4795, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.6267419962335214, | |
| "grad_norm": 0.4971208460235255, | |
| "learning_rate": 4.657493827220705e-07, | |
| "loss": 0.4745, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.6307595731324547, | |
| "grad_norm": 0.46196697310421064, | |
| "learning_rate": 4.559518724667411e-07, | |
| "loss": 0.4788, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.6347771500313875, | |
| "grad_norm": 0.516003177023342, | |
| "learning_rate": 4.462535859656675e-07, | |
| "loss": 0.476, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.6387947269303202, | |
| "grad_norm": 0.5015505649590103, | |
| "learning_rate": 4.36654734990461e-07, | |
| "loss": 0.4818, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.642812303829253, | |
| "grad_norm": 0.517201945888056, | |
| "learning_rate": 4.271555291414636e-07, | |
| "loss": 0.4564, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.646829880728186, | |
| "grad_norm": 0.4978218856523734, | |
| "learning_rate": 4.1775617584316476e-07, | |
| "loss": 0.4713, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.6508474576271186, | |
| "grad_norm": 0.4876775749761943, | |
| "learning_rate": 4.0845688033967435e-07, | |
| "loss": 0.4753, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.6548650345260514, | |
| "grad_norm": 0.49046863554031045, | |
| "learning_rate": 3.992578456902452e-07, | |
| "loss": 0.4719, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.658882611424984, | |
| "grad_norm": 0.4828102602774462, | |
| "learning_rate": 3.901592727648351e-07, | |
| "loss": 0.471, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.662900188323917, | |
| "grad_norm": 0.5128903958409182, | |
| "learning_rate": 3.811613602397202e-07, | |
| "loss": 0.4799, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.66691776522285, | |
| "grad_norm": 0.485862558152459, | |
| "learning_rate": 3.7226430459315957e-07, | |
| "loss": 0.4682, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.670935342121783, | |
| "grad_norm": 0.48320591671258245, | |
| "learning_rate": 3.634683001011019e-07, | |
| "loss": 0.4802, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.6749529190207157, | |
| "grad_norm": 0.4382446479240708, | |
| "learning_rate": 3.547735388329443e-07, | |
| "loss": 0.4728, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.6789704959196485, | |
| "grad_norm": 0.4939760563538167, | |
| "learning_rate": 3.461802106473411e-07, | |
| "loss": 0.4811, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.6829880728185813, | |
| "grad_norm": 0.50894968077572, | |
| "learning_rate": 3.3768850318805224e-07, | |
| "loss": 0.4666, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.687005649717514, | |
| "grad_norm": 0.5297225230888177, | |
| "learning_rate": 3.2929860187985216e-07, | |
| "loss": 0.4712, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.691023226616447, | |
| "grad_norm": 0.49359696533604985, | |
| "learning_rate": 3.210106899244775e-07, | |
| "loss": 0.4808, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.6950408035153797, | |
| "grad_norm": 0.47433607917767673, | |
| "learning_rate": 3.1282494829662556e-07, | |
| "loss": 0.4676, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.6990583804143125, | |
| "grad_norm": 0.4777730701958091, | |
| "learning_rate": 3.047415557400057e-07, | |
| "loss": 0.4777, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.7030759573132457, | |
| "grad_norm": 0.4944173220055023, | |
| "learning_rate": 2.967606887634344e-07, | |
| "loss": 0.4736, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.707093534212178, | |
| "grad_norm": 0.5904235702447377, | |
| "learning_rate": 2.888825216369806e-07, | |
| "loss": 0.4772, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.7111111111111112, | |
| "grad_norm": 0.4952766389802285, | |
| "learning_rate": 2.811072263881615e-07, | |
| "loss": 0.485, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.715128688010044, | |
| "grad_norm": 1.0088921588490039, | |
| "learning_rate": 2.7343497279818833e-07, | |
| "loss": 0.4695, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.719146264908977, | |
| "grad_norm": 0.4788614551696112, | |
| "learning_rate": 2.658659283982523e-07, | |
| "loss": 0.4737, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.7231638418079096, | |
| "grad_norm": 0.5299426999271306, | |
| "learning_rate": 2.58400258465874e-07, | |
| "loss": 0.4835, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.7271814187068424, | |
| "grad_norm": 0.496725834314719, | |
| "learning_rate": 2.510381260212874e-07, | |
| "loss": 0.4714, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.731198995605775, | |
| "grad_norm": 0.5015477715189429, | |
| "learning_rate": 2.4377969182388774e-07, | |
| "loss": 0.4692, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.735216572504708, | |
| "grad_norm": 0.5250384332220221, | |
| "learning_rate": 2.3662511436871538e-07, | |
| "loss": 0.4749, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.739234149403641, | |
| "grad_norm": 0.4422929805667402, | |
| "learning_rate": 2.295745498829949e-07, | |
| "loss": 0.475, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.7432517263025735, | |
| "grad_norm": 0.5083194750205056, | |
| "learning_rate": 2.2262815232272916e-07, | |
| "loss": 0.4683, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.7472693032015068, | |
| "grad_norm": 0.459034326672265, | |
| "learning_rate": 2.1578607336933177e-07, | |
| "loss": 0.4776, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.7512868801004395, | |
| "grad_norm": 0.4944061747494303, | |
| "learning_rate": 2.090484624263167e-07, | |
| "loss": 0.4686, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.7553044569993723, | |
| "grad_norm": 0.5465184475852697, | |
| "learning_rate": 2.0241546661603605e-07, | |
| "loss": 0.4694, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.759322033898305, | |
| "grad_norm": 0.4897764666579114, | |
| "learning_rate": 1.9588723077646976e-07, | |
| "loss": 0.4711, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.763339610797238, | |
| "grad_norm": 0.49639087360500034, | |
| "learning_rate": 1.8946389745805983e-07, | |
| "loss": 0.4747, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.7673571876961707, | |
| "grad_norm": 0.5022906703230215, | |
| "learning_rate": 1.8314560692059836e-07, | |
| "loss": 0.4735, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.7713747645951035, | |
| "grad_norm": 0.48564181193501227, | |
| "learning_rate": 1.7693249713016558e-07, | |
| "loss": 0.466, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.7753923414940367, | |
| "grad_norm": 0.5530530885750234, | |
| "learning_rate": 1.7082470375611614e-07, | |
| "loss": 0.4815, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.779409918392969, | |
| "grad_norm": 0.48580647047733116, | |
| "learning_rate": 1.648223601681176e-07, | |
| "loss": 0.4858, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.7834274952919023, | |
| "grad_norm": 0.4942106873900172, | |
| "learning_rate": 1.589255974332382e-07, | |
| "loss": 0.4755, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.787445072190835, | |
| "grad_norm": 0.5077413435223396, | |
| "learning_rate": 1.5313454431308494e-07, | |
| "loss": 0.4762, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.791462649089768, | |
| "grad_norm": 0.49459375051900323, | |
| "learning_rate": 1.4744932726099005e-07, | |
| "loss": 0.4678, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.7954802259887006, | |
| "grad_norm": 0.5103917447607162, | |
| "learning_rate": 1.4187007041925328e-07, | |
| "loss": 0.4734, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.7994978028876334, | |
| "grad_norm": 0.47776810942745174, | |
| "learning_rate": 1.363968956164269e-07, | |
| "loss": 0.4736, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.803515379786566, | |
| "grad_norm": 0.5055359246952604, | |
| "learning_rate": 1.310299223646594e-07, | |
| "loss": 0.4675, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.807532956685499, | |
| "grad_norm": 0.5159323211802336, | |
| "learning_rate": 1.2576926785708321e-07, | |
| "loss": 0.4796, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.8115505335844317, | |
| "grad_norm": 0.4717892098536317, | |
| "learning_rate": 1.2061504696525617e-07, | |
| "loss": 0.4752, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.8155681104833645, | |
| "grad_norm": 0.47314656128582044, | |
| "learning_rate": 1.1556737223665515e-07, | |
| "loss": 0.4715, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.8195856873822978, | |
| "grad_norm": 0.47908474784592364, | |
| "learning_rate": 1.1062635389221588e-07, | |
| "loss": 0.4865, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.8236032642812305, | |
| "grad_norm": 0.4460236716075369, | |
| "learning_rate": 1.0579209982392757e-07, | |
| "loss": 0.4692, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.8276208411801633, | |
| "grad_norm": 0.5320629331138326, | |
| "learning_rate": 1.0106471559247433e-07, | |
| "loss": 0.4692, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.831638418079096, | |
| "grad_norm": 0.5095821610850613, | |
| "learning_rate": 9.644430442493636e-08, | |
| "loss": 0.4635, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.835655994978029, | |
| "grad_norm": 0.45919702634359383, | |
| "learning_rate": 9.193096721252903e-08, | |
| "loss": 0.4623, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.8396735718769617, | |
| "grad_norm": 0.5030328915586799, | |
| "learning_rate": 8.752480250840411e-08, | |
| "loss": 0.4738, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.8436911487758945, | |
| "grad_norm": 0.5239356661209714, | |
| "learning_rate": 8.322590652549478e-08, | |
| "loss": 0.4717, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.8477087256748272, | |
| "grad_norm": 0.5003917496304626, | |
| "learning_rate": 7.903437313441842e-08, | |
| "loss": 0.4857, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.85172630257376, | |
| "grad_norm": 0.4708502707470253, | |
| "learning_rate": 7.495029386142382e-08, | |
| "loss": 0.4724, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.8557438794726933, | |
| "grad_norm": 0.5171601825808555, | |
| "learning_rate": 7.097375788639227e-08, | |
| "loss": 0.4655, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.8597614563716256, | |
| "grad_norm": 0.49349820404810807, | |
| "learning_rate": 6.710485204089456e-08, | |
| "loss": 0.4701, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.863779033270559, | |
| "grad_norm": 0.5150356271171084, | |
| "learning_rate": 6.334366080628873e-08, | |
| "loss": 0.482, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.8677966101694916, | |
| "grad_norm": 0.45255498690118884, | |
| "learning_rate": 5.96902663118798e-08, | |
| "loss": 0.4696, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.8718141870684244, | |
| "grad_norm": 0.4833344072840721, | |
| "learning_rate": 5.614474833312622e-08, | |
| "loss": 0.4686, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.875831763967357, | |
| "grad_norm": 0.44938418775925026, | |
| "learning_rate": 5.270718428989463e-08, | |
| "loss": 0.4671, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.87984934086629, | |
| "grad_norm": 0.5005428826464499, | |
| "learning_rate": 4.937764924477284e-08, | |
| "loss": 0.4757, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.8838669177652227, | |
| "grad_norm": 0.511470087270601, | |
| "learning_rate": 4.615621590142838e-08, | |
| "loss": 0.488, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.8878844946641555, | |
| "grad_norm": 0.5016274179565011, | |
| "learning_rate": 4.3042954603023655e-08, | |
| "loss": 0.4717, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.8919020715630888, | |
| "grad_norm": 0.46282357124816725, | |
| "learning_rate": 4.003793333067607e-08, | |
| "loss": 0.47, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.895919648462021, | |
| "grad_norm": 0.5009456286050996, | |
| "learning_rate": 3.714121770197754e-08, | |
| "loss": 0.467, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.8999372253609543, | |
| "grad_norm": 0.49427390713109254, | |
| "learning_rate": 3.435287096955897e-08, | |
| "loss": 0.4703, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.903954802259887, | |
| "grad_norm": 0.4756544687033634, | |
| "learning_rate": 3.167295401970971e-08, | |
| "loss": 0.475, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.90797237915882, | |
| "grad_norm": 0.4729939555426576, | |
| "learning_rate": 2.9101525371049154e-08, | |
| "loss": 0.4851, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.9119899560577527, | |
| "grad_norm": 0.4655382923368333, | |
| "learning_rate": 2.663864117324777e-08, | |
| "loss": 0.4755, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.9160075329566855, | |
| "grad_norm": 0.47231627841883606, | |
| "learning_rate": 2.42843552058003e-08, | |
| "loss": 0.4677, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.9200251098556183, | |
| "grad_norm": 0.48596292370132727, | |
| "learning_rate": 2.203871887685449e-08, | |
| "loss": 0.4744, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.924042686754551, | |
| "grad_norm": 0.4968533928087242, | |
| "learning_rate": 1.9901781222084192e-08, | |
| "loss": 0.4755, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.9280602636534843, | |
| "grad_norm": 0.5186135008077664, | |
| "learning_rate": 1.7873588903623006e-08, | |
| "loss": 0.479, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.9320778405524166, | |
| "grad_norm": 0.5196627425255889, | |
| "learning_rate": 1.5954186209042323e-08, | |
| "loss": 0.4684, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.93609541745135, | |
| "grad_norm": 0.4875506685262467, | |
| "learning_rate": 1.4143615050384862e-08, | |
| "loss": 0.4619, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.9401129943502826, | |
| "grad_norm": 0.4803641492705183, | |
| "learning_rate": 1.2441914963250423e-08, | |
| "loss": 0.4753, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.9441305712492154, | |
| "grad_norm": 0.4438429839046257, | |
| "learning_rate": 1.0849123105931558e-08, | |
| "loss": 0.4772, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.948148148148148, | |
| "grad_norm": 0.47157812956867295, | |
| "learning_rate": 9.365274258604229e-09, | |
| "loss": 0.4743, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.952165725047081, | |
| "grad_norm": 0.5144283649963357, | |
| "learning_rate": 7.990400822564525e-09, | |
| "loss": 0.4898, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.9561833019460138, | |
| "grad_norm": 0.4994406655154466, | |
| "learning_rate": 6.7245328195247875e-09, | |
| "loss": 0.4807, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.9602008788449465, | |
| "grad_norm": 0.4789005591934425, | |
| "learning_rate": 5.567697890955792e-09, | |
| "loss": 0.4809, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.9642184557438793, | |
| "grad_norm": 0.4556018148546147, | |
| "learning_rate": 4.519921297484464e-09, | |
| "loss": 0.4687, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.968236032642812, | |
| "grad_norm": 0.5209374897753106, | |
| "learning_rate": 3.5812259183426457e-09, | |
| "loss": 0.47, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.9722536095417453, | |
| "grad_norm": 0.4460089771421974, | |
| "learning_rate": 2.751632250865832e-09, | |
| "loss": 0.4778, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.976271186440678, | |
| "grad_norm": 0.4783096492145474, | |
| "learning_rate": 2.0311584100457526e-09, | |
| "loss": 0.4753, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.980288763339611, | |
| "grad_norm": 0.4882741296424901, | |
| "learning_rate": 1.4198201281373503e-09, | |
| "loss": 0.484, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.9843063402385437, | |
| "grad_norm": 0.4605012097308224, | |
| "learning_rate": 9.17630754312393e-10, | |
| "loss": 0.4795, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.9883239171374765, | |
| "grad_norm": 0.7503308211144039, | |
| "learning_rate": 5.246012543680401e-10, | |
| "loss": 0.4806, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.9923414940364093, | |
| "grad_norm": 0.4845828672261576, | |
| "learning_rate": 2.4074021049091954e-10, | |
| "loss": 0.4753, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.996359070935342, | |
| "grad_norm": 0.525299815132595, | |
| "learning_rate": 6.605382106505964e-11, | |
| "loss": 0.4796, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.5136490807064724, | |
| "learning_rate": 5.459005397723261e-13, | |
| "loss": 0.4664, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 7470, | |
| "total_flos": 4.845415158505275e+18, | |
| "train_loss": 0.5501844393999541, | |
| "train_runtime": 257337.3533, | |
| "train_samples_per_second": 3.714, | |
| "train_steps_per_second": 0.029 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7470, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 24890, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.845415158505275e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
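
The state above is a standard Hugging Face Trainer state file: "log_history" holds one snapshot per logging interval ("logging_steps": 10), and the final entry carries the run summary. A minimal sketch of how such a file can be inspected, assuming it is saved as "trainer_state.json" in the working directory and that matplotlib is available (both are assumptions for illustration, not part of the file):

# plot_loss.py -- minimal sketch; the filename "trainer_state.json" and the
# matplotlib dependency are assumptions, not part of the trainer state itself.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step snapshots carry a "loss" key; the final summary entry does not,
# so this filter keeps only the logged training steps.
logged = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logged]
losses = [e["loss"] for e in logged]

plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title("loss over {} steps".format(state["max_steps"]))
plt.savefig("loss_curve.png")

Run as "python plot_loss.py"; it writes loss_curve.png showing the decay from roughly 1.07 at step 10 to about 0.47 by step 7470.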