| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998985080686086, | |
| "eval_steps": 500, | |
| "global_step": 2463, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0040596772556581754, | |
| "grad_norm": 132.0478057861328, | |
| "learning_rate": 1.3513513513513515e-10, | |
| "loss": 15.9449, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008119354511316351, | |
| "grad_norm": 185.23609924316406, | |
| "learning_rate": 2.702702702702703e-10, | |
| "loss": 17.0759, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012179031766974525, | |
| "grad_norm": 138.67050170898438, | |
| "learning_rate": 4.0540540540540546e-10, | |
| "loss": 16.5406, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.016238709022632702, | |
| "grad_norm": 154.48605346679688, | |
| "learning_rate": 5.405405405405406e-10, | |
| "loss": 16.9727, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.020298386278290875, | |
| "grad_norm": 157.55892944335938, | |
| "learning_rate": 6.756756756756757e-10, | |
| "loss": 16.3976, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02435806353394905, | |
| "grad_norm": 195.1601104736328, | |
| "learning_rate": 8.108108108108109e-10, | |
| "loss": 17.7916, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028417740789607227, | |
| "grad_norm": 185.73776245117188, | |
| "learning_rate": 9.45945945945946e-10, | |
| "loss": 16.9119, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.032477418045265403, | |
| "grad_norm": 200.99549865722656, | |
| "learning_rate": 9.9998443648451e-10, | |
| "loss": 17.2071, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.036537095300923576, | |
| "grad_norm": 204.63539123535156, | |
| "learning_rate": 9.99889329620792e-10, | |
| "loss": 16.493, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04059677255658175, | |
| "grad_norm": 176.94541931152344, | |
| "learning_rate": 9.997077787173976e-10, | |
| "loss": 17.9207, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04465644981223993, | |
| "grad_norm": 208.50137329101562, | |
| "learning_rate": 9.99439815169263e-10, | |
| "loss": 16.9535, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0487161270678981, | |
| "grad_norm": 169.84095764160156, | |
| "learning_rate": 9.990854853143476e-10, | |
| "loss": 16.6511, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.052775804323556275, | |
| "grad_norm": 176.42965698242188, | |
| "learning_rate": 9.98644850425622e-10, | |
| "loss": 17.7791, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.056835481579214454, | |
| "grad_norm": 171.8761749267578, | |
| "learning_rate": 9.981179867004708e-10, | |
| "loss": 17.931, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06089515883487263, | |
| "grad_norm": 222.7410888671875, | |
| "learning_rate": 9.97504985247518e-10, | |
| "loss": 16.1024, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06495483609053081, | |
| "grad_norm": 173.39248657226562, | |
| "learning_rate": 9.968059520708706e-10, | |
| "loss": 17.2411, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06901451334618898, | |
| "grad_norm": 202.84156799316406, | |
| "learning_rate": 9.960210080517876e-10, | |
| "loss": 17.6544, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07307419060184715, | |
| "grad_norm": 195.48196411132812, | |
| "learning_rate": 9.951502889277773e-10, | |
| "loss": 17.2764, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07713386785750533, | |
| "grad_norm": 204.30767822265625, | |
| "learning_rate": 9.941939452691238e-10, | |
| "loss": 17.3761, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0811935451131635, | |
| "grad_norm": 233.84881591796875, | |
| "learning_rate": 9.931521424528503e-10, | |
| "loss": 17.5323, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08525322236882169, | |
| "grad_norm": 171.0516357421875, | |
| "learning_rate": 9.920250606341204e-10, | |
| "loss": 17.3739, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08931289962447986, | |
| "grad_norm": 225.2151641845703, | |
| "learning_rate": 9.908128947150849e-10, | |
| "loss": 17.3732, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09337257688013803, | |
| "grad_norm": 144.97401428222656, | |
| "learning_rate": 9.895158543111775e-10, | |
| "loss": 16.4779, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0974322541357962, | |
| "grad_norm": 194.57334899902344, | |
| "learning_rate": 9.881341637148678e-10, | |
| "loss": 17.972, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10149193139145438, | |
| "grad_norm": 182.3833770751953, | |
| "learning_rate": 9.866680618568744e-10, | |
| "loss": 17.1289, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10555160864711255, | |
| "grad_norm": 139.1823272705078, | |
| "learning_rate": 9.851178022648477e-10, | |
| "loss": 16.7695, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10961128590277074, | |
| "grad_norm": 147.51815795898438, | |
| "learning_rate": 9.834836530195282e-10, | |
| "loss": 16.6021, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11367096315842891, | |
| "grad_norm": 141.7736358642578, | |
| "learning_rate": 9.817658967083883e-10, | |
| "loss": 17.0966, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11773064041408708, | |
| "grad_norm": 188.4720001220703, | |
| "learning_rate": 9.799648303767659e-10, | |
| "loss": 16.9828, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12179031766974525, | |
| "grad_norm": 148.27267456054688, | |
| "learning_rate": 9.780807654764966e-10, | |
| "loss": 17.4211, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12584999492540344, | |
| "grad_norm": 149.38882446289062, | |
| "learning_rate": 9.761140278120562e-10, | |
| "loss": 16.9751, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12990967218106161, | |
| "grad_norm": 159.62318420410156, | |
| "learning_rate": 9.740649574842206e-10, | |
| "loss": 16.3416, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1339693494367198, | |
| "grad_norm": 158.93472290039062, | |
| "learning_rate": 9.719339088312521e-10, | |
| "loss": 16.6636, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13802902669237796, | |
| "grad_norm": 197.2371826171875, | |
| "learning_rate": 9.697212503676272e-10, | |
| "loss": 18.0939, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14208870394803613, | |
| "grad_norm": 149.5404510498047, | |
| "learning_rate": 9.674273647203087e-10, | |
| "loss": 16.6984, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1461483812036943, | |
| "grad_norm": 159.680908203125, | |
| "learning_rate": 9.650526485625804e-10, | |
| "loss": 17.404, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.15020805845935248, | |
| "grad_norm": 171.5859832763672, | |
| "learning_rate": 9.625975125454515e-10, | |
| "loss": 16.8117, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.15426773571501065, | |
| "grad_norm": 199.5972137451172, | |
| "learning_rate": 9.600623812266447e-10, | |
| "loss": 17.56, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15832741297066882, | |
| "grad_norm": 147.773681640625, | |
| "learning_rate": 9.57447692997178e-10, | |
| "loss": 17.4291, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.162387090226327, | |
| "grad_norm": 218.25433349609375, | |
| "learning_rate": 9.54753900005557e-10, | |
| "loss": 17.5885, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16644676748198517, | |
| "grad_norm": 169.73826599121094, | |
| "learning_rate": 9.519814680795842e-10, | |
| "loss": 16.6519, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.17050644473764337, | |
| "grad_norm": 166.3510284423828, | |
| "learning_rate": 9.491308766458076e-10, | |
| "loss": 17.2467, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17456612199330154, | |
| "grad_norm": 182.20436096191406, | |
| "learning_rate": 9.462026186466134e-10, | |
| "loss": 17.4754, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17862579924895972, | |
| "grad_norm": 162.10064697265625, | |
| "learning_rate": 9.431972004549834e-10, | |
| "loss": 16.3912, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1826854765046179, | |
| "grad_norm": 211.85739135742188, | |
| "learning_rate": 9.40115141786931e-10, | |
| "loss": 18.0005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18674515376027606, | |
| "grad_norm": 156.6442413330078, | |
| "learning_rate": 9.369569756116282e-10, | |
| "loss": 16.4153, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.19080483101593423, | |
| "grad_norm": 160.61705017089844, | |
| "learning_rate": 9.337232480592392e-10, | |
| "loss": 17.6727, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1948645082715924, | |
| "grad_norm": 152.6673583984375, | |
| "learning_rate": 9.304145183264834e-10, | |
| "loss": 17.8167, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19892418552725058, | |
| "grad_norm": 200.42538452148438, | |
| "learning_rate": 9.270313585799328e-10, | |
| "loss": 17.4904, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.20298386278290875, | |
| "grad_norm": 232.02088928222656, | |
| "learning_rate": 9.235743538570709e-10, | |
| "loss": 16.3814, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20704354003856693, | |
| "grad_norm": 159.0342254638672, | |
| "learning_rate": 9.200441019651237e-10, | |
| "loss": 16.7111, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2111032172942251, | |
| "grad_norm": 189.23023986816406, | |
| "learning_rate": 9.164412133776831e-10, | |
| "loss": 17.5323, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.21516289454988327, | |
| "grad_norm": 126.5080337524414, | |
| "learning_rate": 9.127663111291399e-10, | |
| "loss": 17.2915, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21922257180554147, | |
| "grad_norm": 206.55093383789062, | |
| "learning_rate": 9.09020030706945e-10, | |
| "loss": 17.1491, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.22328224906119964, | |
| "grad_norm": 219.1647491455078, | |
| "learning_rate": 9.052030199417168e-10, | |
| "loss": 17.3283, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22734192631685782, | |
| "grad_norm": 178.6308135986328, | |
| "learning_rate": 9.013159388952136e-10, | |
| "loss": 16.8583, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.231401603572516, | |
| "grad_norm": 173.4936065673828, | |
| "learning_rate": 8.973594597461927e-10, | |
| "loss": 17.5231, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23546128082817416, | |
| "grad_norm": 162.73269653320312, | |
| "learning_rate": 8.933342666741717e-10, | |
| "loss": 17.1647, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23952095808383234, | |
| "grad_norm": 200.88888549804688, | |
| "learning_rate": 8.892410557411171e-10, | |
| "loss": 17.3196, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2435806353394905, | |
| "grad_norm": 187.9020233154297, | |
| "learning_rate": 8.850805347710753e-10, | |
| "loss": 17.6811, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24764031259514868, | |
| "grad_norm": 201.80628967285156, | |
| "learning_rate": 8.80853423227773e-10, | |
| "loss": 18.0601, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2516999898508069, | |
| "grad_norm": 130.6888427734375, | |
| "learning_rate": 8.765604520902013e-10, | |
| "loss": 15.8318, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.255759667106465, | |
| "grad_norm": 174.96263122558594, | |
| "learning_rate": 8.722023637262114e-10, | |
| "loss": 17.533, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.25981934436212323, | |
| "grad_norm": 191.3065643310547, | |
| "learning_rate": 8.677799117641387e-10, | |
| "loss": 17.1311, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2638790216177814, | |
| "grad_norm": 202.28585815429688, | |
| "learning_rate": 8.632938609624813e-10, | |
| "loss": 17.2724, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2679386988734396, | |
| "grad_norm": 222.07972717285156, | |
| "learning_rate": 8.587449870776526e-10, | |
| "loss": 17.2216, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2719983761290977, | |
| "grad_norm": 214.5223846435547, | |
| "learning_rate": 8.541340767298328e-10, | |
| "loss": 17.3321, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2760580533847559, | |
| "grad_norm": 168.41421508789062, | |
| "learning_rate": 8.494619272669418e-10, | |
| "loss": 17.529, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.28011773064041406, | |
| "grad_norm": 182.80117797851562, | |
| "learning_rate": 8.447293466267558e-10, | |
| "loss": 18.1657, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.28417740789607226, | |
| "grad_norm": 190.1322784423828, | |
| "learning_rate": 8.399371531971954e-10, | |
| "loss": 18.3519, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2882370851517304, | |
| "grad_norm": 195.91421508789062, | |
| "learning_rate": 8.350861756748022e-10, | |
| "loss": 17.8645, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2922967624073886, | |
| "grad_norm": 133.7427215576172, | |
| "learning_rate": 8.301772529214376e-10, | |
| "loss": 17.2449, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2963564396630468, | |
| "grad_norm": 197.28350830078125, | |
| "learning_rate": 8.252112338192204e-10, | |
| "loss": 17.3724, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.30041611691870496, | |
| "grad_norm": 185.95706176757812, | |
| "learning_rate": 8.201889771237327e-10, | |
| "loss": 16.9303, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.30447579417436316, | |
| "grad_norm": 167.47555541992188, | |
| "learning_rate": 8.151113513155189e-10, | |
| "loss": 17.2537, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3085354714300213, | |
| "grad_norm": 232.46424865722656, | |
| "learning_rate": 8.099792344499018e-10, | |
| "loss": 17.4633, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3125951486856795, | |
| "grad_norm": 188.18106079101562, | |
| "learning_rate": 8.047935140051446e-10, | |
| "loss": 17.2019, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.31665482594133765, | |
| "grad_norm": 177.53736877441406, | |
| "learning_rate": 7.995550867289819e-10, | |
| "loss": 16.7029, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.32071450319699585, | |
| "grad_norm": 184.06761169433594, | |
| "learning_rate": 7.942648584835484e-10, | |
| "loss": 18.0381, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.324774180452654, | |
| "grad_norm": 173.73468017578125, | |
| "learning_rate": 7.889237440887321e-10, | |
| "loss": 18.0302, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3288338577083122, | |
| "grad_norm": 217.86709594726562, | |
| "learning_rate": 7.835326671639764e-10, | |
| "loss": 18.0424, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.33289353496397034, | |
| "grad_norm": 202.07118225097656, | |
| "learning_rate": 7.780925599685638e-10, | |
| "loss": 16.8956, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.33695321221962854, | |
| "grad_norm": 191.79275512695312, | |
| "learning_rate": 7.726043632404022e-10, | |
| "loss": 17.3942, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.34101288947528674, | |
| "grad_norm": 161.33790588378906, | |
| "learning_rate": 7.670690260333475e-10, | |
| "loss": 17.1583, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3450725667309449, | |
| "grad_norm": 181.90426635742188, | |
| "learning_rate": 7.614875055530866e-10, | |
| "loss": 17.1477, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3491322439866031, | |
| "grad_norm": 213.49960327148438, | |
| "learning_rate": 7.558607669916116e-10, | |
| "loss": 17.6481, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.35319192124226123, | |
| "grad_norm": 202.2276153564453, | |
| "learning_rate": 7.501897833603124e-10, | |
| "loss": 16.7866, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.35725159849791943, | |
| "grad_norm": 177.2437286376953, | |
| "learning_rate": 7.444755353217177e-10, | |
| "loss": 17.1007, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3613112757535776, | |
| "grad_norm": 161.7916717529297, | |
| "learning_rate": 7.387190110199122e-10, | |
| "loss": 16.8443, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3653709530092358, | |
| "grad_norm": 160.1624298095703, | |
| "learning_rate": 7.32921205909661e-10, | |
| "loss": 17.1523, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3694306302648939, | |
| "grad_norm": 200.31753540039062, | |
| "learning_rate": 7.270831225842692e-10, | |
| "loss": 17.6586, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3734903075205521, | |
| "grad_norm": 158.079833984375, | |
| "learning_rate": 7.212057706022059e-10, | |
| "loss": 17.1793, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.37754998477621027, | |
| "grad_norm": 224.93112182617188, | |
| "learning_rate": 7.152901663125267e-10, | |
| "loss": 18.1676, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.38160966203186847, | |
| "grad_norm": 199.39297485351562, | |
| "learning_rate": 7.09337332679119e-10, | |
| "loss": 15.8113, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.38566933928752667, | |
| "grad_norm": 202.5852508544922, | |
| "learning_rate": 7.033482991038051e-10, | |
| "loss": 17.3973, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3897290165431848, | |
| "grad_norm": 206.29861450195312, | |
| "learning_rate": 6.97324101248331e-10, | |
| "loss": 16.953, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.393788693798843, | |
| "grad_norm": 140.68646240234375, | |
| "learning_rate": 6.91265780855274e-10, | |
| "loss": 17.5197, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.39784837105450116, | |
| "grad_norm": 191.14852905273438, | |
| "learning_rate": 6.851743855678965e-10, | |
| "loss": 17.6989, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.40190804831015936, | |
| "grad_norm": 152.35377502441406, | |
| "learning_rate": 6.79050968748983e-10, | |
| "loss": 17.5127, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4059677255658175, | |
| "grad_norm": 181.50877380371094, | |
| "learning_rate": 6.728965892986838e-10, | |
| "loss": 16.8963, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4100274028214757, | |
| "grad_norm": 192.32125854492188, | |
| "learning_rate": 6.667123114714048e-10, | |
| "loss": 17.2991, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.41408708007713385, | |
| "grad_norm": 202.2693634033203, | |
| "learning_rate": 6.604992046917688e-10, | |
| "loss": 16.8996, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.41814675733279205, | |
| "grad_norm": 151.45115661621094, | |
| "learning_rate": 6.542583433696846e-10, | |
| "loss": 16.8886, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4222064345884502, | |
| "grad_norm": 157.8872528076172, | |
| "learning_rate": 6.479908067145527e-10, | |
| "loss": 17.0116, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.4262661118441084, | |
| "grad_norm": 228.60235595703125, | |
| "learning_rate": 6.416976785486416e-10, | |
| "loss": 17.6079, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.43032578909976654, | |
| "grad_norm": 219.45249938964844, | |
| "learning_rate": 6.353800471196667e-10, | |
| "loss": 16.9453, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.43438546635542474, | |
| "grad_norm": 164.8721923828125, | |
| "learning_rate": 6.290390049126031e-10, | |
| "loss": 17.2325, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.43844514361108294, | |
| "grad_norm": 184.8201904296875, | |
| "learning_rate": 6.226756484607668e-10, | |
| "loss": 17.1532, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.4425048208667411, | |
| "grad_norm": 187.04025268554688, | |
| "learning_rate": 6.162910781561946e-10, | |
| "loss": 16.4238, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4465644981223993, | |
| "grad_norm": 200.2959747314453, | |
| "learning_rate": 6.098863980593574e-10, | |
| "loss": 18.0924, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.45062417537805743, | |
| "grad_norm": 214.22193908691406, | |
| "learning_rate": 6.034627157082394e-10, | |
| "loss": 17.5339, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.45468385263371563, | |
| "grad_norm": 219.8036651611328, | |
| "learning_rate": 5.970211419268152e-10, | |
| "loss": 17.7163, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4587435298893738, | |
| "grad_norm": 177.9528045654297, | |
| "learning_rate": 5.905627906329592e-10, | |
| "loss": 17.277, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.462803207145032, | |
| "grad_norm": 181.62625122070312, | |
| "learning_rate": 5.840887786458205e-10, | |
| "loss": 17.0171, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4668628844006901, | |
| "grad_norm": 212.0501251220703, | |
| "learning_rate": 5.776002254926935e-10, | |
| "loss": 17.2654, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4709225616563483, | |
| "grad_norm": 185.97579956054688, | |
| "learning_rate": 5.710982532154247e-10, | |
| "loss": 17.6895, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.47498223891200647, | |
| "grad_norm": 232.2166748046875, | |
| "learning_rate": 5.645839861763805e-10, | |
| "loss": 18.0333, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.47904191616766467, | |
| "grad_norm": 176.52072143554688, | |
| "learning_rate": 5.580585508640152e-10, | |
| "loss": 16.8448, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4831015934233229, | |
| "grad_norm": 189.46929931640625, | |
| "learning_rate": 5.515230756980719e-10, | |
| "loss": 17.2395, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.487161270678981, | |
| "grad_norm": 206.33079528808594, | |
| "learning_rate": 5.449786908344499e-10, | |
| "loss": 16.9241, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4912209479346392, | |
| "grad_norm": 186.9293670654297, | |
| "learning_rate": 5.384265279697689e-10, | |
| "loss": 16.7443, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.49528062519029736, | |
| "grad_norm": 170.4814453125, | |
| "learning_rate": 5.318677201456708e-10, | |
| "loss": 16.6439, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.49934030244595556, | |
| "grad_norm": 181.9535675048828, | |
| "learning_rate": 5.253034015528856e-10, | |
| "loss": 16.3063, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5033999797016138, | |
| "grad_norm": 181.1636505126953, | |
| "learning_rate": 5.187347073351006e-10, | |
| "loss": 17.3231, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5074596569572719, | |
| "grad_norm": 186.47972106933594, | |
| "learning_rate": 5.121627733926641e-10, | |
| "loss": 17.0968, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.51151933421293, | |
| "grad_norm": 194.881591796875, | |
| "learning_rate": 5.055887361861582e-10, | |
| "loss": 18.201, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5155790114685882, | |
| "grad_norm": 199.1874237060547, | |
| "learning_rate": 4.990137325398745e-10, | |
| "loss": 16.7817, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5196386887242465, | |
| "grad_norm": 203.87411499023438, | |
| "learning_rate": 4.924388994452276e-10, | |
| "loss": 17.371, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5236983659799046, | |
| "grad_norm": 177.25927734375, | |
| "learning_rate": 4.858653738641395e-10, | |
| "loss": 16.6596, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5277580432355627, | |
| "grad_norm": 161.23329162597656, | |
| "learning_rate": 4.792942925324285e-10, | |
| "loss": 17.0887, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.531817720491221, | |
| "grad_norm": 188.26792907714844, | |
| "learning_rate": 4.727267917632377e-10, | |
| "loss": 17.4645, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5358773977468791, | |
| "grad_norm": 204.50733947753906, | |
| "learning_rate": 4.661640072505365e-10, | |
| "loss": 17.5325, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5399370750025373, | |
| "grad_norm": 180.15682983398438, | |
| "learning_rate": 4.5960707387272904e-10, | |
| "loss": 17.7173, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5439967522581954, | |
| "grad_norm": 195.1600341796875, | |
| "learning_rate": 4.5305712549640504e-10, | |
| "loss": 16.8578, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5480564295138537, | |
| "grad_norm": 201.564208984375, | |
| "learning_rate": 4.4651529478026227e-10, | |
| "loss": 17.7686, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5521161067695118, | |
| "grad_norm": 207.5132293701172, | |
| "learning_rate": 4.3998271297924156e-10, | |
| "loss": 16.9821, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.55617578402517, | |
| "grad_norm": 243.6310272216797, | |
| "learning_rate": 4.3346050974890247e-10, | |
| "loss": 17.9338, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5602354612808281, | |
| "grad_norm": 169.40707397460938, | |
| "learning_rate": 4.269498129500762e-10, | |
| "loss": 16.6915, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5642951385364864, | |
| "grad_norm": 209.2589569091797, | |
| "learning_rate": 4.2045174845382885e-10, | |
| "loss": 17.3758, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5683548157921445, | |
| "grad_norm": 171.83935546875, | |
| "learning_rate": 4.139674399467684e-10, | |
| "loss": 16.4755, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5724144930478027, | |
| "grad_norm": 206.6162109375, | |
| "learning_rate": 4.074980087367294e-10, | |
| "loss": 17.9797, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5764741703034608, | |
| "grad_norm": 173.0574951171875, | |
| "learning_rate": 4.010445735588702e-10, | |
| "loss": 16.503, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5805338475591191, | |
| "grad_norm": 206.66969299316406, | |
| "learning_rate": 3.946082503822132e-10, | |
| "loss": 17.5007, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5845935248147772, | |
| "grad_norm": 225.87709045410156, | |
| "learning_rate": 3.881901522166649e-10, | |
| "loss": 17.5912, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5886532020704354, | |
| "grad_norm": 180.4112091064453, | |
| "learning_rate": 3.817913889205473e-10, | |
| "loss": 17.6061, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5927128793260936, | |
| "grad_norm": 128.54103088378906, | |
| "learning_rate": 3.7541306700867386e-10, | |
| "loss": 16.0483, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5967725565817518, | |
| "grad_norm": 181.9798126220703, | |
| "learning_rate": 3.6905628946100346e-10, | |
| "loss": 16.802, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6008322338374099, | |
| "grad_norm": 149.59954833984375, | |
| "learning_rate": 3.6272215553190727e-10, | |
| "loss": 16.2398, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6048919110930681, | |
| "grad_norm": 170.98695373535156, | |
| "learning_rate": 3.564117605600774e-10, | |
| "loss": 16.2826, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6089515883487263, | |
| "grad_norm": 170.1161651611328, | |
| "learning_rate": 3.5012619577911544e-10, | |
| "loss": 17.1219, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6130112656043845, | |
| "grad_norm": 197.9274139404297, | |
| "learning_rate": 3.438665481288278e-10, | |
| "loss": 16.7303, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6170709428600426, | |
| "grad_norm": 187.0927276611328, | |
| "learning_rate": 3.376339000672664e-10, | |
| "loss": 17.0052, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.6211306201157007, | |
| "grad_norm": 161.8428497314453, | |
| "learning_rate": 3.3142932938354233e-10, | |
| "loss": 16.2225, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.625190297371359, | |
| "grad_norm": 198.08689880371094, | |
| "learning_rate": 3.252539090114484e-10, | |
| "loss": 17.4928, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.6292499746270172, | |
| "grad_norm": 165.53260803222656, | |
| "learning_rate": 3.1910870684392023e-10, | |
| "loss": 17.0441, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.6333096518826753, | |
| "grad_norm": 153.26893615722656, | |
| "learning_rate": 3.1299478554836934e-10, | |
| "loss": 16.6345, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6373693291383336, | |
| "grad_norm": 166.07655334472656, | |
| "learning_rate": 3.069132023829202e-10, | |
| "loss": 16.7557, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6414290063939917, | |
| "grad_norm": 206.7568817138672, | |
| "learning_rate": 3.0086500901358233e-10, | |
| "loss": 17.2537, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6454886836496498, | |
| "grad_norm": 187.02734375, | |
| "learning_rate": 2.94851251332389e-10, | |
| "loss": 16.7615, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.649548360905308, | |
| "grad_norm": 183.49896240234375, | |
| "learning_rate": 2.888729692765365e-10, | |
| "loss": 17.6427, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6536080381609662, | |
| "grad_norm": 201.25961303710938, | |
| "learning_rate": 2.8293119664854974e-10, | |
| "loss": 16.8277, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6576677154166244, | |
| "grad_norm": 156.2751922607422, | |
| "learning_rate": 2.770269609375114e-10, | |
| "loss": 17.5363, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6617273926722825, | |
| "grad_norm": 205.1450958251953, | |
| "learning_rate": 2.71161283141382e-10, | |
| "loss": 18.4642, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6657870699279407, | |
| "grad_norm": 157.36001586914062, | |
| "learning_rate": 2.653351775904427e-10, | |
| "loss": 17.0324, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6698467471835989, | |
| "grad_norm": 197.63540649414062, | |
| "learning_rate": 2.5954965177189e-10, | |
| "loss": 17.0267, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6739064244392571, | |
| "grad_norm": 163.759033203125, | |
| "learning_rate": 2.5380570615561564e-10, | |
| "loss": 17.2452, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 161.09716796875, | |
| "learning_rate": 2.481043340211986e-10, | |
| "loss": 17.429, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6820257789505735, | |
| "grad_norm": 208.68508911132812, | |
| "learning_rate": 2.4244652128614036e-10, | |
| "loss": 17.7347, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6860854562062316, | |
| "grad_norm": 154.68821716308594, | |
| "learning_rate": 2.3683324633537435e-10, | |
| "loss": 16.7167, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6901451334618898, | |
| "grad_norm": 186.65939331054688, | |
| "learning_rate": 2.3126547985207759e-10, | |
| "loss": 17.0754, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6942048107175479, | |
| "grad_norm": 155.3890838623047, | |
| "learning_rate": 2.2574418464981368e-10, | |
| "loss": 17.0158, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6982644879732062, | |
| "grad_norm": 175.33834838867188, | |
| "learning_rate": 2.2027031550603654e-10, | |
| "loss": 17.5807, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.7023241652288643, | |
| "grad_norm": 200.68499755859375, | |
| "learning_rate": 2.148448189969854e-10, | |
| "loss": 15.5709, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7063838424845225, | |
| "grad_norm": 186.82205200195312, | |
| "learning_rate": 2.094686333339953e-10, | |
| "loss": 16.648, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7104435197401806, | |
| "grad_norm": 187.7284698486328, | |
| "learning_rate": 2.0414268820125654e-10, | |
| "loss": 17.0848, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7145031969958389, | |
| "grad_norm": 170.2503662109375, | |
| "learning_rate": 1.9886790459504857e-10, | |
| "loss": 16.8571, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.718562874251497, | |
| "grad_norm": 176.3491668701172, | |
| "learning_rate": 1.9364519466447346e-10, | |
| "loss": 16.7827, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7226225515071552, | |
| "grad_norm": 167.1256866455078, | |
| "learning_rate": 1.8847546155372252e-10, | |
| "loss": 16.8153, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7266822287628134, | |
| "grad_norm": 187.24716186523438, | |
| "learning_rate": 1.8335959924589935e-10, | |
| "loss": 17.8325, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7307419060184716, | |
| "grad_norm": 216.55247497558594, | |
| "learning_rate": 1.7829849240842516e-10, | |
| "loss": 17.5121, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7348015832741297, | |
| "grad_norm": 200.8616180419922, | |
| "learning_rate": 1.732930162400579e-10, | |
| "loss": 16.8064, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7388612605297878, | |
| "grad_norm": 183.3948516845703, | |
| "learning_rate": 1.6834403631954642e-10, | |
| "loss": 17.0833, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7429209377854461, | |
| "grad_norm": 166.15834045410156, | |
| "learning_rate": 1.6345240845594933e-10, | |
| "loss": 17.7809, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7469806150411042, | |
| "grad_norm": 165.8581085205078, | |
| "learning_rate": 1.586189785406429e-10, | |
| "loss": 17.0209, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7510402922967624, | |
| "grad_norm": 212.00918579101562, | |
| "learning_rate": 1.5384458240104482e-10, | |
| "loss": 17.0343, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7550999695524205, | |
| "grad_norm": 185.42967224121094, | |
| "learning_rate": 1.4913004565607665e-10, | |
| "loss": 16.6158, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7591596468080788, | |
| "grad_norm": 195.2454071044922, | |
| "learning_rate": 1.4447618357339333e-10, | |
| "loss": 16.4979, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7632193240637369, | |
| "grad_norm": 214.2625274658203, | |
| "learning_rate": 1.398838009284016e-10, | |
| "loss": 16.691, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7672790013193951, | |
| "grad_norm": 181.38255310058594, | |
| "learning_rate": 1.3535369186509296e-10, | |
| "loss": 16.9062, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7713386785750533, | |
| "grad_norm": 145.416748046875, | |
| "learning_rate": 1.308866397587153e-10, | |
| "loss": 17.7773, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7753983558307115, | |
| "grad_norm": 216.9072723388672, | |
| "learning_rate": 1.264834170803072e-10, | |
| "loss": 16.9568, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7794580330863696, | |
| "grad_norm": 200.9578094482422, | |
| "learning_rate": 1.2214478526311674e-10, | |
| "loss": 17.5622, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7835177103420278, | |
| "grad_norm": 162.93185424804688, | |
| "learning_rate": 1.1787149457092962e-10, | |
| "loss": 16.9736, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.787577387597686, | |
| "grad_norm": 189.59182739257812, | |
| "learning_rate": 1.1366428396832929e-10, | |
| "loss": 15.8744, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7916370648533442, | |
| "grad_norm": 188.7930450439453, | |
| "learning_rate": 1.0952388099290983e-10, | |
| "loss": 17.6766, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7956967421090023, | |
| "grad_norm": 134.07015991210938, | |
| "learning_rate": 1.0545100162946586e-10, | |
| "loss": 16.6428, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7997564193646605, | |
| "grad_norm": 142.8442840576172, | |
| "learning_rate": 1.0144635018618054e-10, | |
| "loss": 17.4065, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.8038160966203187, | |
| "grad_norm": 182.24485778808594, | |
| "learning_rate": 9.751061917283073e-11, | |
| "loss": 17.2971, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8078757738759769, | |
| "grad_norm": 194.0862274169922, | |
| "learning_rate": 9.364448918103474e-11, | |
| "loss": 17.2544, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.811935451131635, | |
| "grad_norm": 191.11993408203125, | |
| "learning_rate": 8.984862876656026e-11, | |
| "loss": 17.1763, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8159951283872932, | |
| "grad_norm": 188.06570434570312, | |
| "learning_rate": 8.612369433371265e-11, | |
| "loss": 16.6179, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8200548056429514, | |
| "grad_norm": 167.46762084960938, | |
| "learning_rate": 8.247033002182614e-11, | |
| "loss": 16.6814, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8241144828986096, | |
| "grad_norm": 159.38058471679688, | |
| "learning_rate": 7.888916759387471e-11, | |
| "loss": 16.5084, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8281741601542677, | |
| "grad_norm": 172.08058166503906, | |
| "learning_rate": 7.538082632722371e-11, | |
| "loss": 17.3695, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.832233837409926, | |
| "grad_norm": 222.47935485839844, | |
| "learning_rate": 7.194591290654024e-11, | |
| "loss": 16.9923, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8362935146655841, | |
| "grad_norm": 189.6218719482422, | |
| "learning_rate": 6.858502131888211e-11, | |
| "loss": 17.5893, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8403531919212422, | |
| "grad_norm": 244.06753540039062, | |
| "learning_rate": 6.52987327509812e-11, | |
| "loss": 17.5454, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8444128691769004, | |
| "grad_norm": 137.7332305908203, | |
| "learning_rate": 6.208761548874082e-11, | |
| "loss": 17.2953, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8484725464325586, | |
| "grad_norm": 201.9289093017578, | |
| "learning_rate": 5.895222481896489e-11, | |
| "loss": 17.7196, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8525322236882168, | |
| "grad_norm": 172.56558227539062, | |
| "learning_rate": 5.5893102933333277e-11, | |
| "loss": 17.0008, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8565919009438749, | |
| "grad_norm": 173.23507690429688, | |
| "learning_rate": 5.291077883464307e-11, | |
| "loss": 16.3006, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8606515781995331, | |
| "grad_norm": 200.89015197753906, | |
| "learning_rate": 5.0005768245330264e-11, | |
| "loss": 17.5656, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8647112554551913, | |
| "grad_norm": 191.20590209960938, | |
| "learning_rate": 4.717857351828731e-11, | |
| "loss": 17.3456, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8687709327108495, | |
| "grad_norm": 166.0770263671875, | |
| "learning_rate": 4.4429683549993106e-11, | |
| "loss": 17.2893, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8728306099665076, | |
| "grad_norm": 172.1090850830078, | |
| "learning_rate": 4.175957369597039e-11, | |
| "loss": 16.464, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8768902872221659, | |
| "grad_norm": 170.6510009765625, | |
| "learning_rate": 3.9168705688583555e-11, | |
| "loss": 17.662, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.880949964477824, | |
| "grad_norm": 201.04290771484375, | |
| "learning_rate": 3.665752755719332e-11, | |
| "loss": 17.4915, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8850096417334822, | |
| "grad_norm": 194.7832794189453, | |
| "learning_rate": 3.422647355068076e-11, | |
| "loss": 18.2301, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8890693189891403, | |
| "grad_norm": 181.28720092773438, | |
| "learning_rate": 3.187596406235421e-11, | |
| "loss": 17.7734, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8931289962447986, | |
| "grad_norm": 152.69996643066406, | |
| "learning_rate": 2.9606405557251637e-11, | |
| "loss": 16.8411, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8971886735004567, | |
| "grad_norm": 212.84933471679688, | |
| "learning_rate": 2.7418190501853014e-11, | |
| "loss": 17.3207, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.9012483507561149, | |
| "grad_norm": 144.8594207763672, | |
| "learning_rate": 2.5311697296211634e-11, | |
| "loss": 16.9442, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.905308028011773, | |
| "grad_norm": 232.24757385253906, | |
| "learning_rate": 2.328729020851961e-11, | |
| "loss": 18.1509, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.9093677052674313, | |
| "grad_norm": 180.50587463378906, | |
| "learning_rate": 2.134531931211542e-11, | |
| "loss": 16.2897, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9134273825230894, | |
| "grad_norm": 176.4561004638672, | |
| "learning_rate": 1.9486120424947908e-11, | |
| "loss": 17.3459, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9174870597787476, | |
| "grad_norm": 177.8277130126953, | |
| "learning_rate": 1.771001505150366e-11, | |
| "loss": 16.3936, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9215467370344058, | |
| "grad_norm": 189.9925994873047, | |
| "learning_rate": 1.6017310327211155e-11, | |
| "loss": 17.3137, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.925606414290064, | |
| "grad_norm": 212.03208923339844, | |
| "learning_rate": 1.4408298965328472e-11, | |
| "loss": 17.907, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9296660915457221, | |
| "grad_norm": 157.50892639160156, | |
| "learning_rate": 1.2883259206325493e-11, | |
| "loss": 16.9568, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9337257688013803, | |
| "grad_norm": 184.62356567382812, | |
| "learning_rate": 1.1442454769769017e-11, | |
| "loss": 18.1454, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9377854460570385, | |
| "grad_norm": 206.690185546875, | |
| "learning_rate": 1.0086134808718562e-11, | |
| "loss": 18.086, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9418451233126967, | |
| "grad_norm": 171.5003662109375, | |
| "learning_rate": 8.814533866641106e-12, | |
| "loss": 18.0924, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9459048005683548, | |
| "grad_norm": 154.81707763671875, | |
| "learning_rate": 7.627871836852652e-12, | |
| "loss": 16.5896, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9499644778240129, | |
| "grad_norm": 193.352783203125, | |
| "learning_rate": 6.52635392449269e-12, | |
| "loss": 18.0086, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9540241550796712, | |
| "grad_norm": 182.68508911132812, | |
| "learning_rate": 5.510170611038701e-12, | |
| "loss": 17.6251, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9580838323353293, | |
| "grad_norm": 220.1875762939453, | |
| "learning_rate": 4.579497621367057e-12, | |
| "loss": 18.5577, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9621435095909875, | |
| "grad_norm": 193.38424682617188, | |
| "learning_rate": 3.734495893365664e-12, | |
| "loss": 18.0829, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9662031868466457, | |
| "grad_norm": 162.79934692382812, | |
| "learning_rate": 2.9753115501032213e-12, | |
| "loss": 17.6267, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9702628641023039, | |
| "grad_norm": 185.84449768066406, | |
| "learning_rate": 2.3020758745610493e-12, | |
| "loss": 17.418, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.974322541357962, | |
| "grad_norm": 176.00831604003906, | |
| "learning_rate": 1.7149052869305794e-12, | |
| "loss": 17.5759, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9783822186136202, | |
| "grad_norm": 163.9122772216797, | |
| "learning_rate": 1.2139013244812924e-12, | |
| "loss": 17.8926, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9824418958692784, | |
| "grad_norm": 172.8951416015625, | |
| "learning_rate": 7.991506240022095e-13, | |
| "loss": 17.5553, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9865015731249366, | |
| "grad_norm": 186.85015869140625, | |
| "learning_rate": 4.70724906820208e-13, | |
| "loss": 18.0582, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9905612503805947, | |
| "grad_norm": 203.409423828125, | |
| "learning_rate": 2.286809663974987e-13, | |
| "loss": 18.464, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9946209276362529, | |
| "grad_norm": 192.5825653076172, | |
| "learning_rate": 7.306065851042654e-14, | |
| "loss": 17.8112, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9986806048919111, | |
| "grad_norm": 174.4022674560547, | |
| "learning_rate": 3.890894011593371e-15, | |
| "loss": 17.7037, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9998985080686086, | |
| "step": 2463, | |
| "total_flos": 0.0, | |
| "train_loss": 17.218061584212457, | |
| "train_runtime": 5138.9594, | |
| "train_samples_per_second": 11.504, | |
| "train_steps_per_second": 0.479 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2463, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |