{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 4302,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00697350069735007,
      "grad_norm": 13.183924674987793,
      "learning_rate": 1.9974895397489544e-05,
      "loss": 3.835,
      "step": 10
    },
    {
      "epoch": 0.01394700139470014,
      "grad_norm": 14.83541202545166,
      "learning_rate": 1.9947001394700142e-05,
      "loss": 3.7714,
      "step": 20
    },
    {
      "epoch": 0.02092050209205021,
      "grad_norm": 9.84648323059082,
      "learning_rate": 1.991910739191074e-05,
      "loss": 3.1422,
      "step": 30
    },
    {
      "epoch": 0.02789400278940028,
      "grad_norm": 11.044331550598145,
      "learning_rate": 1.989121338912134e-05,
      "loss": 2.7074,
      "step": 40
    },
    {
      "epoch": 0.03486750348675035,
      "grad_norm": 8.12322998046875,
      "learning_rate": 1.986331938633194e-05,
      "loss": 2.0078,
      "step": 50
    },
    {
      "epoch": 0.04184100418410042,
      "grad_norm": 4.419774532318115,
      "learning_rate": 1.983542538354254e-05,
      "loss": 2.2417,
      "step": 60
    },
    {
      "epoch": 0.04881450488145049,
      "grad_norm": 8.11199951171875,
      "learning_rate": 1.980753138075314e-05,
      "loss": 2.0051,
      "step": 70
    },
    {
      "epoch": 0.05578800557880056,
      "grad_norm": 3.7404990196228027,
      "learning_rate": 1.9779637377963737e-05,
      "loss": 1.2384,
      "step": 80
    },
    {
      "epoch": 0.06276150627615062,
      "grad_norm": 5.447963714599609,
      "learning_rate": 1.975174337517434e-05,
      "loss": 1.2396,
      "step": 90
    },
    {
      "epoch": 0.0697350069735007,
      "grad_norm": 4.791909217834473,
      "learning_rate": 1.9723849372384937e-05,
      "loss": 1.0734,
      "step": 100
    },
    {
      "epoch": 0.07670850767085077,
      "grad_norm": 6.843250274658203,
      "learning_rate": 1.9695955369595538e-05,
      "loss": 1.0683,
      "step": 110
    },
    {
      "epoch": 0.08368200836820083,
      "grad_norm": 4.563827991485596,
      "learning_rate": 1.966806136680614e-05,
      "loss": 0.9533,
      "step": 120
    },
    {
      "epoch": 0.09065550906555091,
      "grad_norm": 2.3097805976867676,
      "learning_rate": 1.9640167364016738e-05,
      "loss": 1.0514,
      "step": 130
    },
    {
      "epoch": 0.09762900976290098,
      "grad_norm": 2.343554973602295,
      "learning_rate": 1.961227336122734e-05,
      "loss": 0.8546,
      "step": 140
    },
    {
      "epoch": 0.10460251046025104,
      "grad_norm": 4.010223388671875,
      "learning_rate": 1.9584379358437937e-05,
      "loss": 0.6854,
      "step": 150
    },
    {
      "epoch": 0.11157601115760112,
      "grad_norm": 2.506309747695923,
      "learning_rate": 1.955648535564854e-05,
      "loss": 0.9116,
      "step": 160
    },
    {
      "epoch": 0.11854951185495119,
      "grad_norm": 1.248282551765442,
      "learning_rate": 1.9528591352859137e-05,
      "loss": 0.6762,
      "step": 170
    },
    {
      "epoch": 0.12552301255230125,
      "grad_norm": 2.629155397415161,
      "learning_rate": 1.9500697350069738e-05,
      "loss": 0.7382,
      "step": 180
    },
    {
      "epoch": 0.13249651324965134,
      "grad_norm": 2.2664334774017334,
      "learning_rate": 1.9472803347280336e-05,
      "loss": 0.5232,
      "step": 190
    },
    {
      "epoch": 0.1394700139470014,
      "grad_norm": 2.295131206512451,
      "learning_rate": 1.9444909344490938e-05,
      "loss": 0.6542,
      "step": 200
    },
    {
      "epoch": 0.14644351464435146,
      "grad_norm": 2.584252119064331,
      "learning_rate": 1.9417015341701536e-05,
      "loss": 0.5236,
      "step": 210
    },
    {
      "epoch": 0.15341701534170155,
      "grad_norm": 3.074542999267578,
      "learning_rate": 1.9389121338912137e-05,
      "loss": 0.6076,
      "step": 220
    },
    {
      "epoch": 0.1603905160390516,
      "grad_norm": 2.1027579307556152,
      "learning_rate": 1.9361227336122735e-05,
      "loss": 0.4767,
      "step": 230
    },
    {
      "epoch": 0.16736401673640167,
      "grad_norm": 1.3120466470718384,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.4428,
      "step": 240
    },
    {
      "epoch": 0.17433751743375175,
      "grad_norm": 2.9058358669281006,
      "learning_rate": 1.9305439330543935e-05,
      "loss": 0.4286,
      "step": 250
    },
    {
      "epoch": 0.18131101813110181,
      "grad_norm": 3.063037157058716,
      "learning_rate": 1.9277545327754533e-05,
      "loss": 0.5119,
      "step": 260
    },
    {
      "epoch": 0.18828451882845187,
      "grad_norm": 1.2567652463912964,
      "learning_rate": 1.9249651324965134e-05,
      "loss": 0.3206,
      "step": 270
    },
    {
      "epoch": 0.19525801952580196,
      "grad_norm": 1.3967201709747314,
      "learning_rate": 1.9221757322175733e-05,
      "loss": 0.3629,
      "step": 280
    },
    {
      "epoch": 0.20223152022315202,
      "grad_norm": 1.4811025857925415,
      "learning_rate": 1.9193863319386334e-05,
      "loss": 0.3026,
      "step": 290
    },
    {
      "epoch": 0.20920502092050208,
      "grad_norm": 1.2193363904953003,
      "learning_rate": 1.9165969316596932e-05,
      "loss": 0.3382,
      "step": 300
    },
    {
      "epoch": 0.21617852161785217,
      "grad_norm": 1.5240554809570312,
      "learning_rate": 1.9138075313807534e-05,
      "loss": 0.2803,
      "step": 310
    },
    {
      "epoch": 0.22315202231520223,
      "grad_norm": 2.093574285507202,
      "learning_rate": 1.911018131101813e-05,
      "loss": 0.1951,
      "step": 320
    },
    {
      "epoch": 0.2301255230125523,
      "grad_norm": 0.9359086155891418,
      "learning_rate": 1.9082287308228733e-05,
      "loss": 0.2116,
      "step": 330
    },
    {
      "epoch": 0.23709902370990238,
      "grad_norm": 1.443814992904663,
      "learning_rate": 1.905439330543933e-05,
      "loss": 0.2668,
      "step": 340
    },
    {
      "epoch": 0.24407252440725244,
      "grad_norm": 1.0664631128311157,
      "learning_rate": 1.9026499302649933e-05,
      "loss": 0.3009,
      "step": 350
    },
    {
      "epoch": 0.2510460251046025,
      "grad_norm": 0.7187846899032593,
      "learning_rate": 1.899860529986053e-05,
      "loss": 0.1704,
      "step": 360
    },
    {
      "epoch": 0.2580195258019526,
      "grad_norm": 3.616009473800659,
      "learning_rate": 1.8970711297071132e-05,
      "loss": 0.2365,
      "step": 370
    },
    {
      "epoch": 0.2649930264993027,
      "grad_norm": 1.6073416471481323,
      "learning_rate": 1.894281729428173e-05,
      "loss": 0.3163,
      "step": 380
    },
    {
      "epoch": 0.2719665271966527,
      "grad_norm": 3.3605103492736816,
      "learning_rate": 1.8914923291492332e-05,
      "loss": 0.2076,
      "step": 390
    },
    {
      "epoch": 0.2789400278940028,
      "grad_norm": 0.6530489921569824,
      "learning_rate": 1.888702928870293e-05,
      "loss": 0.1256,
      "step": 400
    },
    {
      "epoch": 0.2859135285913529,
      "grad_norm": 0.6921319365501404,
      "learning_rate": 1.885913528591353e-05,
      "loss": 0.228,
      "step": 410
    },
    {
      "epoch": 0.2928870292887029,
      "grad_norm": 0.6970381736755371,
      "learning_rate": 1.883124128312413e-05,
      "loss": 0.208,
      "step": 420
    },
    {
      "epoch": 0.299860529986053,
      "grad_norm": 3.324526071548462,
      "learning_rate": 1.880334728033473e-05,
      "loss": 0.1737,
      "step": 430
    },
    {
      "epoch": 0.3068340306834031,
      "grad_norm": NaN,
      "learning_rate": 1.877545327754533e-05,
      "loss": 0.2422,
      "step": 440
    },
    {
      "epoch": 0.3138075313807531,
      "grad_norm": 1.2172619104385376,
      "learning_rate": 1.8747559274755927e-05,
      "loss": 0.174,
      "step": 450
    },
    {
      "epoch": 0.3207810320781032,
      "grad_norm": 0.3495887219905853,
      "learning_rate": 1.871966527196653e-05,
      "loss": 0.1924,
      "step": 460
    },
    {
      "epoch": 0.3277545327754533,
      "grad_norm": 0.41428402066230774,
      "learning_rate": 1.8691771269177127e-05,
      "loss": 0.0965,
      "step": 470
    },
    {
      "epoch": 0.33472803347280333,
      "grad_norm": 0.6227982044219971,
      "learning_rate": 1.8663877266387728e-05,
      "loss": 0.1796,
      "step": 480
    },
    {
      "epoch": 0.3417015341701534,
      "grad_norm": 4.252852439880371,
      "learning_rate": 1.8635983263598326e-05,
      "loss": 0.1918,
      "step": 490
    },
    {
      "epoch": 0.3486750348675035,
      "grad_norm": 2.1391079425811768,
      "learning_rate": 1.8608089260808928e-05,
      "loss": 0.2705,
      "step": 500
    },
    {
      "epoch": 0.35564853556485354,
      "grad_norm": 3.4021589756011963,
      "learning_rate": 1.8580195258019526e-05,
      "loss": 0.0966,
      "step": 510
    },
    {
      "epoch": 0.36262203626220363,
      "grad_norm": 4.478765487670898,
      "learning_rate": 1.8552301255230127e-05,
      "loss": 0.1666,
      "step": 520
    },
    {
      "epoch": 0.3695955369595537,
      "grad_norm": 3.3071353435516357,
      "learning_rate": 1.8524407252440725e-05,
      "loss": 0.1089,
      "step": 530
    },
    {
      "epoch": 0.37656903765690375,
      "grad_norm": 6.9056267738342285,
      "learning_rate": 1.8496513249651327e-05,
      "loss": 0.1055,
      "step": 540
    },
    {
      "epoch": 0.38354253835425384,
      "grad_norm": 1.1857019662857056,
      "learning_rate": 1.8468619246861925e-05,
      "loss": 0.1114,
      "step": 550
    },
    {
      "epoch": 0.3905160390516039,
      "grad_norm": 2.9364492893218994,
      "learning_rate": 1.8440725244072526e-05,
      "loss": 0.2345,
      "step": 560
    },
    {
      "epoch": 0.39748953974895396,
      "grad_norm": 0.46063023805618286,
      "learning_rate": 1.8412831241283128e-05,
      "loss": 0.0427,
      "step": 570
    },
    {
      "epoch": 0.40446304044630405,
      "grad_norm": 0.4743267893791199,
      "learning_rate": 1.8384937238493726e-05,
      "loss": 0.0756,
      "step": 580
    },
    {
      "epoch": 0.41143654114365413,
      "grad_norm": 0.6988991498947144,
      "learning_rate": 1.8357043235704327e-05,
      "loss": 0.1294,
      "step": 590
    },
    {
      "epoch": 0.41841004184100417,
      "grad_norm": 2.7544264793395996,
      "learning_rate": 1.8329149232914925e-05,
      "loss": 0.2031,
      "step": 600
    },
    {
      "epoch": 0.42538354253835425,
      "grad_norm": 3.5098769664764404,
      "learning_rate": 1.8301255230125527e-05,
      "loss": 0.1283,
      "step": 610
    },
    {
      "epoch": 0.43235704323570434,
      "grad_norm": 3.013094663619995,
      "learning_rate": 1.8273361227336125e-05,
      "loss": 0.1626,
      "step": 620
    },
    {
      "epoch": 0.4393305439330544,
      "grad_norm": 2.9749810695648193,
      "learning_rate": 1.8245467224546723e-05,
      "loss": 0.0544,
      "step": 630
    },
    {
      "epoch": 0.44630404463040446,
      "grad_norm": 0.1516144722700119,
      "learning_rate": 1.8217573221757325e-05,
      "loss": 0.0864,
      "step": 640
    },
    {
      "epoch": 0.45327754532775455,
      "grad_norm": 3.139366626739502,
      "learning_rate": 1.8189679218967923e-05,
      "loss": 0.082,
      "step": 650
    },
    {
      "epoch": 0.4602510460251046,
      "grad_norm": 3.0818684101104736,
      "learning_rate": 1.816178521617852e-05,
      "loss": 0.0263,
      "step": 660
    },
    {
      "epoch": 0.46722454672245467,
      "grad_norm": 0.23410484194755554,
      "learning_rate": 1.8133891213389122e-05,
      "loss": 0.2833,
      "step": 670
    },
    {
      "epoch": 0.47419804741980476,
      "grad_norm": 0.900560200214386,
      "learning_rate": 1.810599721059972e-05,
      "loss": 0.073,
      "step": 680
    },
    {
      "epoch": 0.4811715481171548,
      "grad_norm": 11.757755279541016,
      "learning_rate": 1.8078103207810322e-05,
      "loss": 0.1058,
      "step": 690
    },
    {
      "epoch": 0.4881450488145049,
      "grad_norm": 0.34188902378082275,
      "learning_rate": 1.805020920502092e-05,
      "loss": 0.209,
      "step": 700
    },
    {
      "epoch": 0.49511854951185497,
      "grad_norm": 0.1428012251853943,
      "learning_rate": 1.802231520223152e-05,
      "loss": 0.0565,
      "step": 710
    },
    {
      "epoch": 0.502092050209205,
      "grad_norm": 8.282142639160156,
      "learning_rate": 1.7994421199442123e-05,
      "loss": 0.1955,
      "step": 720
    },
    {
      "epoch": 0.5090655509065551,
      "grad_norm": 0.49546220898628235,
      "learning_rate": 1.796652719665272e-05,
      "loss": 0.2029,
      "step": 730
    },
    {
      "epoch": 0.5160390516039052,
      "grad_norm": 0.4430611729621887,
      "learning_rate": 1.7938633193863322e-05,
      "loss": 0.154,
      "step": 740
    },
    {
      "epoch": 0.5230125523012552,
      "grad_norm": 2.5573275089263916,
      "learning_rate": 1.791073919107392e-05,
      "loss": 0.1891,
      "step": 750
    },
    {
      "epoch": 0.5299860529986054,
      "grad_norm": 0.5785364508628845,
      "learning_rate": 1.7882845188284522e-05,
      "loss": 0.0325,
      "step": 760
    },
    {
      "epoch": 0.5369595536959554,
      "grad_norm": 2.325209856033325,
      "learning_rate": 1.785495118549512e-05,
      "loss": 0.1057,
      "step": 770
    },
    {
      "epoch": 0.5439330543933054,
      "grad_norm": 0.2150566428899765,
      "learning_rate": 1.782705718270572e-05,
      "loss": 0.1506,
      "step": 780
    },
    {
      "epoch": 0.5509065550906556,
      "grad_norm": 0.37389442324638367,
      "learning_rate": 1.779916317991632e-05,
      "loss": 0.0842,
      "step": 790
    },
    {
      "epoch": 0.5578800557880056,
      "grad_norm": 6.229526519775391,
      "learning_rate": 1.777126917712692e-05,
      "loss": 0.1864,
      "step": 800
    },
    {
      "epoch": 0.5648535564853556,
      "grad_norm": 0.1626635193824768,
      "learning_rate": 1.774337517433752e-05,
      "loss": 0.0813,
      "step": 810
    },
    {
      "epoch": 0.5718270571827058,
      "grad_norm": 10.923450469970703,
      "learning_rate": 1.771548117154812e-05,
      "loss": 0.1013,
      "step": 820
    },
    {
      "epoch": 0.5788005578800558,
      "grad_norm": 4.7759294509887695,
      "learning_rate": 1.768758716875872e-05,
      "loss": 0.0739,
      "step": 830
    },
    {
      "epoch": 0.5857740585774058,
      "grad_norm": 11.055768966674805,
      "learning_rate": 1.7659693165969317e-05,
      "loss": 0.1342,
      "step": 840
    },
    {
      "epoch": 0.592747559274756,
      "grad_norm": 1.800850510597229,
      "learning_rate": 1.7631799163179918e-05,
      "loss": 0.0595,
      "step": 850
    },
    {
      "epoch": 0.599721059972106,
      "grad_norm": 6.058688163757324,
      "learning_rate": 1.7603905160390516e-05,
      "loss": 0.16,
      "step": 860
    },
    {
      "epoch": 0.606694560669456,
      "grad_norm": 0.05257971212267876,
      "learning_rate": 1.7576011157601118e-05,
      "loss": 0.0637,
      "step": 870
    },
    {
      "epoch": 0.6136680613668062,
      "grad_norm": 0.12368276715278625,
      "learning_rate": 1.7548117154811716e-05,
      "loss": 0.1242,
      "step": 880
    },
    {
      "epoch": 0.6206415620641562,
      "grad_norm": 0.06948111951351166,
      "learning_rate": 1.7520223152022317e-05,
      "loss": 0.081,
      "step": 890
    },
    {
      "epoch": 0.6276150627615062,
      "grad_norm": 5.596299171447754,
      "learning_rate": 1.7492329149232915e-05,
      "loss": 0.1923,
      "step": 900
    },
    {
      "epoch": 0.6345885634588564,
      "grad_norm": 2.5386273860931396,
      "learning_rate": 1.7464435146443517e-05,
      "loss": 0.0898,
      "step": 910
    },
    {
      "epoch": 0.6415620641562064,
      "grad_norm": 0.06753092259168625,
      "learning_rate": 1.7436541143654115e-05,
      "loss": 0.0224,
      "step": 920
    },
    {
      "epoch": 0.6485355648535565,
      "grad_norm": 0.07754819095134735,
      "learning_rate": 1.7408647140864716e-05,
      "loss": 0.0242,
      "step": 930
    },
    {
      "epoch": 0.6555090655509066,
      "grad_norm": 0.06803246587514877,
      "learning_rate": 1.7380753138075315e-05,
      "loss": 0.0439,
      "step": 940
    },
    {
      "epoch": 0.6624825662482566,
      "grad_norm": 0.08195364475250244,
      "learning_rate": 1.7352859135285916e-05,
      "loss": 0.0437,
      "step": 950
    },
    {
      "epoch": 0.6694560669456067,
      "grad_norm": 15.50480842590332,
      "learning_rate": 1.7324965132496514e-05,
      "loss": 0.1555,
      "step": 960
    },
    {
      "epoch": 0.6764295676429568,
      "grad_norm": 1.3638603687286377,
      "learning_rate": 1.7297071129707116e-05,
      "loss": 0.1336,
      "step": 970
    },
    {
      "epoch": 0.6834030683403068,
      "grad_norm": 0.08070117235183716,
      "learning_rate": 1.7269177126917714e-05,
      "loss": 0.0136,
      "step": 980
    },
    {
      "epoch": 0.6903765690376569,
      "grad_norm": 0.0671255961060524,
      "learning_rate": 1.7241283124128315e-05,
      "loss": 0.0094,
      "step": 990
    },
    {
      "epoch": 0.697350069735007,
      "grad_norm": 0.020939119160175323,
      "learning_rate": 1.7213389121338913e-05,
      "loss": 0.0177,
      "step": 1000
    },
    {
      "epoch": 0.704323570432357,
      "grad_norm": 0.0943412110209465,
      "learning_rate": 1.7185495118549515e-05,
      "loss": 0.1003,
      "step": 1010
    },
    {
      "epoch": 0.7112970711297071,
      "grad_norm": 0.05611201003193855,
      "learning_rate": 1.7157601115760113e-05,
      "loss": 0.0223,
      "step": 1020
    },
    {
      "epoch": 0.7182705718270572,
      "grad_norm": 0.10727556049823761,
      "learning_rate": 1.7129707112970714e-05,
      "loss": 0.1631,
      "step": 1030
    },
    {
      "epoch": 0.7252440725244073,
      "grad_norm": 0.06021064519882202,
      "learning_rate": 1.7101813110181312e-05,
      "loss": 0.0181,
      "step": 1040
    },
    {
      "epoch": 0.7322175732217573,
      "grad_norm": 4.713542938232422,
      "learning_rate": 1.707391910739191e-05,
      "loss": 0.1523,
      "step": 1050
    },
    {
      "epoch": 0.7391910739191074,
      "grad_norm": 0.07371031492948532,
      "learning_rate": 1.7046025104602512e-05,
      "loss": 0.2023,
      "step": 1060
    },
    {
      "epoch": 0.7461645746164575,
      "grad_norm": 0.10017743706703186,
      "learning_rate": 1.701813110181311e-05,
      "loss": 0.1815,
      "step": 1070
    },
    {
      "epoch": 0.7531380753138075,
      "grad_norm": 0.17356480658054352,
      "learning_rate": 1.699023709902371e-05,
      "loss": 0.0531,
      "step": 1080
    },
    {
      "epoch": 0.7601115760111576,
      "grad_norm": 6.460514068603516,
      "learning_rate": 1.696234309623431e-05,
      "loss": 0.1038,
      "step": 1090
    },
    {
      "epoch": 0.7670850767085077,
      "grad_norm": 1.358777403831482,
      "learning_rate": 1.693444909344491e-05,
      "loss": 0.0947,
      "step": 1100
    },
    {
      "epoch": 0.7740585774058577,
      "grad_norm": 0.11568786948919296,
      "learning_rate": 1.690655509065551e-05,
      "loss": 0.174,
      "step": 1110
    },
    {
      "epoch": 0.7810320781032078,
      "grad_norm": 1.7592473030090332,
      "learning_rate": 1.687866108786611e-05,
      "loss": 0.0833,
      "step": 1120
    },
    {
      "epoch": 0.7880055788005579,
      "grad_norm": 0.06832170486450195,
      "learning_rate": 1.685076708507671e-05,
      "loss": 0.1201,
      "step": 1130
    },
    {
      "epoch": 0.7949790794979079,
      "grad_norm": 0.04899122938513756,
      "learning_rate": 1.682287308228731e-05,
      "loss": 0.1153,
      "step": 1140
    },
    {
      "epoch": 0.8019525801952581,
      "grad_norm": 8.826611518859863,
      "learning_rate": 1.6794979079497908e-05,
      "loss": 0.0943,
      "step": 1150
    },
    {
      "epoch": 0.8089260808926081,
      "grad_norm": 1.2349894046783447,
      "learning_rate": 1.676708507670851e-05,
      "loss": 0.0512,
      "step": 1160
    },
    {
      "epoch": 0.8158995815899581,
      "grad_norm": 0.04042017459869385,
      "learning_rate": 1.6739191073919108e-05,
      "loss": 0.0176,
      "step": 1170
    },
    {
      "epoch": 0.8228730822873083,
      "grad_norm": 3.0442705154418945,
      "learning_rate": 1.671129707112971e-05,
      "loss": 0.0891,
      "step": 1180
    },
    {
      "epoch": 0.8298465829846583,
      "grad_norm": 0.11100644618272781,
      "learning_rate": 1.668340306834031e-05,
      "loss": 0.1416,
      "step": 1190
    },
    {
      "epoch": 0.8368200836820083,
      "grad_norm": 2.700934410095215,
      "learning_rate": 1.665550906555091e-05,
      "loss": 0.1365,
      "step": 1200
    },
    {
      "epoch": 0.8437935843793585,
      "grad_norm": 0.2526993155479431,
      "learning_rate": 1.6627615062761507e-05,
      "loss": 0.0633,
      "step": 1210
    },
    {
      "epoch": 0.8507670850767085,
      "grad_norm": 0.034327197819948196,
      "learning_rate": 1.6599721059972108e-05,
      "loss": 0.0566,
      "step": 1220
    },
    {
      "epoch": 0.8577405857740585,
      "grad_norm": 6.361912250518799,
      "learning_rate": 1.6571827057182706e-05,
      "loss": 0.0814,
      "step": 1230
    },
    {
      "epoch": 0.8647140864714087,
      "grad_norm": 0.2599698007106781,
      "learning_rate": 1.6543933054393308e-05,
      "loss": 0.0523,
      "step": 1240
    },
    {
      "epoch": 0.8716875871687587,
      "grad_norm": 0.1802450269460678,
      "learning_rate": 1.6516039051603906e-05,
      "loss": 0.0672,
      "step": 1250
    },
    {
      "epoch": 0.8786610878661087,
      "grad_norm": 0.05986528471112251,
      "learning_rate": 1.6488145048814504e-05,
      "loss": 0.031,
      "step": 1260
    },
    {
      "epoch": 0.8856345885634589,
      "grad_norm": 5.647293567657471,
      "learning_rate": 1.6460251046025105e-05,
      "loss": 0.1258,
      "step": 1270
    },
    {
      "epoch": 0.8926080892608089,
      "grad_norm": 0.18946939706802368,
      "learning_rate": 1.6432357043235704e-05,
      "loss": 0.1959,
      "step": 1280
    },
    {
      "epoch": 0.899581589958159,
      "grad_norm": 0.03759470209479332,
      "learning_rate": 1.6404463040446305e-05,
      "loss": 0.0162,
      "step": 1290
    },
    {
      "epoch": 0.9065550906555091,
      "grad_norm": 0.320306658744812,
      "learning_rate": 1.6376569037656903e-05,
      "loss": 0.0548,
      "step": 1300
    },
    {
      "epoch": 0.9135285913528591,
      "grad_norm": 0.04905885457992554,
      "learning_rate": 1.6348675034867505e-05,
      "loss": 0.0596,
      "step": 1310
    },
    {
      "epoch": 0.9205020920502092,
      "grad_norm": 0.03322403505444527,
      "learning_rate": 1.6320781032078103e-05,
      "loss": 0.1531,
      "step": 1320
    },
    {
      "epoch": 0.9274755927475593,
      "grad_norm": 0.051219817250967026,
      "learning_rate": 1.6292887029288704e-05,
      "loss": 0.0047,
      "step": 1330
    },
    {
      "epoch": 0.9344490934449093,
      "grad_norm": 0.1239551454782486,
      "learning_rate": 1.6264993026499306e-05,
      "loss": 0.1413,
      "step": 1340
    },
    {
      "epoch": 0.9414225941422594,
      "grad_norm": 0.1667717546224594,
      "learning_rate": 1.6237099023709904e-05,
      "loss": 0.1345,
      "step": 1350
    },
    {
      "epoch": 0.9483960948396095,
      "grad_norm": 5.152453899383545,
      "learning_rate": 1.6209205020920505e-05,
      "loss": 0.038,
      "step": 1360
    },
    {
      "epoch": 0.9553695955369595,
      "grad_norm": 10.803096771240234,
      "learning_rate": 1.6181311018131103e-05,
      "loss": 0.0478,
      "step": 1370
    },
    {
      "epoch": 0.9623430962343096,
      "grad_norm": 5.05436372756958,
      "learning_rate": 1.6153417015341705e-05,
      "loss": 0.0641,
      "step": 1380
    },
    {
      "epoch": 0.9693165969316597,
      "grad_norm": 0.05341633781790733,
      "learning_rate": 1.6125523012552303e-05,
      "loss": 0.1159,
      "step": 1390
    },
    {
      "epoch": 0.9762900976290098,
      "grad_norm": 0.030432600528001785,
      "learning_rate": 1.6097629009762904e-05,
      "loss": 0.2422,
      "step": 1400
    },
    {
      "epoch": 0.9832635983263598,
      "grad_norm": 0.0488433800637722,
      "learning_rate": 1.6069735006973502e-05,
      "loss": 0.017,
      "step": 1410
    },
    {
      "epoch": 0.9902370990237099,
      "grad_norm": 0.11435980349779129,
      "learning_rate": 1.6041841004184104e-05,
      "loss": 0.0678,
      "step": 1420
    },
    {
      "epoch": 0.99721059972106,
      "grad_norm": 0.04273105785250664,
      "learning_rate": 1.6013947001394702e-05,
      "loss": 0.148,
      "step": 1430
    },
    {
      "epoch": 1.00418410041841,
      "grad_norm": 0.03464385122060776,
      "learning_rate": 1.59860529986053e-05,
      "loss": 0.0116,
      "step": 1440
    },
    {
      "epoch": 1.0111576011157601,
      "grad_norm": 0.051146648824214935,
      "learning_rate": 1.59581589958159e-05,
      "loss": 0.0471,
      "step": 1450
    },
    {
      "epoch": 1.0181311018131103,
      "grad_norm": 0.03880644217133522,
      "learning_rate": 1.59302649930265e-05,
      "loss": 0.0805,
      "step": 1460
    },
    {
      "epoch": 1.0251046025104602,
      "grad_norm": 0.04440051317214966,
      "learning_rate": 1.5902370990237098e-05,
      "loss": 0.0185,
      "step": 1470
    },
    {
      "epoch": 1.0320781032078103,
      "grad_norm": 0.04550454020500183,
      "learning_rate": 1.58744769874477e-05,
      "loss": 0.0093,
      "step": 1480
    },
    {
      "epoch": 1.0390516039051605,
      "grad_norm": 0.062290117144584656,
      "learning_rate": 1.58465829846583e-05,
      "loss": 0.0061,
      "step": 1490
    },
    {
      "epoch": 1.0460251046025104,
      "grad_norm": 0.06050781160593033,
      "learning_rate": 1.58186889818689e-05,
      "loss": 0.0609,
      "step": 1500
    },
    {
      "epoch": 1.0529986052998606,
      "grad_norm": 0.7175278067588806,
      "learning_rate": 1.57907949790795e-05,
      "loss": 0.0077,
      "step": 1510
    },
    {
      "epoch": 1.0599721059972107,
      "grad_norm": 4.573083877563477,
      "learning_rate": 1.5762900976290098e-05,
      "loss": 0.0284,
      "step": 1520
    },
    {
      "epoch": 1.0669456066945606,
      "grad_norm": 5.817536354064941,
      "learning_rate": 1.57350069735007e-05,
      "loss": 0.0581,
      "step": 1530
    },
    {
      "epoch": 1.0739191073919108,
      "grad_norm": 1.3729101419448853,
      "learning_rate": 1.5707112970711298e-05,
      "loss": 0.1075,
      "step": 1540
    },
    {
      "epoch": 1.080892608089261,
      "grad_norm": 0.019089030101895332,
      "learning_rate": 1.56792189679219e-05,
      "loss": 0.1176,
      "step": 1550
    },
    {
      "epoch": 1.0878661087866108,
      "grad_norm": 0.05113361030817032,
      "learning_rate": 1.5651324965132497e-05,
      "loss": 0.011,
      "step": 1560
    },
    {
      "epoch": 1.094839609483961,
      "grad_norm": 0.15870462357997894,
      "learning_rate": 1.56234309623431e-05,
      "loss": 0.0555,
      "step": 1570
    },
    {
      "epoch": 1.1018131101813111,
      "grad_norm": 5.806714057922363,
      "learning_rate": 1.5595536959553697e-05,
      "loss": 0.1076,
      "step": 1580
    },
    {
      "epoch": 1.108786610878661,
      "grad_norm": 0.043709512799978256,
      "learning_rate": 1.55676429567643e-05,
      "loss": 0.1412,
      "step": 1590
    },
    {
      "epoch": 1.1157601115760112,
      "grad_norm": 3.896087884902954,
      "learning_rate": 1.5539748953974896e-05,
      "loss": 0.2978,
      "step": 1600
    },
    {
      "epoch": 1.1227336122733613,
      "grad_norm": 0.6703562140464783,
      "learning_rate": 1.5511854951185498e-05,
      "loss": 0.0219,
      "step": 1610
    },
    {
      "epoch": 1.1297071129707112,
      "grad_norm": 0.13185322284698486,
      "learning_rate": 1.5483960948396096e-05,
      "loss": 0.0379,
      "step": 1620
    },
    {
      "epoch": 1.1366806136680614,
      "grad_norm": 0.07004066556692123,
      "learning_rate": 1.5456066945606697e-05,
      "loss": 0.0063,
      "step": 1630
    },
    {
      "epoch": 1.1436541143654115,
      "grad_norm": 0.06793645024299622,
      "learning_rate": 1.5428172942817296e-05,
      "loss": 0.0618,
      "step": 1640
    },
    {
      "epoch": 1.1506276150627615,
      "grad_norm": 0.12030527740716934,
      "learning_rate": 1.5400278940027894e-05,
      "loss": 0.111,
      "step": 1650
    },
    {
      "epoch": 1.1576011157601116,
      "grad_norm": 0.018711797893047333,
      "learning_rate": 1.5372384937238495e-05,
      "loss": 0.0263,
      "step": 1660
    },
    {
      "epoch": 1.1645746164574617,
      "grad_norm": 0.01281982846558094,
      "learning_rate": 1.5344490934449093e-05,
      "loss": 0.0676,
      "step": 1670
    },
    {
      "epoch": 1.1715481171548117,
      "grad_norm": 10.88961410522461,
      "learning_rate": 1.5316596931659695e-05,
      "loss": 0.039,
      "step": 1680
    },
    {
      "epoch": 1.1785216178521618,
      "grad_norm": 0.0368611216545105,
      "learning_rate": 1.5288702928870293e-05,
      "loss": 0.0299,
      "step": 1690
    },
    {
      "epoch": 1.185495118549512,
      "grad_norm": 0.43271970748901367,
      "learning_rate": 1.5260808926080894e-05,
      "loss": 0.0125,
      "step": 1700
    },
    {
      "epoch": 1.1924686192468619,
      "grad_norm": 0.038611456751823425,
      "learning_rate": 1.5232914923291492e-05,
      "loss": 0.0692,
      "step": 1710
    },
    {
      "epoch": 1.199442119944212,
      "grad_norm": 0.05871947854757309,
      "learning_rate": 1.5205020920502094e-05,
      "loss": 0.0361,
      "step": 1720
    },
    {
      "epoch": 1.2064156206415622,
      "grad_norm": 0.13699010014533997,
      "learning_rate": 1.5177126917712692e-05,
      "loss": 0.018,
      "step": 1730
    },
    {
      "epoch": 1.213389121338912,
      "grad_norm": 0.028825167566537857,
      "learning_rate": 1.5149232914923293e-05,
      "loss": 0.0237,
      "step": 1740
    },
    {
      "epoch": 1.2203626220362622,
      "grad_norm": 6.847581386566162,
      "learning_rate": 1.5121338912133891e-05,
      "loss": 0.1168,
      "step": 1750
    },
    {
      "epoch": 1.2273361227336124,
      "grad_norm": 0.048287533223629,
      "learning_rate": 1.5093444909344493e-05,
      "loss": 0.0206,
      "step": 1760
    },
    {
      "epoch": 1.2343096234309623,
      "grad_norm": 0.5911905169487,
      "learning_rate": 1.5065550906555091e-05,
      "loss": 0.004,
      "step": 1770
    },
    {
      "epoch": 1.2412831241283124,
      "grad_norm": 2.227172374725342,
      "learning_rate": 1.5037656903765692e-05,
      "loss": 0.0032,
      "step": 1780
    },
    {
      "epoch": 1.2482566248256626,
      "grad_norm": 0.014612744562327862,
      "learning_rate": 1.5009762900976292e-05,
      "loss": 0.1276,
      "step": 1790
    },
    {
      "epoch": 1.2552301255230125,
      "grad_norm": 0.04997061565518379,
      "learning_rate": 1.498186889818689e-05,
      "loss": 0.07,
      "step": 1800
    },
    {
      "epoch": 1.2622036262203626,
      "grad_norm": 0.033887382596731186,
      "learning_rate": 1.4953974895397492e-05,
      "loss": 0.0372,
      "step": 1810
    },
    {
      "epoch": 1.2691771269177128,
      "grad_norm": 0.017684003338217735,
      "learning_rate": 1.492608089260809e-05,
      "loss": 0.0411,
      "step": 1820
    },
    {
      "epoch": 1.2761506276150627,
      "grad_norm": 7.278536319732666,
      "learning_rate": 1.4898186889818691e-05,
      "loss": 0.1178,
      "step": 1830
    },
    {
      "epoch": 1.2831241283124128,
      "grad_norm": 0.05286577343940735,
      "learning_rate": 1.487029288702929e-05,
      "loss": 0.2894,
      "step": 1840
    },
    {
      "epoch": 1.2900976290097628,
      "grad_norm": 0.13287349045276642,
      "learning_rate": 1.4842398884239891e-05,
      "loss": 0.1081,
      "step": 1850
    },
    {
      "epoch": 1.297071129707113,
      "grad_norm": 0.031001577153801918,
      "learning_rate": 1.4814504881450489e-05,
      "loss": 0.0861,
      "step": 1860
    },
    {
      "epoch": 1.304044630404463,
      "grad_norm": 0.08806217461824417,
      "learning_rate": 1.4786610878661089e-05,
      "loss": 0.1862,
      "step": 1870
    },
    {
      "epoch": 1.3110181311018132,
      "grad_norm": 0.025183405727148056,
      "learning_rate": 1.4758716875871689e-05,
      "loss": 0.0808,
      "step": 1880
    },
    {
      "epoch": 1.3179916317991631,
      "grad_norm": 0.04694396257400513,
      "learning_rate": 1.4730822873082288e-05,
      "loss": 0.161,
      "step": 1890
    },
    {
      "epoch": 1.3249651324965133,
      "grad_norm": 17.191736221313477,
      "learning_rate": 1.4702928870292888e-05,
      "loss": 0.0253,
      "step": 1900
    },
    {
      "epoch": 1.3319386331938632,
      "grad_norm": 0.03222784027457237,
      "learning_rate": 1.4675034867503488e-05,
      "loss": 0.0053,
      "step": 1910
    },
    {
      "epoch": 1.3389121338912133,
      "grad_norm": 0.033389899879693985,
      "learning_rate": 1.4647140864714086e-05,
      "loss": 0.2636,
      "step": 1920
    },
    {
      "epoch": 1.3458856345885635,
      "grad_norm": 0.5124267935752869,
      "learning_rate": 1.4619246861924687e-05,
      "loss": 0.0165,
      "step": 1930
    },
    {
      "epoch": 1.3528591352859136,
      "grad_norm": 0.01550813764333725,
      "learning_rate": 1.4591352859135289e-05,
      "loss": 0.1907,
      "step": 1940
    },
    {
      "epoch": 1.3598326359832635,
      "grad_norm": 0.47707241773605347,
      "learning_rate": 1.4563458856345887e-05,
      "loss": 0.1509,
      "step": 1950
    },
    {
      "epoch": 1.3668061366806137,
      "grad_norm": 0.030653545632958412,
      "learning_rate": 1.4535564853556487e-05,
      "loss": 0.0977,
      "step": 1960
    },
    {
      "epoch": 1.3737796373779636,
      "grad_norm": 0.2786136865615845,
      "learning_rate": 1.4507670850767087e-05,
      "loss": 0.0355,
      "step": 1970
    },
    {
      "epoch": 1.3807531380753137,
      "grad_norm": 0.13970093429088593,
      "learning_rate": 1.4479776847977686e-05,
      "loss": 0.0175,
      "step": 1980
    },
    {
      "epoch": 1.387726638772664,
      "grad_norm": 1.529632329940796,
      "learning_rate": 1.4451882845188286e-05,
      "loss": 0.1932,
      "step": 1990
    },
    {
      "epoch": 1.394700139470014,
      "grad_norm": 0.02077634632587433,
      "learning_rate": 1.4423988842398886e-05,
      "loss": 0.0691,
      "step": 2000
    },
    {
      "epoch": 1.401673640167364,
      "grad_norm": 0.529776394367218,
      "learning_rate": 1.4396094839609484e-05,
      "loss": 0.0948,
      "step": 2010
    },
    {
      "epoch": 1.408647140864714,
      "grad_norm": 0.006440140772610903,
      "learning_rate": 1.4368200836820085e-05,
      "loss": 0.1153,
      "step": 2020
    },
    {
      "epoch": 1.415620641562064,
      "grad_norm": 0.049642570316791534,
      "learning_rate": 1.4340306834030684e-05,
      "loss": 0.002,
      "step": 2030
    },
    {
      "epoch": 1.4225941422594142,
      "grad_norm": 0.017692767083644867,
      "learning_rate": 1.4312412831241285e-05,
      "loss": 0.005,
      "step": 2040
    },
    {
      "epoch": 1.4295676429567643,
      "grad_norm": 3.4156153202056885,
      "learning_rate": 1.4284518828451883e-05,
      "loss": 0.0185,
      "step": 2050
    },
    {
      "epoch": 1.4365411436541144,
      "grad_norm": 0.01563469134271145,
      "learning_rate": 1.4256624825662485e-05,
      "loss": 0.0187,
      "step": 2060
    },
    {
      "epoch": 1.4435146443514644,
      "grad_norm": 0.016272351145744324,
      "learning_rate": 1.4228730822873083e-05,
      "loss": 0.0831,
      "step": 2070
    },
    {
      "epoch": 1.4504881450488145,
      "grad_norm": 0.02539738453924656,
      "learning_rate": 1.4200836820083682e-05,
      "loss": 0.0104,
      "step": 2080
    },
    {
      "epoch": 1.4574616457461644,
      "grad_norm": 0.036670733243227005,
      "learning_rate": 1.4172942817294282e-05,
      "loss": 0.0592,
      "step": 2090
    },
    {
      "epoch": 1.4644351464435146,
      "grad_norm": 0.04291674867272377,
      "learning_rate": 1.4145048814504882e-05,
      "loss": 0.0906,
      "step": 2100
    },
    {
      "epoch": 1.4714086471408647,
      "grad_norm": 9.656791687011719,
      "learning_rate": 1.4117154811715483e-05,
      "loss": 0.0653,
      "step": 2110
    },
    {
      "epoch": 1.4783821478382149,
      "grad_norm": 0.19923704862594604,
      "learning_rate": 1.4089260808926082e-05,
      "loss": 0.0298,
      "step": 2120
    },
    {
      "epoch": 1.4853556485355648,
      "grad_norm": 19.535799026489258,
      "learning_rate": 1.4061366806136683e-05,
      "loss": 0.1084,
      "step": 2130
    },
    {
      "epoch": 1.492329149232915,
      "grad_norm": 0.007252114824950695,
      "learning_rate": 1.4033472803347281e-05,
      "loss": 0.0036,
      "step": 2140
    },
    {
      "epoch": 1.499302649930265,
      "grad_norm": 7.753361701965332,
      "learning_rate": 1.4005578800557883e-05,
      "loss": 0.0369,
      "step": 2150
    },
    {
      "epoch": 1.506276150627615,
      "grad_norm": 2.255980968475342,
      "learning_rate": 1.397768479776848e-05,
      "loss": 0.0174,
      "step": 2160
    },
    {
      "epoch": 1.5132496513249651,
      "grad_norm": 0.9268475770950317,
      "learning_rate": 1.394979079497908e-05,
      "loss": 0.119,
      "step": 2170
    },
    {
      "epoch": 1.5202231520223153,
      "grad_norm": 1.6389845609664917,
      "learning_rate": 1.392189679218968e-05,
      "loss": 0.0027,
      "step": 2180
    },
    {
      "epoch": 1.5271966527196654,
      "grad_norm": 13.793027877807617,
      "learning_rate": 1.389400278940028e-05,
      "loss": 0.0421,
      "step": 2190
    },
    {
      "epoch": 1.5341701534170153,
      "grad_norm": 15.437527656555176,
      "learning_rate": 1.386610878661088e-05,
      "loss": 0.1532,
      "step": 2200
    },
    {
      "epoch": 1.5411436541143653,
      "grad_norm": 0.054035939276218414,
      "learning_rate": 1.383821478382148e-05,
      "loss": 0.0961,
      "step": 2210
    },
    {
      "epoch": 1.5481171548117154,
      "grad_norm": 0.01988278515636921,
      "learning_rate": 1.3810320781032078e-05,
      "loss": 0.0214,
      "step": 2220
    },
    {
      "epoch": 1.5550906555090656,
      "grad_norm": 3.2793631553649902,
      "learning_rate": 1.3782426778242679e-05,
      "loss": 0.115,
      "step": 2230
    },
    {
      "epoch": 1.5620641562064157,
      "grad_norm": 12.380880355834961,
      "learning_rate": 1.3754532775453277e-05,
      "loss": 0.1112,
      "step": 2240
    },
    {
      "epoch": 1.5690376569037658,
      "grad_norm": 0.30685436725616455,
      "learning_rate": 1.3726638772663879e-05,
      "loss": 0.0509,
      "step": 2250
    },
    {
      "epoch": 1.5760111576011158,
      "grad_norm": 0.05449102073907852,
      "learning_rate": 1.3698744769874478e-05,
      "loss": 0.0088,
      "step": 2260
    },
    {
      "epoch": 1.5829846582984657,
      "grad_norm": 18.799972534179688,
      "learning_rate": 1.3670850767085078e-05,
      "loss": 0.0667,
      "step": 2270
    },
    {
      "epoch": 1.5899581589958158,
      "grad_norm": 0.01865663006901741,
      "learning_rate": 1.3642956764295678e-05,
      "loss": 0.141,
      "step": 2280
    },
    {
      "epoch": 1.596931659693166,
      "grad_norm": 0.021439887583255768,
      "learning_rate": 1.3615062761506278e-05,
      "loss": 0.0022,
      "step": 2290
    },
    {
      "epoch": 1.6039051603905161,
      "grad_norm": 0.1574893295764923,
      "learning_rate": 1.3587168758716878e-05,
      "loss": 0.0027,
      "step": 2300
    },
    {
      "epoch": 1.6108786610878663,
      "grad_norm": 6.708405494689941,
      "learning_rate": 1.3559274755927476e-05,
      "loss": 0.0474,
      "step": 2310
    },
    {
      "epoch": 1.6178521617852162,
      "grad_norm": 0.020400822162628174,
      "learning_rate": 1.3531380753138077e-05,
      "loss": 0.0521,
      "step": 2320
    },
    {
      "epoch": 1.624825662482566,
      "grad_norm": 0.012889823876321316,
      "learning_rate": 1.3503486750348675e-05,
      "loss": 0.0589,
      "step": 2330
    },
    {
      "epoch": 1.6317991631799162,
      "grad_norm": 0.03564007952809334,
      "learning_rate": 1.3475592747559277e-05,
      "loss": 0.0519,
      "step": 2340
    },
    {
      "epoch": 1.6387726638772664,
      "grad_norm": 0.03289749100804329,
      "learning_rate": 1.3447698744769875e-05,
      "loss": 0.1251,
      "step": 2350
    },
    {
      "epoch": 1.6457461645746165,
      "grad_norm": 0.011527667753398418,
      "learning_rate": 1.3419804741980476e-05,
      "loss": 0.0593,
      "step": 2360
    },
    {
      "epoch": 1.6527196652719667,
      "grad_norm": 0.007492201868444681,
      "learning_rate": 1.3391910739191074e-05,
      "loss": 0.0047,
      "step": 2370
    },
    {
      "epoch": 1.6596931659693166,
      "grad_norm": 0.02754775807261467,
      "learning_rate": 1.3364016736401674e-05,
      "loss": 0.0077,
      "step": 2380
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.006202343851327896,
      "learning_rate": 1.3336122733612274e-05,
      "loss": 0.0031,
      "step": 2390
    },
    {
      "epoch": 1.6736401673640167,
      "grad_norm": 0.008386103436350822,
      "learning_rate": 1.3308228730822874e-05,
      "loss": 0.0455,
      "step": 2400
    },
    {
      "epoch": 1.6806136680613668,
      "grad_norm": 0.027837343513965607,
      "learning_rate": 1.3280334728033475e-05,
      "loss": 0.0758,
      "step": 2410
    },
    {
      "epoch": 1.687587168758717,
      "grad_norm": 0.027335721999406815,
      "learning_rate": 1.3252440725244073e-05,
      "loss": 0.1159,
      "step": 2420
    },
    {
      "epoch": 1.694560669456067,
      "grad_norm": 0.030841577798128128,
      "learning_rate": 1.3224546722454675e-05,
      "loss": 0.0242,
      "step": 2430
    },
    {
      "epoch": 1.701534170153417,
      "grad_norm": 0.1417429894208908,
      "learning_rate": 1.3196652719665273e-05,
      "loss": 0.005,
      "step": 2440
    },
    {
      "epoch": 1.708507670850767,
      "grad_norm": 0.05901753529906273,
      "learning_rate": 1.3168758716875874e-05,
      "loss": 0.0349,
      "step": 2450
    },
    {
      "epoch": 1.715481171548117,
      "grad_norm": 9.147476196289062,
      "learning_rate": 1.3140864714086472e-05,
      "loss": 0.0876,
      "step": 2460
    },
    {
      "epoch": 1.7224546722454672,
      "grad_norm": 0.012487313710153103,
      "learning_rate": 1.3112970711297072e-05,
      "loss": 0.0508,
      "step": 2470
    },
    {
      "epoch": 1.7294281729428174,
      "grad_norm": 0.045956723392009735,
      "learning_rate": 1.3085076708507672e-05,
      "loss": 0.0589,
      "step": 2480
    },
    {
      "epoch": 1.7364016736401675,
      "grad_norm": 0.013152926228940487,
      "learning_rate": 1.3057182705718272e-05,
      "loss": 0.0377,
      "step": 2490
    },
    {
      "epoch": 1.7433751743375174,
      "grad_norm": 0.13610075414180756,
      "learning_rate": 1.3029288702928871e-05,
      "loss": 0.1634,
      "step": 2500
    },
    {
      "epoch": 1.7503486750348674,
      "grad_norm": 0.012711451388895512,
      "learning_rate": 1.3001394700139471e-05,
      "loss": 0.0012,
      "step": 2510
    },
    {
      "epoch": 1.7573221757322175,
      "grad_norm": 15.870648384094238,
      "learning_rate": 1.297350069735007e-05,
      "loss": 0.0779,
      "step": 2520
    },
    {
      "epoch": 1.7642956764295676,
      "grad_norm": 0.06364299356937408,
      "learning_rate": 1.294560669456067e-05,
      "loss": 0.0842,
      "step": 2530
    },
    {
      "epoch": 1.7712691771269178,
      "grad_norm": 0.19660770893096924,
      "learning_rate": 1.2917712691771269e-05,
      "loss": 0.0031,
      "step": 2540
    },
    {
      "epoch": 1.778242677824268,
      "grad_norm": 0.012577497400343418,
      "learning_rate": 1.288981868898187e-05,
      "loss": 0.0342,
      "step": 2550
    },
    {
      "epoch": 1.7852161785216178,
      "grad_norm": 0.02324119582772255,
      "learning_rate": 1.286192468619247e-05,
      "loss": 0.0026,
      "step": 2560
    },
    {
      "epoch": 1.7921896792189678,
      "grad_norm": 0.010656113736331463,
      "learning_rate": 1.283403068340307e-05,
      "loss": 0.0023,
      "step": 2570
    },
    {
      "epoch": 1.799163179916318,
      "grad_norm": 0.007635409012436867,
      "learning_rate": 1.280613668061367e-05,
      "loss": 0.0607,
      "step": 2580
    },
    {
      "epoch": 1.806136680613668,
      "grad_norm": 1.153235912322998,
      "learning_rate": 1.2778242677824268e-05,
      "loss": 0.2041,
      "step": 2590
    },
    {
      "epoch": 1.8131101813110182,
      "grad_norm": 11.79737663269043,
      "learning_rate": 1.275034867503487e-05,
      "loss": 0.0801,
      "step": 2600
    },
    {
      "epoch": 1.8200836820083683,
      "grad_norm": 1.4480602741241455,
      "learning_rate": 1.2722454672245467e-05,
      "loss": 0.0129,
      "step": 2610
    },
    {
      "epoch": 1.8270571827057183,
      "grad_norm": 0.05854364112019539,
      "learning_rate": 1.2694560669456069e-05,
      "loss": 0.0485,
      "step": 2620
    },
    {
      "epoch": 1.8340306834030682,
      "grad_norm": 0.01310009602457285,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 0.065,
      "step": 2630
    },
    {
      "epoch": 1.8410041841004183,
      "grad_norm": 0.12124790251255035,
      "learning_rate": 1.2638772663877268e-05,
      "loss": 0.0031,
      "step": 2640
    },
    {
      "epoch": 1.8479776847977685,
      "grad_norm": 0.06004326790571213,
      "learning_rate": 1.2610878661087866e-05,
      "loss": 0.0682,
      "step": 2650
    },
    {
      "epoch": 1.8549511854951186,
      "grad_norm": 0.015955684706568718,
      "learning_rate": 1.2582984658298468e-05,
      "loss": 0.0437,
      "step": 2660
    },
    {
      "epoch": 1.8619246861924688,
      "grad_norm": 0.005588918924331665,
      "learning_rate": 1.2555090655509066e-05,
      "loss": 0.0257,
      "step": 2670
    },
    {
      "epoch": 1.8688981868898187,
      "grad_norm": 0.028774168342351913,
      "learning_rate": 1.2527196652719666e-05,
      "loss": 0.0281,
      "step": 2680
    },
    {
      "epoch": 1.8758716875871686,
      "grad_norm": 0.01642877236008644,
      "learning_rate": 1.2499302649930265e-05,
      "loss": 0.0204,
      "step": 2690
    },
    {
      "epoch": 1.8828451882845187,
      "grad_norm": 0.3172270357608795,
      "learning_rate": 1.2471408647140865e-05,
      "loss": 0.0508,
      "step": 2700
    },
    {
      "epoch": 1.8898186889818689,
      "grad_norm": 0.049273423850536346,
      "learning_rate": 1.2443514644351467e-05,
      "loss": 0.0508,
      "step": 2710
    },
    {
      "epoch": 1.896792189679219,
      "grad_norm": 0.018894419074058533,
      "learning_rate": 1.2415620641562065e-05,
      "loss": 0.0015,
      "step": 2720
    },
    {
      "epoch": 1.9037656903765692,
      "grad_norm": 0.04853112995624542,
      "learning_rate": 1.2387726638772666e-05,
      "loss": 0.0027,
      "step": 2730
    },
    {
      "epoch": 1.910739191073919,
      "grad_norm": 0.017383117228746414,
      "learning_rate": 1.2359832635983264e-05,
      "loss": 0.0127,
      "step": 2740
    },
    {
      "epoch": 1.917712691771269,
      "grad_norm": 0.003187231719493866,
      "learning_rate": 1.2331938633193866e-05,
      "loss": 0.0153,
      "step": 2750
    },
    {
      "epoch": 1.9246861924686192,
      "grad_norm": 0.03848472237586975,
      "learning_rate": 1.2304044630404464e-05,
      "loss": 0.0011,
      "step": 2760
    },
    {
      "epoch": 1.9316596931659693,
      "grad_norm": 2.478860378265381,
      "learning_rate": 1.2276150627615064e-05,
      "loss": 0.1436,
      "step": 2770
    },
    {
      "epoch": 1.9386331938633194,
      "grad_norm": 0.010695680044591427,
      "learning_rate": 1.2248256624825663e-05,
      "loss": 0.0103,
      "step": 2780
    },
    {
      "epoch": 1.9456066945606696,
      "grad_norm": 0.2534022927284241,
      "learning_rate": 1.2220362622036263e-05,
      "loss": 0.0396,
      "step": 2790
    },
    {
      "epoch": 1.9525801952580195,
      "grad_norm": 0.04518653079867363,
      "learning_rate": 1.2192468619246863e-05,
      "loss": 0.0089,
      "step": 2800
    },
    {
      "epoch": 1.9595536959553694,
      "grad_norm": 0.040403205901384354,
      "learning_rate": 1.2164574616457463e-05,
      "loss": 0.0028,
      "step": 2810
    },
    {
      "epoch": 1.9665271966527196,
      "grad_norm": 0.004626311827450991,
      "learning_rate": 1.2136680613668061e-05,
      "loss": 0.1684,
      "step": 2820
    },
    {
      "epoch": 1.9735006973500697,
      "grad_norm": 0.009998406283557415,
      "learning_rate": 1.2108786610878662e-05,
      "loss": 0.0401,
      "step": 2830
    },
    {
      "epoch": 1.9804741980474199,
      "grad_norm": 0.02394164726138115,
      "learning_rate": 1.208089260808926e-05,
      "loss": 0.0128,
      "step": 2840
    },
    {
      "epoch": 1.98744769874477,
      "grad_norm": 0.203586608171463,
      "learning_rate": 1.2052998605299862e-05,
      "loss": 0.0039,
      "step": 2850
    },
    {
      "epoch": 1.99442119944212,
      "grad_norm": 0.6173217296600342,
      "learning_rate": 1.2025104602510462e-05,
      "loss": 0.0021,
      "step": 2860
    },
    {
      "epoch": 2.00139470013947,
      "grad_norm": 0.01286329049617052,
      "learning_rate": 1.1997210599721061e-05,
      "loss": 0.0445,
      "step": 2870
    },
    {
      "epoch": 2.00836820083682,
      "grad_norm": 0.019031327217817307,
      "learning_rate": 1.1969316596931661e-05,
      "loss": 0.0255,
      "step": 2880
    },
    {
      "epoch": 2.01534170153417,
      "grad_norm": 3.7492589950561523,
      "learning_rate": 1.194142259414226e-05,
      "loss": 0.0061,
      "step": 2890
    },
    {
      "epoch": 2.0223152022315203,
      "grad_norm": 0.015447934158146381,
      "learning_rate": 1.191352859135286e-05,
      "loss": 0.004,
      "step": 2900
    },
    {
      "epoch": 2.0292887029288704,
      "grad_norm": 0.010356171987950802,
      "learning_rate": 1.1885634588563459e-05,
      "loss": 0.0311,
      "step": 2910
    },
    {
      "epoch": 2.0362622036262206,
      "grad_norm": 0.020356524735689163,
      "learning_rate": 1.185774058577406e-05,
      "loss": 0.0018,
      "step": 2920
    },
    {
      "epoch": 2.0432357043235703,
      "grad_norm": 0.3535953164100647,
      "learning_rate": 1.1829846582984658e-05,
      "loss": 0.0014,
      "step": 2930
    },
    {
      "epoch": 2.0502092050209204,
      "grad_norm": 0.014560551382601261,
      "learning_rate": 1.180195258019526e-05,
      "loss": 0.03,
      "step": 2940
    },
    {
      "epoch": 2.0571827057182706,
      "grad_norm": 9.034151077270508,
      "learning_rate": 1.1774058577405858e-05,
      "loss": 0.0116,
      "step": 2950
    },
    {
      "epoch": 2.0641562064156207,
      "grad_norm": 6.865650177001953,
      "learning_rate": 1.174616457461646e-05,
      "loss": 0.1078,
      "step": 2960
    },
    {
      "epoch": 2.071129707112971,
      "grad_norm": 0.040994029492139816,
      "learning_rate": 1.1718270571827058e-05,
      "loss": 0.05,
      "step": 2970
    },
    {
      "epoch": 2.078103207810321,
      "grad_norm": 0.004967757500708103,
      "learning_rate": 1.1690376569037657e-05,
      "loss": 0.0031,
      "step": 2980
    },
    {
      "epoch": 2.0850767085076707,
      "grad_norm": 0.01656835526227951,
      "learning_rate": 1.1662482566248257e-05,
      "loss": 0.1353,
      "step": 2990
    },
    {
      "epoch": 2.092050209205021,
      "grad_norm": 0.020232422277331352,
      "learning_rate": 1.1634588563458857e-05,
      "loss": 0.0378,
      "step": 3000
    },
    {
      "epoch": 2.099023709902371,
      "grad_norm": 0.006969059351831675,
      "learning_rate": 1.1606694560669458e-05,
      "loss": 0.1451,
      "step": 3010
    },
    {
      "epoch": 2.105997210599721,
      "grad_norm": 0.011694218032062054,
      "learning_rate": 1.1578800557880056e-05,
      "loss": 0.0263,
      "step": 3020
    },
    {
      "epoch": 2.1129707112970713,
      "grad_norm": 0.07425375282764435,
      "learning_rate": 1.1550906555090658e-05,
      "loss": 0.0022,
      "step": 3030
    },
    {
      "epoch": 2.1199442119944214,
      "grad_norm": 0.10199136286973953,
      "learning_rate": 1.1523012552301256e-05,
      "loss": 0.0621,
      "step": 3040
    },
    {
      "epoch": 2.126917712691771,
      "grad_norm": 0.10195229947566986,
      "learning_rate": 1.1495118549511857e-05,
      "loss": 0.0342,
      "step": 3050
    },
    {
      "epoch": 2.1338912133891212,
      "grad_norm": 0.012728706002235413,
      "learning_rate": 1.1467224546722456e-05,
      "loss": 0.1303,
      "step": 3060
    },
    {
      "epoch": 2.1408647140864714,
      "grad_norm": 0.018413392826914787,
      "learning_rate": 1.1439330543933055e-05,
      "loss": 0.0026,
      "step": 3070
    },
    {
      "epoch": 2.1478382147838215,
      "grad_norm": 0.00421161437407136,
      "learning_rate": 1.1411436541143655e-05,
      "loss": 0.0795,
      "step": 3080
    },
    {
      "epoch": 2.1548117154811717,
      "grad_norm": 0.006007787771522999,
      "learning_rate": 1.1383542538354255e-05,
      "loss": 0.018,
      "step": 3090
    },
    {
      "epoch": 2.161785216178522,
      "grad_norm": 0.004907716065645218,
      "learning_rate": 1.1355648535564853e-05,
      "loss": 0.0009,
      "step": 3100
    },
    {
      "epoch": 2.1687587168758715,
      "grad_norm": 5.030208110809326,
      "learning_rate": 1.1327754532775454e-05,
      "loss": 0.0816,
      "step": 3110
    },
    {
      "epoch": 2.1757322175732217,
      "grad_norm": 0.021412838250398636,
      "learning_rate": 1.1299860529986053e-05,
      "loss": 0.0573,
      "step": 3120
    },
    {
      "epoch": 2.182705718270572,
      "grad_norm": 0.020408878102898598,
      "learning_rate": 1.1271966527196654e-05,
      "loss": 0.0005,
      "step": 3130
    },
    {
      "epoch": 2.189679218967922,
      "grad_norm": 0.20092400908470154,
      "learning_rate": 1.1244072524407252e-05,
      "loss": 0.0013,
      "step": 3140
    },
    {
      "epoch": 2.196652719665272,
      "grad_norm": 0.007184051908552647,
      "learning_rate": 1.1216178521617854e-05,
      "loss": 0.0143,
      "step": 3150
    },
    {
      "epoch": 2.2036262203626222,
      "grad_norm": 0.0019369281362742186,
      "learning_rate": 1.1188284518828453e-05,
      "loss": 0.0068,
      "step": 3160
    },
    {
      "epoch": 2.210599721059972,
      "grad_norm": 0.008907733485102654,
      "learning_rate": 1.1160390516039053e-05,
      "loss": 0.0017,
      "step": 3170
    },
    {
      "epoch": 2.217573221757322,
      "grad_norm": 8.365190505981445,
      "learning_rate": 1.1132496513249653e-05,
      "loss": 0.0422,
      "step": 3180
    },
    {
      "epoch": 2.224546722454672,
      "grad_norm": 1.0398991107940674,
      "learning_rate": 1.1104602510460251e-05,
      "loss": 0.0015,
      "step": 3190
    },
    {
      "epoch": 2.2315202231520224,
      "grad_norm": 0.01677248626947403,
      "learning_rate": 1.1076708507670852e-05,
      "loss": 0.0053,
      "step": 3200
    },
    {
      "epoch": 2.2384937238493725,
      "grad_norm": 0.05190812796354294,
      "learning_rate": 1.104881450488145e-05,
      "loss": 0.0005,
      "step": 3210
    },
    {
      "epoch": 2.2454672245467227,
      "grad_norm": 4.997675895690918,
      "learning_rate": 1.1020920502092052e-05,
      "loss": 0.2023,
      "step": 3220
    },
    {
      "epoch": 2.2524407252440724,
      "grad_norm": 0.006259276531636715,
      "learning_rate": 1.099302649930265e-05,
      "loss": 0.004,
      "step": 3230
    },
    {
      "epoch": 2.2594142259414225,
      "grad_norm": 0.008285343647003174,
      "learning_rate": 1.0965132496513252e-05,
      "loss": 0.0159,
      "step": 3240
    },
    {
      "epoch": 2.2663877266387726,
      "grad_norm": 3.1502225399017334,
      "learning_rate": 1.093723849372385e-05,
      "loss": 0.0056,
      "step": 3250
    },
    {
      "epoch": 2.273361227336123,
      "grad_norm": 1.8060733079910278,
      "learning_rate": 1.0909344490934451e-05,
      "loss": 0.003,
      "step": 3260
    },
    {
      "epoch": 2.280334728033473,
      "grad_norm": 0.6619565486907959,
      "learning_rate": 1.088145048814505e-05,
      "loss": 0.0021,
      "step": 3270
    },
    {
      "epoch": 2.287308228730823,
      "grad_norm": 0.008702186867594719,
      "learning_rate": 1.0853556485355649e-05,
      "loss": 0.0678,
      "step": 3280
    },
    {
      "epoch": 2.2942817294281728,
      "grad_norm": 0.006757175084203482,
      "learning_rate": 1.0825662482566249e-05,
      "loss": 0.0019,
      "step": 3290
    },
    {
      "epoch": 2.301255230125523,
      "grad_norm": 0.04931594431400299,
      "learning_rate": 1.0797768479776849e-05,
      "loss": 0.0014,
      "step": 3300
    },
    {
      "epoch": 2.308228730822873,
      "grad_norm": 0.4247874319553375,
      "learning_rate": 1.0769874476987448e-05,
      "loss": 0.1117,
      "step": 3310
    },
    {
      "epoch": 2.315202231520223,
      "grad_norm": 0.006627683062106371,
      "learning_rate": 1.0741980474198048e-05,
      "loss": 0.0004,
      "step": 3320
    },
    {
      "epoch": 2.3221757322175733,
      "grad_norm": 0.07560670375823975,
      "learning_rate": 1.071408647140865e-05,
      "loss": 0.0014,
      "step": 3330
    },
    {
      "epoch": 2.3291492329149235,
      "grad_norm": 0.00642388267442584,
      "learning_rate": 1.0686192468619248e-05,
      "loss": 0.0008,
      "step": 3340
    },
    {
      "epoch": 2.336122733612273,
      "grad_norm": 0.010923785157501698,
      "learning_rate": 1.0658298465829849e-05,
      "loss": 0.0005,
      "step": 3350
    },
    {
      "epoch": 2.3430962343096233,
      "grad_norm": 0.015175443142652512,
      "learning_rate": 1.0630404463040447e-05,
      "loss": 0.0223,
      "step": 3360
    },
    {
      "epoch": 2.3500697350069735,
| "grad_norm": 0.004983542487025261, | |
| "learning_rate": 1.0602510460251047e-05, | |
| "loss": 0.0004, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.3570432357043236, | |
| "grad_norm": 0.08845807611942291, | |
| "learning_rate": 1.0574616457461647e-05, | |
| "loss": 0.0568, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.3640167364016738, | |
| "grad_norm": 1.122623085975647, | |
| "learning_rate": 1.0546722454672247e-05, | |
| "loss": 0.0016, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.370990237099024, | |
| "grad_norm": 0.012382916174829006, | |
| "learning_rate": 1.0518828451882845e-05, | |
| "loss": 0.0397, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.3779637377963736, | |
| "grad_norm": 0.052785713225603104, | |
| "learning_rate": 1.0490934449093446e-05, | |
| "loss": 0.0308, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.3849372384937237, | |
| "grad_norm": 0.003585915081202984, | |
| "learning_rate": 1.0463040446304044e-05, | |
| "loss": 0.0005, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.391910739191074, | |
| "grad_norm": 0.13946089148521423, | |
| "learning_rate": 1.0435146443514646e-05, | |
| "loss": 0.0496, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.398884239888424, | |
| "grad_norm": 0.014828328974545002, | |
| "learning_rate": 1.0407252440725244e-05, | |
| "loss": 0.0888, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.405857740585774, | |
| "grad_norm": 5.3019585609436035, | |
| "learning_rate": 1.0379358437935845e-05, | |
| "loss": 0.189, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.4128312412831243, | |
| "grad_norm": 0.019477086141705513, | |
| "learning_rate": 1.0351464435146443e-05, | |
| "loss": 0.0021, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.419804741980474, | |
| "grad_norm": 0.004096941091120243, | |
| "learning_rate": 1.0323570432357045e-05, | |
| "loss": 0.0014, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.426778242677824, | |
| "grad_norm": 0.012808839790523052, | |
| "learning_rate": 1.0295676429567645e-05, | |
| "loss": 0.0013, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.4337517433751743, | |
| "grad_norm": 0.027664173394441605, | |
| "learning_rate": 1.0267782426778243e-05, | |
| "loss": 0.0017, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.4407252440725244, | |
| "grad_norm": 0.002692542504519224, | |
| "learning_rate": 1.0239888423988844e-05, | |
| "loss": 0.0006, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.4476987447698746, | |
| "grad_norm": 0.013481782749295235, | |
| "learning_rate": 1.0211994421199442e-05, | |
| "loss": 0.0006, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.4546722454672247, | |
| "grad_norm": 0.04460657387971878, | |
| "learning_rate": 1.0184100418410044e-05, | |
| "loss": 0.1172, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.4616457461645744, | |
| "grad_norm": 0.009075530804693699, | |
| "learning_rate": 1.0156206415620642e-05, | |
| "loss": 0.0003, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.4686192468619246, | |
| "grad_norm": 0.05851946026086807, | |
| "learning_rate": 1.0128312412831243e-05, | |
| "loss": 0.0385, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.4755927475592747, | |
| "grad_norm": 0.011403707787394524, | |
| "learning_rate": 1.0100418410041841e-05, | |
| "loss": 0.0553, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.482566248256625, | |
| "grad_norm": 0.008116642013192177, | |
| "learning_rate": 1.0072524407252443e-05, | |
| "loss": 0.0455, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.489539748953975, | |
| "grad_norm": 0.01469523087143898, | |
| "learning_rate": 1.004463040446304e-05, | |
| "loss": 0.09, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.496513249651325, | |
| "grad_norm": 0.009429101832211018, | |
| "learning_rate": 1.001673640167364e-05, | |
| "loss": 0.0003, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.5034867503486753, | |
| "grad_norm": 0.002064500702545047, | |
| "learning_rate": 9.98884239888424e-06, | |
| "loss": 0.0277, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.510460251046025, | |
| "grad_norm": 0.0018807955784723163, | |
| "learning_rate": 9.96094839609484e-06, | |
| "loss": 0.0035, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.517433751743375, | |
| "grad_norm": 0.0022944663651287556, | |
| "learning_rate": 9.93305439330544e-06, | |
| "loss": 0.0659, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.5244072524407253, | |
| "grad_norm": 0.004956633783876896, | |
| "learning_rate": 9.90516039051604e-06, | |
| "loss": 0.0005, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.5313807531380754, | |
| "grad_norm": 0.009603263810276985, | |
| "learning_rate": 9.87726638772664e-06, | |
| "loss": 0.0008, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.5383542538354256, | |
| "grad_norm": 0.00588383199647069, | |
| "learning_rate": 9.84937238493724e-06, | |
| "loss": 0.0004, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.5453277545327753, | |
| "grad_norm": 0.0017828900599852204, | |
| "learning_rate": 9.821478382147839e-06, | |
| "loss": 0.0769, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.5523012552301254, | |
| "grad_norm": 2.002347946166992, | |
| "learning_rate": 9.793584379358439e-06, | |
| "loss": 0.0405, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.5592747559274756, | |
| "grad_norm": 0.08858360350131989, | |
| "learning_rate": 9.765690376569039e-06, | |
| "loss": 0.0006, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.5662482566248257, | |
| "grad_norm": 0.0016570795560255647, | |
| "learning_rate": 9.737796373779638e-06, | |
| "loss": 0.0156, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.573221757322176, | |
| "grad_norm": 0.008444487117230892, | |
| "learning_rate": 9.709902370990238e-06, | |
| "loss": 0.0004, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.5801952580195255, | |
| "grad_norm": 0.009042381308972836, | |
| "learning_rate": 9.682008368200838e-06, | |
| "loss": 0.0639, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.587168758716876, | |
| "grad_norm": 0.01534841675311327, | |
| "learning_rate": 9.654114365411438e-06, | |
| "loss": 0.008, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.594142259414226, | |
| "grad_norm": 0.004281465895473957, | |
| "learning_rate": 9.626220362622038e-06, | |
| "loss": 0.001, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.601115760111576, | |
| "grad_norm": 0.012187506072223186, | |
| "learning_rate": 9.598326359832637e-06, | |
| "loss": 0.0013, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.608089260808926, | |
| "grad_norm": 0.007811861112713814, | |
| "learning_rate": 9.570432357043237e-06, | |
| "loss": 0.0761, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.6150627615062763, | |
| "grad_norm": 0.0038925069384276867, | |
| "learning_rate": 9.542538354253837e-06, | |
| "loss": 0.0054, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.6220362622036264, | |
| "grad_norm": 0.004444095306098461, | |
| "learning_rate": 9.514644351464437e-06, | |
| "loss": 0.0115, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.629009762900976, | |
| "grad_norm": 0.035112250596284866, | |
| "learning_rate": 9.486750348675036e-06, | |
| "loss": 0.0016, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.6359832635983262, | |
| "grad_norm": 13.684135437011719, | |
| "learning_rate": 9.458856345885634e-06, | |
| "loss": 0.073, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.6429567642956764, | |
| "grad_norm": 0.012164488434791565, | |
| "learning_rate": 9.430962343096234e-06, | |
| "loss": 0.0013, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.6499302649930265, | |
| "grad_norm": 0.052418239414691925, | |
| "learning_rate": 9.403068340306834e-06, | |
| "loss": 0.0007, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.6569037656903767, | |
| "grad_norm": 0.00277147744782269, | |
| "learning_rate": 9.375174337517434e-06, | |
| "loss": 0.1436, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.6638772663877264, | |
| "grad_norm": 0.0020822423975914717, | |
| "learning_rate": 9.347280334728034e-06, | |
| "loss": 0.029, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.670850767085077, | |
| "grad_norm": 0.04161955416202545, | |
| "learning_rate": 9.319386331938633e-06, | |
| "loss": 0.0618, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.6778242677824267, | |
| "grad_norm": 0.03144453093409538, | |
| "learning_rate": 9.291492329149233e-06, | |
| "loss": 0.0429, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.684797768479777, | |
| "grad_norm": 0.01364242285490036, | |
| "learning_rate": 9.263598326359835e-06, | |
| "loss": 0.0201, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.691771269177127, | |
| "grad_norm": 0.007967864163219929, | |
| "learning_rate": 9.235704323570434e-06, | |
| "loss": 0.0029, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.698744769874477, | |
| "grad_norm": 0.006493957247585058, | |
| "learning_rate": 9.207810320781032e-06, | |
| "loss": 0.052, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.7057182705718272, | |
| "grad_norm": 0.07452358305454254, | |
| "learning_rate": 9.179916317991632e-06, | |
| "loss": 0.0026, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.712691771269177, | |
| "grad_norm": 0.005458319094032049, | |
| "learning_rate": 9.152022315202232e-06, | |
| "loss": 0.0017, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.719665271966527, | |
| "grad_norm": 0.010741036385297775, | |
| "learning_rate": 9.124128312412832e-06, | |
| "loss": 0.0732, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.726638772663877, | |
| "grad_norm": 0.007957357913255692, | |
| "learning_rate": 9.096234309623432e-06, | |
| "loss": 0.0003, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.7336122733612274, | |
| "grad_norm": 0.31968942284584045, | |
| "learning_rate": 9.068340306834031e-06, | |
| "loss": 0.0171, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.7405857740585775, | |
| "grad_norm": 0.0007297178963199258, | |
| "learning_rate": 9.040446304044631e-06, | |
| "loss": 0.0003, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.747559274755927, | |
| "grad_norm": 0.009427106007933617, | |
| "learning_rate": 9.012552301255231e-06, | |
| "loss": 0.0219, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.754532775453278, | |
| "grad_norm": 0.001962635898962617, | |
| "learning_rate": 8.98465829846583e-06, | |
| "loss": 0.0371, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.7615062761506275, | |
| "grad_norm": 0.020612264052033424, | |
| "learning_rate": 8.95676429567643e-06, | |
| "loss": 0.0006, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.7684797768479776, | |
| "grad_norm": 0.004890106618404388, | |
| "learning_rate": 8.92887029288703e-06, | |
| "loss": 0.0016, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.775453277545328, | |
| "grad_norm": 0.03778740391135216, | |
| "learning_rate": 8.90097629009763e-06, | |
| "loss": 0.0061, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.782426778242678, | |
| "grad_norm": 0.0058617801405489445, | |
| "learning_rate": 8.873082287308228e-06, | |
| "loss": 0.0445, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.789400278940028, | |
| "grad_norm": 0.005100315902382135, | |
| "learning_rate": 8.84518828451883e-06, | |
| "loss": 0.087, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7963737796373778, | |
| "grad_norm": 0.030730150640010834, | |
| "learning_rate": 8.81729428172943e-06, | |
| "loss": 0.0791, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.803347280334728, | |
| "grad_norm": 0.00628610560670495, | |
| "learning_rate": 8.789400278940029e-06, | |
| "loss": 0.1605, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.810320781032078, | |
| "grad_norm": 0.0032315838616341352, | |
| "learning_rate": 8.761506276150629e-06, | |
| "loss": 0.0085, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.817294281729428, | |
| "grad_norm": 0.0032316127326339483, | |
| "learning_rate": 8.733612273361229e-06, | |
| "loss": 0.0006, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.8242677824267783, | |
| "grad_norm": 0.0020475969649851322, | |
| "learning_rate": 8.705718270571828e-06, | |
| "loss": 0.0293, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.831241283124128, | |
| "grad_norm": 0.032252971082925797, | |
| "learning_rate": 8.677824267782428e-06, | |
| "loss": 0.0309, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.8382147838214786, | |
| "grad_norm": 0.07949300855398178, | |
| "learning_rate": 8.649930264993028e-06, | |
| "loss": 0.0026, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.8451882845188283, | |
| "grad_norm": 0.0017817869083955884, | |
| "learning_rate": 8.622036262203626e-06, | |
| "loss": 0.0005, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.8521617852161785, | |
| "grad_norm": 0.07673770934343338, | |
| "learning_rate": 8.594142259414226e-06, | |
| "loss": 0.0016, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.8591352859135286, | |
| "grad_norm": 13.104846954345703, | |
| "learning_rate": 8.566248256624826e-06, | |
| "loss": 0.0546, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.8661087866108788, | |
| "grad_norm": 14.739638328552246, | |
| "learning_rate": 8.538354253835425e-06, | |
| "loss": 0.0658, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.873082287308229, | |
| "grad_norm": 0.018161823973059654, | |
| "learning_rate": 8.510460251046025e-06, | |
| "loss": 0.0839, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.8800557880055786, | |
| "grad_norm": 0.02036408893764019, | |
| "learning_rate": 8.482566248256625e-06, | |
| "loss": 0.0007, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.8870292887029287, | |
| "grad_norm": 0.007715345360338688, | |
| "learning_rate": 8.454672245467225e-06, | |
| "loss": 0.0009, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.894002789400279, | |
| "grad_norm": 0.3437242805957794, | |
| "learning_rate": 8.426778242677825e-06, | |
| "loss": 0.0007, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.900976290097629, | |
| "grad_norm": 0.027546469122171402, | |
| "learning_rate": 8.398884239888424e-06, | |
| "loss": 0.0124, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.907949790794979, | |
| "grad_norm": 0.0030263513326644897, | |
| "learning_rate": 8.370990237099024e-06, | |
| "loss": 0.0808, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.914923291492329, | |
| "grad_norm": 0.0032276464626193047, | |
| "learning_rate": 8.343096234309624e-06, | |
| "loss": 0.0014, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.9218967921896795, | |
| "grad_norm": 0.0074035353027284145, | |
| "learning_rate": 8.315202231520224e-06, | |
| "loss": 0.0004, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.928870292887029, | |
| "grad_norm": 0.016391828656196594, | |
| "learning_rate": 8.287308228730823e-06, | |
| "loss": 0.0314, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.9358437935843793, | |
| "grad_norm": 0.9994223713874817, | |
| "learning_rate": 8.259414225941423e-06, | |
| "loss": 0.0013, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.9428172942817294, | |
| "grad_norm": 0.006372373551130295, | |
| "learning_rate": 8.231520223152023e-06, | |
| "loss": 0.0654, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.9497907949790796, | |
| "grad_norm": 0.006764199584722519, | |
| "learning_rate": 8.203626220362623e-06, | |
| "loss": 0.0004, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.9567642956764297, | |
| "grad_norm": 0.012071878649294376, | |
| "learning_rate": 8.175732217573223e-06, | |
| "loss": 0.0341, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.9637377963737794, | |
| "grad_norm": 0.10174605250358582, | |
| "learning_rate": 8.147838214783822e-06, | |
| "loss": 0.0285, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.9707112970711296, | |
| "grad_norm": 0.010248345322906971, | |
| "learning_rate": 8.119944211994422e-06, | |
| "loss": 0.0931, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.9776847977684797, | |
| "grad_norm": 0.06934584677219391, | |
| "learning_rate": 8.092050209205022e-06, | |
| "loss": 0.0008, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.98465829846583, | |
| "grad_norm": 0.04618504270911217, | |
| "learning_rate": 8.064156206415622e-06, | |
| "loss": 0.0534, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.99163179916318, | |
| "grad_norm": 0.04851532354950905, | |
| "learning_rate": 8.03626220362622e-06, | |
| "loss": 0.0679, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.99860529986053, | |
| "grad_norm": 0.004045933019369841, | |
| "learning_rate": 8.00836820083682e-06, | |
| "loss": 0.0816, | |
| "step": 4300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7170, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4652091396864000.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
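
A few notes on the state above, which has the layout of a Hugging Face Trainer `trainer_state.json`: `log_history` holds one record every `logging_steps` (10) optimizer steps with `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`, and the trailer records the run configuration (`max_steps`, `num_train_epochs`, `save_steps`, `train_batch_size`, and the `TrainerControl` flags). Below is a minimal Python sketch for loading and sanity-checking such a file; the filename and the grad-norm spike threshold are illustrative assumptions, not taken from this document.

```python
import json
import math

# Minimal sketch, assuming the state is saved locally as
# "trainer_state.json" (illustrative filename) and follows the Hugging
# Face Trainer layout: a top-level "log_history" list whose entries
# carry epoch / grad_norm / learning_rate / loss / step.
with open("trainer_state.json") as f:
    # Python's json module accepts bare NaN/Infinity tokens by default,
    # so a record like "grad_norm": NaN parses to float('nan') instead
    # of aborting the way strict JSON parsers do.
    state = json.load(f)

history = [e for e in state["log_history"] if "loss" in e]
finite = [e for e in history if math.isfinite(e["loss"])]

best = min(finite, key=lambda e: e["loss"])
print(f"{len(history)} logged points, {state['max_steps']} max steps")
print(f"loss: first {finite[0]['loss']:.4f}, last {finite[-1]['loss']:.4f}, "
      f"best {best['loss']:.4f} at step {best['step']}")

# Flag gradient-norm spikes (threshold is arbitrary, for illustration).
# Isolated jumps like those near steps 3110, 3780, and 4100 above often
# point at hard or noisy batches rather than divergence, since the loss
# settles back down immediately afterwards.
for e in history:
    g = e.get("grad_norm")
    if g is not None and (not math.isfinite(g) or g > 5.0):
        print(f"step {e['step']}: grad_norm={g:.2f}, loss={e['loss']}")
```

One design note: filtering on `math.isfinite` before taking the minimum keeps a stray NaN loss from poisoning the comparison, since `min` over a float list containing NaN gives order-dependent results.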