| { | |
| "best_global_step": 42464, | |
| "best_metric": 0.29507139325141907, | |
| "best_model_checkpoint": "/content/drive/MyDrive/trsql/sqltr_model/checkpoint-42464", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 42464, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001177467972871138, | |
| "grad_norm": 404.0032958984375, | |
| "learning_rate": 2.884728600023549e-07, | |
| "loss": 5.589, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.002354935945742276, | |
| "grad_norm": 343.14361572265625, | |
| "learning_rate": 5.828329212292476e-07, | |
| "loss": 5.4979, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0035324039186134136, | |
| "grad_norm": 108.14688110351562, | |
| "learning_rate": 8.771929824561404e-07, | |
| "loss": 5.3407, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.004709871891484552, | |
| "grad_norm": 177.83741760253906, | |
| "learning_rate": 1.1715530436830331e-06, | |
| "loss": 5.152, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.00588733986435569, | |
| "grad_norm": 478.1287536621094, | |
| "learning_rate": 1.465913104909926e-06, | |
| "loss": 4.8301, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.007064807837226827, | |
| "grad_norm": 332.8993225097656, | |
| "learning_rate": 1.7602731661368187e-06, | |
| "loss": 4.5302, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.008242275810097965, | |
| "grad_norm": 120.71070098876953, | |
| "learning_rate": 2.0546332273637114e-06, | |
| "loss": 4.2293, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.009419743782969104, | |
| "grad_norm": 183.82774353027344, | |
| "learning_rate": 2.3489932885906044e-06, | |
| "loss": 3.8529, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.010597211755840242, | |
| "grad_norm": 101.04035186767578, | |
| "learning_rate": 2.643353349817497e-06, | |
| "loss": 3.5668, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01177467972871138, | |
| "grad_norm": 94.07927703857422, | |
| "learning_rate": 2.93771341104439e-06, | |
| "loss": 3.2295, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.012952147701582517, | |
| "grad_norm": 1335.56884765625, | |
| "learning_rate": 3.2320734722712825e-06, | |
| "loss": 3.0231, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.014129615674453654, | |
| "grad_norm": 1761.2337646484375, | |
| "learning_rate": 3.5264335334981755e-06, | |
| "loss": 2.8073, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.015307083647324794, | |
| "grad_norm": 1014.411865234375, | |
| "learning_rate": 3.820793594725068e-06, | |
| "loss": 2.5802, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.01648455162019593, | |
| "grad_norm": 107.9889144897461, | |
| "learning_rate": 4.115153655951961e-06, | |
| "loss": 2.4422, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01766201959306707, | |
| "grad_norm": 91.43941497802734, | |
| "learning_rate": 4.409513717178854e-06, | |
| "loss": 2.2729, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.018839487565938208, | |
| "grad_norm": 65.33817291259766, | |
| "learning_rate": 4.703873778405746e-06, | |
| "loss": 2.0897, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.020016955538809344, | |
| "grad_norm": 93.88240051269531, | |
| "learning_rate": 4.998233839632639e-06, | |
| "loss": 1.9823, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.021194423511680483, | |
| "grad_norm": 102.11677551269531, | |
| "learning_rate": 5.292593900859532e-06, | |
| "loss": 1.8626, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02237189148455162, | |
| "grad_norm": 1674.0389404296875, | |
| "learning_rate": 5.586953962086424e-06, | |
| "loss": 1.7901, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.02354935945742276, | |
| "grad_norm": 76.04745483398438, | |
| "learning_rate": 5.881314023313317e-06, | |
| "loss": 1.7232, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.024726827430293898, | |
| "grad_norm": 85.01644134521484, | |
| "learning_rate": 6.175674084540209e-06, | |
| "loss": 1.7204, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.025904295403165033, | |
| "grad_norm": 45.74072265625, | |
| "learning_rate": 6.470034145767102e-06, | |
| "loss": 1.6678, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.027081763376036173, | |
| "grad_norm": 103.06343078613281, | |
| "learning_rate": 6.764394206993996e-06, | |
| "loss": 1.6336, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.02825923134890731, | |
| "grad_norm": 42.505645751953125, | |
| "learning_rate": 7.058754268220888e-06, | |
| "loss": 1.6207, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.029436699321778448, | |
| "grad_norm": 43.26211166381836, | |
| "learning_rate": 7.353114329447781e-06, | |
| "loss": 1.58, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.030614167294649587, | |
| "grad_norm": 143.49293518066406, | |
| "learning_rate": 7.647474390674673e-06, | |
| "loss": 1.5373, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.031791635267520726, | |
| "grad_norm": 27.412628173828125, | |
| "learning_rate": 7.941834451901566e-06, | |
| "loss": 1.5327, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.03296910324039186, | |
| "grad_norm": 72.7859115600586, | |
| "learning_rate": 8.23619451312846e-06, | |
| "loss": 1.4967, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.034146571213263, | |
| "grad_norm": 60.79092025756836, | |
| "learning_rate": 8.530554574355352e-06, | |
| "loss": 1.4764, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.03532403918613414, | |
| "grad_norm": 68.81829071044922, | |
| "learning_rate": 8.824914635582245e-06, | |
| "loss": 1.4932, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.03650150715900528, | |
| "grad_norm": 93.37459564208984, | |
| "learning_rate": 9.119274696809138e-06, | |
| "loss": 1.4965, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.037678975131876416, | |
| "grad_norm": 71.68579864501953, | |
| "learning_rate": 9.41363475803603e-06, | |
| "loss": 1.4488, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.03885644310474755, | |
| "grad_norm": 45.63780212402344, | |
| "learning_rate": 9.707994819262922e-06, | |
| "loss": 1.4483, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.04003391107761869, | |
| "grad_norm": 39.220069885253906, | |
| "learning_rate": 1.0002354880489815e-05, | |
| "loss": 1.4033, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.04121137905048983, | |
| "grad_norm": 98.83927917480469, | |
| "learning_rate": 1.0296714941716708e-05, | |
| "loss": 1.411, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.042388847023360966, | |
| "grad_norm": 25.23127555847168, | |
| "learning_rate": 1.0591075002943601e-05, | |
| "loss": 1.3996, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.043566314996232106, | |
| "grad_norm": 296.96875, | |
| "learning_rate": 1.0885435064170493e-05, | |
| "loss": 1.4069, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.04474378296910324, | |
| "grad_norm": 147.0619659423828, | |
| "learning_rate": 1.1179795125397387e-05, | |
| "loss": 1.3956, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.04592125094197438, | |
| "grad_norm": 32.09125900268555, | |
| "learning_rate": 1.1474155186624279e-05, | |
| "loss": 1.3287, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.04709871891484552, | |
| "grad_norm": 55.88424301147461, | |
| "learning_rate": 1.1768515247851172e-05, | |
| "loss": 1.3557, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.048276186887716656, | |
| "grad_norm": 445.3227844238281, | |
| "learning_rate": 1.2062875309078065e-05, | |
| "loss": 1.3539, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.049453654860587795, | |
| "grad_norm": 27.51380729675293, | |
| "learning_rate": 1.2357235370304957e-05, | |
| "loss": 1.3417, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05063112283345893, | |
| "grad_norm": 61.84370040893555, | |
| "learning_rate": 1.2651595431531852e-05, | |
| "loss": 1.3182, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.05180859080633007, | |
| "grad_norm": 27.6585693359375, | |
| "learning_rate": 1.2945955492758743e-05, | |
| "loss": 1.3201, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.052986058779201206, | |
| "grad_norm": 45.15522384643555, | |
| "learning_rate": 1.3240315553985635e-05, | |
| "loss": 1.2967, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.054163526752072345, | |
| "grad_norm": 50.67666244506836, | |
| "learning_rate": 1.3534675615212528e-05, | |
| "loss": 1.2977, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.055340994724943485, | |
| "grad_norm": 49.8477897644043, | |
| "learning_rate": 1.3829035676439422e-05, | |
| "loss": 1.2915, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.05651846269781462, | |
| "grad_norm": 91.68016815185547, | |
| "learning_rate": 1.4123395737666315e-05, | |
| "loss": 1.3088, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.057695930670685756, | |
| "grad_norm": 37.220088958740234, | |
| "learning_rate": 1.4417755798893207e-05, | |
| "loss": 1.2942, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.058873398643556896, | |
| "grad_norm": 49.617408752441406, | |
| "learning_rate": 1.4712115860120098e-05, | |
| "loss": 1.2696, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.060050866616428035, | |
| "grad_norm": 106.0230484008789, | |
| "learning_rate": 1.5006475921346994e-05, | |
| "loss": 1.2725, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.061228334589299174, | |
| "grad_norm": 89.16209411621094, | |
| "learning_rate": 1.5300835982573886e-05, | |
| "loss": 1.229, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.06240580256217031, | |
| "grad_norm": 28.10127830505371, | |
| "learning_rate": 1.5595196043800777e-05, | |
| "loss": 1.2537, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.06358327053504145, | |
| "grad_norm": 100.1103515625, | |
| "learning_rate": 1.5889556105027668e-05, | |
| "loss": 1.2554, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.06476073850791259, | |
| "grad_norm": 89.11134338378906, | |
| "learning_rate": 1.6183916166254566e-05, | |
| "loss": 1.2076, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.06593820648078372, | |
| "grad_norm": 95.72467041015625, | |
| "learning_rate": 1.6478276227481457e-05, | |
| "loss": 1.2461, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.06711567445365486, | |
| "grad_norm": 61.87881851196289, | |
| "learning_rate": 1.677263628870835e-05, | |
| "loss": 1.2324, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.068293142426526, | |
| "grad_norm": 88.15873718261719, | |
| "learning_rate": 1.706699634993524e-05, | |
| "loss": 1.1963, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.06947061039939714, | |
| "grad_norm": 193.8809814453125, | |
| "learning_rate": 1.7361356411162135e-05, | |
| "loss": 1.2058, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.07064807837226827, | |
| "grad_norm": 30.5418701171875, | |
| "learning_rate": 1.765571647238903e-05, | |
| "loss": 1.1762, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07182554634513941, | |
| "grad_norm": 94.26049041748047, | |
| "learning_rate": 1.795007653361592e-05, | |
| "loss": 1.2193, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.07300301431801055, | |
| "grad_norm": 78.64865112304688, | |
| "learning_rate": 1.8244436594842812e-05, | |
| "loss": 1.1901, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.07418048229088169, | |
| "grad_norm": 178.8012237548828, | |
| "learning_rate": 1.8538796656069703e-05, | |
| "loss": 1.1713, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.07535795026375283, | |
| "grad_norm": 36.485416412353516, | |
| "learning_rate": 1.8833156717296598e-05, | |
| "loss": 1.1723, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.07653541823662396, | |
| "grad_norm": 51.394840240478516, | |
| "learning_rate": 1.9127516778523493e-05, | |
| "loss": 1.167, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.0777128862094951, | |
| "grad_norm": 61.70398712158203, | |
| "learning_rate": 1.9421876839750384e-05, | |
| "loss": 1.1831, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.07889035418236624, | |
| "grad_norm": 50.275169372558594, | |
| "learning_rate": 1.9716236900977275e-05, | |
| "loss": 1.1871, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.08006782215523738, | |
| "grad_norm": 30.377246856689453, | |
| "learning_rate": 2.001059696220417e-05, | |
| "loss": 1.149, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.08124529012810852, | |
| "grad_norm": 45.5155029296875, | |
| "learning_rate": 2.030495702343106e-05, | |
| "loss": 1.1141, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.08242275810097965, | |
| "grad_norm": 28.413341522216797, | |
| "learning_rate": 2.0599317084657956e-05, | |
| "loss": 1.1189, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.08360022607385079, | |
| "grad_norm": 28.7467098236084, | |
| "learning_rate": 2.0893677145884847e-05, | |
| "loss": 1.1519, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.08477769404672193, | |
| "grad_norm": 73.48779296875, | |
| "learning_rate": 2.118803720711174e-05, | |
| "loss": 1.1515, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.08595516201959306, | |
| "grad_norm": 38.0214729309082, | |
| "learning_rate": 2.1482397268338633e-05, | |
| "loss": 1.1486, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.08713262999246421, | |
| "grad_norm": 53.11909103393555, | |
| "learning_rate": 2.1776757329565524e-05, | |
| "loss": 1.1267, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.08831009796533534, | |
| "grad_norm": 48.59964370727539, | |
| "learning_rate": 2.207111739079242e-05, | |
| "loss": 1.1413, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.08948756593820648, | |
| "grad_norm": 88.6882095336914, | |
| "learning_rate": 2.2365477452019314e-05, | |
| "loss": 1.1103, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.09066503391107762, | |
| "grad_norm": 161.33514404296875, | |
| "learning_rate": 2.2659837513246205e-05, | |
| "loss": 1.1208, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.09184250188394875, | |
| "grad_norm": 67.24893188476562, | |
| "learning_rate": 2.2954197574473096e-05, | |
| "loss": 1.1324, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.0930199698568199, | |
| "grad_norm": 52.10124206542969, | |
| "learning_rate": 2.3248557635699987e-05, | |
| "loss": 1.1029, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.09419743782969103, | |
| "grad_norm": 20.676158905029297, | |
| "learning_rate": 2.3542917696926882e-05, | |
| "loss": 1.1103, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.09537490580256217, | |
| "grad_norm": 186.64627075195312, | |
| "learning_rate": 2.3837277758153777e-05, | |
| "loss": 1.1213, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.09655237377543331, | |
| "grad_norm": 82.08380889892578, | |
| "learning_rate": 2.4131637819380668e-05, | |
| "loss": 1.1148, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.09772984174830444, | |
| "grad_norm": 18.62107276916504, | |
| "learning_rate": 2.442599788060756e-05, | |
| "loss": 1.096, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.09890730972117559, | |
| "grad_norm": 80.53936767578125, | |
| "learning_rate": 2.4720357941834454e-05, | |
| "loss": 1.0927, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.10008477769404672, | |
| "grad_norm": 19.27259063720703, | |
| "learning_rate": 2.5014718003061345e-05, | |
| "loss": 1.0901, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.10126224566691786, | |
| "grad_norm": 28.516977310180664, | |
| "learning_rate": 2.530907806428824e-05, | |
| "loss": 1.0799, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.102439713639789, | |
| "grad_norm": 139.80172729492188, | |
| "learning_rate": 2.5603438125515135e-05, | |
| "loss": 1.0407, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.10361718161266013, | |
| "grad_norm": 79.58622741699219, | |
| "learning_rate": 2.5897798186742022e-05, | |
| "loss": 1.0767, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.10479464958553128, | |
| "grad_norm": 57.44203567504883, | |
| "learning_rate": 2.6192158247968917e-05, | |
| "loss": 1.0458, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.10597211755840241, | |
| "grad_norm": 39.183570861816406, | |
| "learning_rate": 2.648651830919581e-05, | |
| "loss": 1.0319, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.10714958553127354, | |
| "grad_norm": 27.675334930419922, | |
| "learning_rate": 2.6780878370422703e-05, | |
| "loss": 1.0346, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.10832705350414469, | |
| "grad_norm": 49.14881134033203, | |
| "learning_rate": 2.7075238431649598e-05, | |
| "loss": 1.0513, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.10950452147701582, | |
| "grad_norm": 69.12327575683594, | |
| "learning_rate": 2.7369598492876486e-05, | |
| "loss": 0.9958, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.11068198944988697, | |
| "grad_norm": 44.547706604003906, | |
| "learning_rate": 2.766395855410338e-05, | |
| "loss": 0.9994, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.1118594574227581, | |
| "grad_norm": 36.13666534423828, | |
| "learning_rate": 2.7958318615330275e-05, | |
| "loss": 1.0335, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.11303692539562923, | |
| "grad_norm": 118.04364013671875, | |
| "learning_rate": 2.8252678676557166e-05, | |
| "loss": 1.023, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.11421439336850038, | |
| "grad_norm": 49.03740310668945, | |
| "learning_rate": 2.854703873778406e-05, | |
| "loss": 0.99, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.11539186134137151, | |
| "grad_norm": 82.06845092773438, | |
| "learning_rate": 2.884139879901095e-05, | |
| "loss": 1.0061, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.11656932931424266, | |
| "grad_norm": 25.45916175842285, | |
| "learning_rate": 2.9135758860237844e-05, | |
| "loss": 1.0165, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.11774679728711379, | |
| "grad_norm": 40.93219757080078, | |
| "learning_rate": 2.9430118921464738e-05, | |
| "loss": 0.9996, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.11892426525998492, | |
| "grad_norm": 65.33716583251953, | |
| "learning_rate": 2.972447898269163e-05, | |
| "loss": 0.996, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.12010173323285607, | |
| "grad_norm": 30.791894912719727, | |
| "learning_rate": 3.0018839043918524e-05, | |
| "loss": 0.9544, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.1212792012057272, | |
| "grad_norm": 206.5362091064453, | |
| "learning_rate": 3.031319910514542e-05, | |
| "loss": 0.9894, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.12245666917859835, | |
| "grad_norm": 32.16919708251953, | |
| "learning_rate": 3.060755916637231e-05, | |
| "loss": 0.9779, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.12363413715146948, | |
| "grad_norm": 31.138160705566406, | |
| "learning_rate": 3.0901919227599205e-05, | |
| "loss": 0.9787, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.12481160512434061, | |
| "grad_norm": 58.650028228759766, | |
| "learning_rate": 3.119627928882609e-05, | |
| "loss": 0.9614, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.12598907309721175, | |
| "grad_norm": 22.53007698059082, | |
| "learning_rate": 3.149063935005299e-05, | |
| "loss": 0.9387, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.1271665410700829, | |
| "grad_norm": 129.4586944580078, | |
| "learning_rate": 3.178499941127988e-05, | |
| "loss": 0.9333, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.12834400904295404, | |
| "grad_norm": 94.5034408569336, | |
| "learning_rate": 3.207935947250677e-05, | |
| "loss": 0.9316, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.12952147701582517, | |
| "grad_norm": 197.27320861816406, | |
| "learning_rate": 3.2373719533733665e-05, | |
| "loss": 0.9391, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.1306989449886963, | |
| "grad_norm": 33.92900466918945, | |
| "learning_rate": 3.266807959496056e-05, | |
| "loss": 0.9578, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.13187641296156744, | |
| "grad_norm": 13.522852897644043, | |
| "learning_rate": 3.296243965618745e-05, | |
| "loss": 0.9619, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.1330538809344386, | |
| "grad_norm": 99.31133270263672, | |
| "learning_rate": 3.325679971741434e-05, | |
| "loss": 0.9484, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.13423134890730973, | |
| "grad_norm": 39.666805267333984, | |
| "learning_rate": 3.3551159778641236e-05, | |
| "loss": 0.8977, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.13540881688018086, | |
| "grad_norm": 44.98002624511719, | |
| "learning_rate": 3.384551983986813e-05, | |
| "loss": 0.9372, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.136586284853052, | |
| "grad_norm": 14.170408248901367, | |
| "learning_rate": 3.4139879901095026e-05, | |
| "loss": 0.9051, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.13776375282592312, | |
| "grad_norm": 59.49055480957031, | |
| "learning_rate": 3.4434239962321914e-05, | |
| "loss": 0.8961, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.13894122079879428, | |
| "grad_norm": 59.51968765258789, | |
| "learning_rate": 3.472860002354881e-05, | |
| "loss": 0.9058, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.14011868877166542, | |
| "grad_norm": 28.59142303466797, | |
| "learning_rate": 3.5022960084775696e-05, | |
| "loss": 0.9106, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.14129615674453655, | |
| "grad_norm": 49.447086334228516, | |
| "learning_rate": 3.531732014600259e-05, | |
| "loss": 0.9102, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.14247362471740768, | |
| "grad_norm": 36.19523239135742, | |
| "learning_rate": 3.5611680207229486e-05, | |
| "loss": 0.8853, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.14365109269027881, | |
| "grad_norm": 20.434724807739258, | |
| "learning_rate": 3.5906040268456373e-05, | |
| "loss": 0.8872, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.14482856066314997, | |
| "grad_norm": 25.5008544921875, | |
| "learning_rate": 3.620040032968327e-05, | |
| "loss": 0.8819, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.1460060286360211, | |
| "grad_norm": 66.22479248046875, | |
| "learning_rate": 3.649476039091016e-05, | |
| "loss": 0.8754, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.14718349660889224, | |
| "grad_norm": 19.697364807128906, | |
| "learning_rate": 3.678912045213706e-05, | |
| "loss": 0.8713, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.14836096458176337, | |
| "grad_norm": 20.61383628845215, | |
| "learning_rate": 3.708348051336395e-05, | |
| "loss": 0.8626, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.1495384325546345, | |
| "grad_norm": 17.327913284301758, | |
| "learning_rate": 3.737784057459084e-05, | |
| "loss": 0.8773, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.15071590052750566, | |
| "grad_norm": 61.033538818359375, | |
| "learning_rate": 3.7672200635817735e-05, | |
| "loss": 0.8651, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.1518933685003768, | |
| "grad_norm": 209.96270751953125, | |
| "learning_rate": 3.796656069704463e-05, | |
| "loss": 0.8564, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.15307083647324793, | |
| "grad_norm": 25.952232360839844, | |
| "learning_rate": 3.826092075827152e-05, | |
| "loss": 0.843, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.15424830444611906, | |
| "grad_norm": 32.41584777832031, | |
| "learning_rate": 3.855528081949841e-05, | |
| "loss": 0.8602, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.1554257724189902, | |
| "grad_norm": 12.570914268493652, | |
| "learning_rate": 3.8849640880725307e-05, | |
| "loss": 0.8638, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.15660324039186135, | |
| "grad_norm": 39.16158676147461, | |
| "learning_rate": 3.9144000941952194e-05, | |
| "loss": 0.8333, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.15778070836473249, | |
| "grad_norm": 88.96316528320312, | |
| "learning_rate": 3.943836100317909e-05, | |
| "loss": 0.8476, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.15895817633760362, | |
| "grad_norm": 29.973859786987305, | |
| "learning_rate": 3.9732721064405984e-05, | |
| "loss": 0.8369, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.16013564431047475, | |
| "grad_norm": 48.19563674926758, | |
| "learning_rate": 4.002708112563288e-05, | |
| "loss": 0.8138, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.16131311228334588, | |
| "grad_norm": 21.87266731262207, | |
| "learning_rate": 4.032144118685977e-05, | |
| "loss": 0.8497, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.16249058025621704, | |
| "grad_norm": 40.32388687133789, | |
| "learning_rate": 4.061580124808666e-05, | |
| "loss": 0.809, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.16366804822908818, | |
| "grad_norm": 66.052734375, | |
| "learning_rate": 4.0910161309313556e-05, | |
| "loss": 0.8396, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.1648455162019593, | |
| "grad_norm": 17.28368377685547, | |
| "learning_rate": 4.120452137054045e-05, | |
| "loss": 0.8478, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.16602298417483044, | |
| "grad_norm": 36.08332824707031, | |
| "learning_rate": 4.149888143176734e-05, | |
| "loss": 0.828, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.16720045214770157, | |
| "grad_norm": 33.32647705078125, | |
| "learning_rate": 4.179324149299423e-05, | |
| "loss": 0.8348, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.16837792012057273, | |
| "grad_norm": 84.66690063476562, | |
| "learning_rate": 4.208760155422112e-05, | |
| "loss": 0.7938, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.16955538809344387, | |
| "grad_norm": 115.47782897949219, | |
| "learning_rate": 4.2381961615448016e-05, | |
| "loss": 0.8115, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.170732856066315, | |
| "grad_norm": 30.028301239013672, | |
| "learning_rate": 4.267632167667491e-05, | |
| "loss": 0.8344, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.17191032403918613, | |
| "grad_norm": 104.7485122680664, | |
| "learning_rate": 4.2970681737901805e-05, | |
| "loss": 0.8141, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.17308779201205726, | |
| "grad_norm": 963.008056640625, | |
| "learning_rate": 4.32650417991287e-05, | |
| "loss": 0.7877, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.17426525998492842, | |
| "grad_norm": 17.78093719482422, | |
| "learning_rate": 4.355940186035559e-05, | |
| "loss": 0.7869, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.17544272795779955, | |
| "grad_norm": 29.313289642333984, | |
| "learning_rate": 4.385376192158248e-05, | |
| "loss": 0.8084, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.1766201959306707, | |
| "grad_norm": 26.251182556152344, | |
| "learning_rate": 4.414812198280938e-05, | |
| "loss": 0.8102, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.17779766390354182, | |
| "grad_norm": 15.284724235534668, | |
| "learning_rate": 4.4442482044036265e-05, | |
| "loss": 0.7954, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.17897513187641295, | |
| "grad_norm": 17.943359375, | |
| "learning_rate": 4.473684210526316e-05, | |
| "loss": 0.8042, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.1801525998492841, | |
| "grad_norm": 24.00495147705078, | |
| "learning_rate": 4.5031202166490054e-05, | |
| "loss": 0.8011, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.18133006782215524, | |
| "grad_norm": 43.83684539794922, | |
| "learning_rate": 4.532556222771694e-05, | |
| "loss": 0.7911, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.18250753579502638, | |
| "grad_norm": 26.42839241027832, | |
| "learning_rate": 4.5619922288943837e-05, | |
| "loss": 0.7778, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.1836850037678975, | |
| "grad_norm": 63.756202697753906, | |
| "learning_rate": 4.591428235017073e-05, | |
| "loss": 0.7942, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.18486247174076864, | |
| "grad_norm": 75.14784240722656, | |
| "learning_rate": 4.6208642411397626e-05, | |
| "loss": 0.785, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.1860399397136398, | |
| "grad_norm": 16.827974319458008, | |
| "learning_rate": 4.650300247262452e-05, | |
| "loss": 0.7802, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.18721740768651093, | |
| "grad_norm": 24.744388580322266, | |
| "learning_rate": 4.679736253385141e-05, | |
| "loss": 0.7788, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.18839487565938207, | |
| "grad_norm": 44.67934036254883, | |
| "learning_rate": 4.70917225950783e-05, | |
| "loss": 0.7716, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.1895723436322532, | |
| "grad_norm": 17.738672256469727, | |
| "learning_rate": 4.73860826563052e-05, | |
| "loss": 0.7411, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.19074981160512433, | |
| "grad_norm": 225.26141357421875, | |
| "learning_rate": 4.7680442717532086e-05, | |
| "loss": 0.7576, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.1919272795779955, | |
| "grad_norm": 45.020912170410156, | |
| "learning_rate": 4.797480277875898e-05, | |
| "loss": 0.7423, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.19310474755086662, | |
| "grad_norm": 21.80771255493164, | |
| "learning_rate": 4.826916283998587e-05, | |
| "loss": 0.7598, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.19428221552373776, | |
| "grad_norm": 13.382050514221191, | |
| "learning_rate": 4.856352290121276e-05, | |
| "loss": 0.7403, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.1954596834966089, | |
| "grad_norm": 102.00588989257812, | |
| "learning_rate": 4.885788296243966e-05, | |
| "loss": 0.7419, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.19663715146948002, | |
| "grad_norm": 21.822450637817383, | |
| "learning_rate": 4.915224302366655e-05, | |
| "loss": 0.7498, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.19781461944235118, | |
| "grad_norm": 26.330812454223633, | |
| "learning_rate": 4.944660308489345e-05, | |
| "loss": 0.7437, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.1989920874152223, | |
| "grad_norm": 74.99825286865234, | |
| "learning_rate": 4.9740963146120335e-05, | |
| "loss": 0.7723, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.20016955538809345, | |
| "grad_norm": 211.88345336914062, | |
| "learning_rate": 4.999999982942934e-05, | |
| "loss": 0.7463, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.20134702336096458, | |
| "grad_norm": 37.77700424194336, | |
| "learning_rate": 4.9999985141401405e-05, | |
| "loss": 0.7098, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.2025244913338357, | |
| "grad_norm": 15.47326374053955, | |
| "learning_rate": 4.999994676301943e-05, | |
| "loss": 0.7135, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.20370195930670687, | |
| "grad_norm": 36.71703338623047, | |
| "learning_rate": 4.999988469431976e-05, | |
| "loss": 0.7226, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.204879427279578, | |
| "grad_norm": 12.64379596710205, | |
| "learning_rate": 4.999979893536123e-05, | |
| "loss": 0.7266, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.20605689525244913, | |
| "grad_norm": 13.452332496643066, | |
| "learning_rate": 4.9999689486225106e-05, | |
| "loss": 0.7117, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.20723436322532027, | |
| "grad_norm": 159.58099365234375, | |
| "learning_rate": 4.9999556347015095e-05, | |
| "loss": 0.7298, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.2084118311981914, | |
| "grad_norm": 16.47060775756836, | |
| "learning_rate": 4.999939951785736e-05, | |
| "loss": 0.7203, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.20958929917106256, | |
| "grad_norm": 21.21535873413086, | |
| "learning_rate": 4.9999218998900523e-05, | |
| "loss": 0.716, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.2107667671439337, | |
| "grad_norm": 110.04914093017578, | |
| "learning_rate": 4.999901479031564e-05, | |
| "loss": 0.7329, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.21194423511680482, | |
| "grad_norm": 38.44062423706055, | |
| "learning_rate": 4.999878689229623e-05, | |
| "loss": 0.6916, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.21312170308967596, | |
| "grad_norm": 30.527116775512695, | |
| "learning_rate": 4.999853530505824e-05, | |
| "loss": 0.7027, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.2142991710625471, | |
| "grad_norm": 29.67304039001465, | |
| "learning_rate": 4.999826002884009e-05, | |
| "loss": 0.694, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.21547663903541825, | |
| "grad_norm": 56.335365295410156, | |
| "learning_rate": 4.999796106390263e-05, | |
| "loss": 0.7201, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.21665410700828938, | |
| "grad_norm": 21.41983985900879, | |
| "learning_rate": 4.999763841052917e-05, | |
| "loss": 0.6969, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.21783157498116051, | |
| "grad_norm": 51.87239074707031, | |
| "learning_rate": 4.999729206902545e-05, | |
| "loss": 0.7047, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.21900904295403165, | |
| "grad_norm": 25.496810913085938, | |
| "learning_rate": 4.9996922039719675e-05, | |
| "loss": 0.7165, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.22018651092690278, | |
| "grad_norm": 63.06888198852539, | |
| "learning_rate": 4.999652832296249e-05, | |
| "loss": 0.7115, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.22136397889977394, | |
| "grad_norm": 11.511476516723633, | |
| "learning_rate": 4.999611091912698e-05, | |
| "loss": 0.7008, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.22254144687264507, | |
| "grad_norm": 18.342121124267578, | |
| "learning_rate": 4.9995669828608695e-05, | |
| "loss": 0.6988, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.2237189148455162, | |
| "grad_norm": 150.98287963867188, | |
| "learning_rate": 4.999520505182561e-05, | |
| "loss": 0.6715, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.22489638281838734, | |
| "grad_norm": 36.15058135986328, | |
| "learning_rate": 4.999471658921816e-05, | |
| "loss": 0.7017, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.22607385079125847, | |
| "grad_norm": 19.319927215576172, | |
| "learning_rate": 4.999420444124922e-05, | |
| "loss": 0.6897, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.22725131876412963, | |
| "grad_norm": 28.105056762695312, | |
| "learning_rate": 4.9993668608404096e-05, | |
| "loss": 0.679, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.22842878673700076, | |
| "grad_norm": 18.27001953125, | |
| "learning_rate": 4.999310909119057e-05, | |
| "loss": 0.6848, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.2296062547098719, | |
| "grad_norm": 20.29434585571289, | |
| "learning_rate": 4.999252589013883e-05, | |
| "loss": 0.6932, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.23078372268274303, | |
| "grad_norm": 23.66309356689453, | |
| "learning_rate": 4.999191900580155e-05, | |
| "loss": 0.7086, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.23196119065561416, | |
| "grad_norm": 34.9160270690918, | |
| "learning_rate": 4.9991288438753794e-05, | |
| "loss": 0.6828, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.23313865862848532, | |
| "grad_norm": 73.04290008544922, | |
| "learning_rate": 4.999063418959311e-05, | |
| "loss": 0.7024, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.23431612660135645, | |
| "grad_norm": 15.245363235473633, | |
| "learning_rate": 4.9989956258939484e-05, | |
| "loss": 0.6819, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.23549359457422758, | |
| "grad_norm": 9.7080078125, | |
| "learning_rate": 4.998925464743531e-05, | |
| "loss": 0.6842, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.23667106254709872, | |
| "grad_norm": 12.597461700439453, | |
| "learning_rate": 4.998852935574547e-05, | |
| "loss": 0.6707, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.23784853051996985, | |
| "grad_norm": 28.19225311279297, | |
| "learning_rate": 4.9987780384557256e-05, | |
| "loss": 0.6893, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.239025998492841, | |
| "grad_norm": 17.039337158203125, | |
| "learning_rate": 4.9987007734580386e-05, | |
| "loss": 0.6803, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.24020346646571214, | |
| "grad_norm": 83.43086242675781, | |
| "learning_rate": 4.998621140654705e-05, | |
| "loss": 0.6865, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.24138093443858327, | |
| "grad_norm": 23.12519073486328, | |
| "learning_rate": 4.998539140121186e-05, | |
| "loss": 0.6861, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.2425584024114544, | |
| "grad_norm": 14.634021759033203, | |
| "learning_rate": 4.998454771935186e-05, | |
| "loss": 0.6699, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.24373587038432554, | |
| "grad_norm": 13.147838592529297, | |
| "learning_rate": 4.998368036176654e-05, | |
| "loss": 0.668, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.2449133383571967, | |
| "grad_norm": 121.20626831054688, | |
| "learning_rate": 4.998278932927781e-05, | |
| "loss": 0.6685, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.24609080633006783, | |
| "grad_norm": 36.35004806518555, | |
| "learning_rate": 4.998187462273004e-05, | |
| "loss": 0.6794, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.24726827430293896, | |
| "grad_norm": 173.51571655273438, | |
| "learning_rate": 4.9980936242990015e-05, | |
| "loss": 0.6835, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.2484457422758101, | |
| "grad_norm": 16.550615310668945, | |
| "learning_rate": 4.997997419094696e-05, | |
| "loss": 0.6682, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.24962321024868123, | |
| "grad_norm": 31.895750045776367, | |
| "learning_rate": 4.997898846751251e-05, | |
| "loss": 0.6526, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.2508006782215524, | |
| "grad_norm": 91.27217864990234, | |
| "learning_rate": 4.9977979073620774e-05, | |
| "loss": 0.6457, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.2519781461944235, | |
| "grad_norm": 18.613304138183594, | |
| "learning_rate": 4.997694601022826e-05, | |
| "loss": 0.6745, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.25315561416729465, | |
| "grad_norm": 15.010387420654297, | |
| "learning_rate": 4.997588927831391e-05, | |
| "loss": 0.6703, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.2543330821401658, | |
| "grad_norm": 40.144100189208984, | |
| "learning_rate": 4.997480887887912e-05, | |
| "loss": 0.6512, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.2555105501130369, | |
| "grad_norm": 83.31613159179688, | |
| "learning_rate": 4.997370481294766e-05, | |
| "loss": 0.6482, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.2566880180859081, | |
| "grad_norm": 142.00633239746094, | |
| "learning_rate": 4.997257708156578e-05, | |
| "loss": 0.6444, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.2578654860587792, | |
| "grad_norm": 12.526217460632324, | |
| "learning_rate": 4.997142568580213e-05, | |
| "loss": 0.6594, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.25904295403165034, | |
| "grad_norm": 10.37883472442627, | |
| "learning_rate": 4.9970250626747794e-05, | |
| "loss": 0.6404, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.2602204220045215, | |
| "grad_norm": 23.270999908447266, | |
| "learning_rate": 4.9969051905516264e-05, | |
| "loss": 0.6525, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.2613978899773926, | |
| "grad_norm": 7.1313252449035645, | |
| "learning_rate": 4.996782952324348e-05, | |
| "loss": 0.6537, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.26257535795026377, | |
| "grad_norm": 18.296316146850586, | |
| "learning_rate": 4.996658348108778e-05, | |
| "loss": 0.6306, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.26375282592313487, | |
| "grad_norm": 10.690421104431152, | |
| "learning_rate": 4.996531378022993e-05, | |
| "loss": 0.6426, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.26493029389600603, | |
| "grad_norm": 25.587663650512695, | |
| "learning_rate": 4.996402042187313e-05, | |
| "loss": 0.6447, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.2661077618688772, | |
| "grad_norm": 44.08433151245117, | |
| "learning_rate": 4.996270340724297e-05, | |
| "loss": 0.6523, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.2672852298417483, | |
| "grad_norm": 10.2158842086792, | |
| "learning_rate": 4.9961362737587476e-05, | |
| "loss": 0.6415, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.26846269781461946, | |
| "grad_norm": 16.302034378051758, | |
| "learning_rate": 4.995999841417709e-05, | |
| "loss": 0.6465, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.26964016578749056, | |
| "grad_norm": 9.03493881225586, | |
| "learning_rate": 4.995861043830467e-05, | |
| "loss": 0.6485, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.2708176337603617, | |
| "grad_norm": 55.2092399597168, | |
| "learning_rate": 4.995719881128548e-05, | |
| "loss": 0.633, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.2719951017332329, | |
| "grad_norm": 14.244236946105957, | |
| "learning_rate": 4.995576353445718e-05, | |
| "loss": 0.6398, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.273172569706104, | |
| "grad_norm": 16.29423713684082, | |
| "learning_rate": 4.995430460917989e-05, | |
| "loss": 0.635, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.27435003767897514, | |
| "grad_norm": 16.8837890625, | |
| "learning_rate": 4.995282203683609e-05, | |
| "loss": 0.6311, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.27552750565184625, | |
| "grad_norm": 27.479188919067383, | |
| "learning_rate": 4.995131581883069e-05, | |
| "loss": 0.6183, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.2767049736247174, | |
| "grad_norm": 22.264968872070312, | |
| "learning_rate": 4.994978595659101e-05, | |
| "loss": 0.6217, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.27788244159758857, | |
| "grad_norm": 33.55051803588867, | |
| "learning_rate": 4.9948232451566754e-05, | |
| "loss": 0.6244, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.2790599095704597, | |
| "grad_norm": 14.833633422851562, | |
| "learning_rate": 4.994665530523007e-05, | |
| "loss": 0.6148, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.28023737754333083, | |
| "grad_norm": 20.879810333251953, | |
| "learning_rate": 4.994505451907546e-05, | |
| "loss": 0.6412, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.28141484551620194, | |
| "grad_norm": 20.95462417602539, | |
| "learning_rate": 4.994343009461988e-05, | |
| "loss": 0.6383, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.2825923134890731, | |
| "grad_norm": 17.24226188659668, | |
| "learning_rate": 4.994178203340264e-05, | |
| "loss": 0.628, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.28376978146194426, | |
| "grad_norm": 25.367177963256836, | |
| "learning_rate": 4.9940110336985465e-05, | |
| "loss": 0.6122, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.28494724943481536, | |
| "grad_norm": 21.224437713623047, | |
| "learning_rate": 4.993841500695249e-05, | |
| "loss": 0.6304, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.2861247174076865, | |
| "grad_norm": 401.3937683105469, | |
| "learning_rate": 4.9936696044910224e-05, | |
| "loss": 0.6331, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.28730218538055763, | |
| "grad_norm": 10.814560890197754, | |
| "learning_rate": 4.9934953452487596e-05, | |
| "loss": 0.6339, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.2884796533534288, | |
| "grad_norm": 12.864246368408203, | |
| "learning_rate": 4.9933187231335895e-05, | |
| "loss": 0.6132, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.28965712132629995, | |
| "grad_norm": 14.243012428283691, | |
| "learning_rate": 4.993139738312884e-05, | |
| "loss": 0.625, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.29083458929917105, | |
| "grad_norm": 18.89797019958496, | |
| "learning_rate": 4.992958390956249e-05, | |
| "loss": 0.6226, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.2920120572720422, | |
| "grad_norm": 413.899169921875, | |
| "learning_rate": 4.9927746812355336e-05, | |
| "loss": 0.5958, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.2931895252449133, | |
| "grad_norm": 29.873369216918945, | |
| "learning_rate": 4.992588609324823e-05, | |
| "loss": 0.608, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.2943669932177845, | |
| "grad_norm": 10.579913139343262, | |
| "learning_rate": 4.992400175400444e-05, | |
| "loss": 0.6148, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.29554446119065564, | |
| "grad_norm": 53.12296676635742, | |
| "learning_rate": 4.992209379640955e-05, | |
| "loss": 0.5993, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.29672192916352674, | |
| "grad_norm": 33.217254638671875, | |
| "learning_rate": 4.9920162222271616e-05, | |
| "loss": 0.62, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.2978993971363979, | |
| "grad_norm": 14.847016334533691, | |
| "learning_rate": 4.991820703342099e-05, | |
| "loss": 0.6108, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.299076865109269, | |
| "grad_norm": 8.893908500671387, | |
| "learning_rate": 4.991622823171046e-05, | |
| "loss": 0.6154, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.30025433308214017, | |
| "grad_norm": 19.143251419067383, | |
| "learning_rate": 4.9914225819015156e-05, | |
| "loss": 0.6068, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.30143180105501133, | |
| "grad_norm": 39.867637634277344, | |
| "learning_rate": 4.9912199797232604e-05, | |
| "loss": 0.6121, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.30260926902788243, | |
| "grad_norm": 11.49783706665039, | |
| "learning_rate": 4.991015016828269e-05, | |
| "loss": 0.6047, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.3037867370007536, | |
| "grad_norm": 18.417495727539062, | |
| "learning_rate": 4.9908076934107655e-05, | |
| "loss": 0.6191, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.3049642049736247, | |
| "grad_norm": 17.24270248413086, | |
| "learning_rate": 4.9905980096672146e-05, | |
| "loss": 0.6212, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.30614167294649586, | |
| "grad_norm": 10.193714141845703, | |
| "learning_rate": 4.990385965796315e-05, | |
| "loss": 0.5895, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.307319140919367, | |
| "grad_norm": 17.702852249145508, | |
| "learning_rate": 4.9901715619990026e-05, | |
| "loss": 0.605, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.3084966088922381, | |
| "grad_norm": 17.40943717956543, | |
| "learning_rate": 4.989954798478449e-05, | |
| "loss": 0.6032, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.3096740768651093, | |
| "grad_norm": 29.134885787963867, | |
| "learning_rate": 4.9897356754400646e-05, | |
| "loss": 0.6102, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.3108515448379804, | |
| "grad_norm": 31.190221786499023, | |
| "learning_rate": 4.989514193091491e-05, | |
| "loss": 0.6037, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.31202901281085155, | |
| "grad_norm": 16.936580657958984, | |
| "learning_rate": 4.98929035164261e-05, | |
| "loss": 0.624, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.3132064807837227, | |
| "grad_norm": 28.878084182739258, | |
| "learning_rate": 4.9890641513055356e-05, | |
| "loss": 0.5916, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.3143839487565938, | |
| "grad_norm": 26.654775619506836, | |
| "learning_rate": 4.98883559229462e-05, | |
| "loss": 0.5916, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.31556141672946497, | |
| "grad_norm": 6.164857864379883, | |
| "learning_rate": 4.988604674826448e-05, | |
| "loss": 0.6022, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.3167388847023361, | |
| "grad_norm": 39.537601470947266, | |
| "learning_rate": 4.988371399119841e-05, | |
| "loss": 0.5913, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.31791635267520724, | |
| "grad_norm": 13.560423851013184, | |
| "learning_rate": 4.9881357653958545e-05, | |
| "loss": 0.6084, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.3190938206480784, | |
| "grad_norm": 64.97435760498047, | |
| "learning_rate": 4.987897773877778e-05, | |
| "loss": 0.6209, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.3202712886209495, | |
| "grad_norm": 25.303564071655273, | |
| "learning_rate": 4.987657424791136e-05, | |
| "loss": 0.6021, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.32144875659382066, | |
| "grad_norm": 15.440890312194824, | |
| "learning_rate": 4.987414718363687e-05, | |
| "loss": 0.5892, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.32262622456669177, | |
| "grad_norm": 23.87912368774414, | |
| "learning_rate": 4.987169654825423e-05, | |
| "loss": 0.5906, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.3238036925395629, | |
| "grad_norm": 13.745635032653809, | |
| "learning_rate": 4.9869222344085695e-05, | |
| "loss": 0.5936, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.3249811605124341, | |
| "grad_norm": 37.19462203979492, | |
| "learning_rate": 4.986672457347588e-05, | |
| "loss": 0.563, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.3261586284853052, | |
| "grad_norm": 22.92323875427246, | |
| "learning_rate": 4.986420323879167e-05, | |
| "loss": 0.5725, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.32733609645817635, | |
| "grad_norm": 39.19350814819336, | |
| "learning_rate": 4.986165834242235e-05, | |
| "loss": 0.5958, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.32851356443104746, | |
| "grad_norm": 19.643781661987305, | |
| "learning_rate": 4.9859089886779475e-05, | |
| "loss": 0.5632, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.3296910324039186, | |
| "grad_norm": 16.849578857421875, | |
| "learning_rate": 4.9856497874296984e-05, | |
| "loss": 0.5925, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.3308685003767898, | |
| "grad_norm": 38.75376892089844, | |
| "learning_rate": 4.985388230743108e-05, | |
| "loss": 0.587, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.3320459683496609, | |
| "grad_norm": 13.032364845275879, | |
| "learning_rate": 4.9851243188660325e-05, | |
| "loss": 0.5955, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.33322343632253204, | |
| "grad_norm": 27.331321716308594, | |
| "learning_rate": 4.9848580520485586e-05, | |
| "loss": 0.5845, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.33440090429540315, | |
| "grad_norm": 9.578264236450195, | |
| "learning_rate": 4.984589430543004e-05, | |
| "loss": 0.5688, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.3355783722682743, | |
| "grad_norm": 27.368913650512695, | |
| "learning_rate": 4.984318454603919e-05, | |
| "loss": 0.5773, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.33675584024114547, | |
| "grad_norm": 51.01844787597656, | |
| "learning_rate": 4.984045124488084e-05, | |
| "loss": 0.5665, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.33793330821401657, | |
| "grad_norm": 34.19673156738281, | |
| "learning_rate": 4.983769440454511e-05, | |
| "loss": 0.579, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.33911077618688773, | |
| "grad_norm": 14.910712242126465, | |
| "learning_rate": 4.983491402764442e-05, | |
| "loss": 0.5757, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.34028824415975883, | |
| "grad_norm": 9.398964881896973, | |
| "learning_rate": 4.98321101168135e-05, | |
| "loss": 0.581, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.34146571213263, | |
| "grad_norm": 32.145729064941406, | |
| "learning_rate": 4.982928267470938e-05, | |
| "loss": 0.5873, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.34264318010550116, | |
| "grad_norm": 28.668739318847656, | |
| "learning_rate": 4.9826431704011366e-05, | |
| "loss": 0.5791, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.34382064807837226, | |
| "grad_norm": 14.041146278381348, | |
| "learning_rate": 4.98235572074211e-05, | |
| "loss": 0.577, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.3449981160512434, | |
| "grad_norm": 41.43647384643555, | |
| "learning_rate": 4.982065918766249e-05, | |
| "loss": 0.5608, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.3461755840241145, | |
| "grad_norm": 153.56007385253906, | |
| "learning_rate": 4.9817737647481746e-05, | |
| "loss": 0.5555, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.3473530519969857, | |
| "grad_norm": 30.211868286132812, | |
| "learning_rate": 4.9814792589647364e-05, | |
| "loss": 0.563, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.34853051996985684, | |
| "grad_norm": 9.888477325439453, | |
| "learning_rate": 4.981182401695011e-05, | |
| "loss": 0.5729, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.34970798794272795, | |
| "grad_norm": 20.61911964416504, | |
| "learning_rate": 4.980883193220306e-05, | |
| "loss": 0.5595, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.3508854559155991, | |
| "grad_norm": 33.634788513183594, | |
| "learning_rate": 4.980581633824156e-05, | |
| "loss": 0.5765, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.3520629238884702, | |
| "grad_norm": 21.180368423461914, | |
| "learning_rate": 4.980277723792322e-05, | |
| "loss": 0.5668, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.3532403918613414, | |
| "grad_norm": 18.765335083007812, | |
| "learning_rate": 4.9799714634127945e-05, | |
| "loss": 0.5759, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.35441785983421253, | |
| "grad_norm": 8.680352210998535, | |
| "learning_rate": 4.9796628529757905e-05, | |
| "loss": 0.5652, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.35559532780708364, | |
| "grad_norm": 9.612824440002441, | |
| "learning_rate": 4.979351892773753e-05, | |
| "loss": 0.5677, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.3567727957799548, | |
| "grad_norm": 9.030202865600586, | |
| "learning_rate": 4.979038583101352e-05, | |
| "loss": 0.551, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.3579502637528259, | |
| "grad_norm": 14.939108848571777, | |
| "learning_rate": 4.978722924255486e-05, | |
| "loss": 0.5583, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.35912773172569706, | |
| "grad_norm": 16.380714416503906, | |
| "learning_rate": 4.9784049165352775e-05, | |
| "loss": 0.5604, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.3603051996985682, | |
| "grad_norm": 11.510544776916504, | |
| "learning_rate": 4.978084560242075e-05, | |
| "loss": 0.5631, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.36148266767143933, | |
| "grad_norm": 20.98238754272461, | |
| "learning_rate": 4.977761855679451e-05, | |
| "loss": 0.5634, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.3626601356443105, | |
| "grad_norm": 26.42758560180664, | |
| "learning_rate": 4.9774368031532084e-05, | |
| "loss": 0.5598, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.3638376036171816, | |
| "grad_norm": 23.497520446777344, | |
| "learning_rate": 4.9771094029713705e-05, | |
| "loss": 0.5672, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.36501507159005275, | |
| "grad_norm": 126.72555541992188, | |
| "learning_rate": 4.976779655444186e-05, | |
| "loss": 0.5612, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.3661925395629239, | |
| "grad_norm": 564.0137329101562, | |
| "learning_rate": 4.9764475608841285e-05, | |
| "loss": 0.5589, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.367370007535795, | |
| "grad_norm": 7.599761009216309, | |
| "learning_rate": 4.976113119605896e-05, | |
| "loss": 0.5643, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.3685474755086662, | |
| "grad_norm": 21.206104278564453, | |
| "learning_rate": 4.97577633192641e-05, | |
| "loss": 0.5589, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.3697249434815373, | |
| "grad_norm": 26.903715133666992, | |
| "learning_rate": 4.975437198164816e-05, | |
| "loss": 0.5506, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.37090241145440844, | |
| "grad_norm": 12.74087142944336, | |
| "learning_rate": 4.9750957186424804e-05, | |
| "loss": 0.569, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.3720798794272796, | |
| "grad_norm": 9.654675483703613, | |
| "learning_rate": 4.974751893682996e-05, | |
| "loss": 0.549, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.3732573474001507, | |
| "grad_norm": 16.640594482421875, | |
| "learning_rate": 4.974405723612176e-05, | |
| "loss": 0.5612, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.37443481537302187, | |
| "grad_norm": 13.887221336364746, | |
| "learning_rate": 4.9740572087580564e-05, | |
| "loss": 0.556, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.37561228334589297, | |
| "grad_norm": 26.20138931274414, | |
| "learning_rate": 4.973706349450894e-05, | |
| "loss": 0.5402, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.37678975131876413, | |
| "grad_norm": 5.653136253356934, | |
| "learning_rate": 4.97335314602317e-05, | |
| "loss": 0.548, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.3779672192916353, | |
| "grad_norm": 15.277802467346191, | |
| "learning_rate": 4.972997598809583e-05, | |
| "loss": 0.5315, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.3791446872645064, | |
| "grad_norm": 43.58806610107422, | |
| "learning_rate": 4.9726397081470553e-05, | |
| "loss": 0.5449, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.38032215523737756, | |
| "grad_norm": 11.691394805908203, | |
| "learning_rate": 4.9722794743747316e-05, | |
| "loss": 0.5388, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.38149962321024866, | |
| "grad_norm": 16.332839965820312, | |
| "learning_rate": 4.971916897833972e-05, | |
| "loss": 0.5509, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.3826770911831198, | |
| "grad_norm": 10.875502586364746, | |
| "learning_rate": 4.9715519788683606e-05, | |
| "loss": 0.5434, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.383854559155991, | |
| "grad_norm": 12.470973014831543, | |
| "learning_rate": 4.971184717823699e-05, | |
| "loss": 0.5411, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.3850320271288621, | |
| "grad_norm": 19.289705276489258, | |
| "learning_rate": 4.970815115048011e-05, | |
| "loss": 0.5364, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.38620949510173325, | |
| "grad_norm": 15.058762550354004, | |
| "learning_rate": 4.9704431708915365e-05, | |
| "loss": 0.5336, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.38738696307460435, | |
| "grad_norm": 14.070786476135254, | |
| "learning_rate": 4.970068885706736e-05, | |
| "loss": 0.533, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.3885644310474755, | |
| "grad_norm": 8.538634300231934, | |
| "learning_rate": 4.9696922598482854e-05, | |
| "loss": 0.5339, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.38974189902034667, | |
| "grad_norm": 5.575499534606934, | |
| "learning_rate": 4.969313293673084e-05, | |
| "loss": 0.54, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.3909193669932178, | |
| "grad_norm": 5.332086563110352, | |
| "learning_rate": 4.968931987540243e-05, | |
| "loss": 0.5488, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.39209683496608894, | |
| "grad_norm": 9.076286315917969, | |
| "learning_rate": 4.968548341811096e-05, | |
| "loss": 0.5327, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.39327430293896004, | |
| "grad_norm": 20.207744598388672, | |
| "learning_rate": 4.96816235684919e-05, | |
| "loss": 0.5254, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.3944517709118312, | |
| "grad_norm": 24.268632888793945, | |
| "learning_rate": 4.96777403302029e-05, | |
| "loss": 0.5376, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.39562923888470236, | |
| "grad_norm": 11.742340087890625, | |
| "learning_rate": 4.967383370692378e-05, | |
| "loss": 0.5377, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.39680670685757347, | |
| "grad_norm": 16.477985382080078, | |
| "learning_rate": 4.966990370235651e-05, | |
| "loss": 0.5343, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.3979841748304446, | |
| "grad_norm": 5.740753650665283, | |
| "learning_rate": 4.9665950320225215e-05, | |
| "loss": 0.5354, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.39916164280331573, | |
| "grad_norm": 6.4536895751953125, | |
| "learning_rate": 4.96619735642762e-05, | |
| "loss": 0.5335, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.4003391107761869, | |
| "grad_norm": 9.816080093383789, | |
| "learning_rate": 4.965797343827787e-05, | |
| "loss": 0.5352, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.40151657874905805, | |
| "grad_norm": 27.946269989013672, | |
| "learning_rate": 4.965394994602082e-05, | |
| "loss": 0.535, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.40269404672192916, | |
| "grad_norm": 17.012920379638672, | |
| "learning_rate": 4.9649903091317763e-05, | |
| "loss": 0.5385, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.4038715146948003, | |
| "grad_norm": 13.954458236694336, | |
| "learning_rate": 4.964583287800356e-05, | |
| "loss": 0.5297, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.4050489826676714, | |
| "grad_norm": 10.597694396972656, | |
| "learning_rate": 4.9641739309935206e-05, | |
| "loss": 0.5287, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.4062264506405426, | |
| "grad_norm": 25.098743438720703, | |
| "learning_rate": 4.9637622390991825e-05, | |
| "loss": 0.5274, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.40740391861341374, | |
| "grad_norm": 10.398055076599121, | |
| "learning_rate": 4.963348212507467e-05, | |
| "loss": 0.5223, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.40858138658628484, | |
| "grad_norm": 10.347573280334473, | |
| "learning_rate": 4.962931851610713e-05, | |
| "loss": 0.5346, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.409758854559156, | |
| "grad_norm": 27.749868392944336, | |
| "learning_rate": 4.962513156803468e-05, | |
| "loss": 0.5202, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.4109363225320271, | |
| "grad_norm": 13.547270774841309, | |
| "learning_rate": 4.962092128482495e-05, | |
| "loss": 0.5398, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.41211379050489827, | |
| "grad_norm": 71.393798828125, | |
| "learning_rate": 4.9616687670467655e-05, | |
| "loss": 0.5132, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.41329125847776943, | |
| "grad_norm": 3.4714207649230957, | |
| "learning_rate": 4.961243072897464e-05, | |
| "loss": 0.5258, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.41446872645064053, | |
| "grad_norm": 18.045419692993164, | |
| "learning_rate": 4.9608150464379844e-05, | |
| "loss": 0.5301, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.4156461944235117, | |
| "grad_norm": 5.658825874328613, | |
| "learning_rate": 4.96038468807393e-05, | |
| "loss": 0.5191, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.4168236623963828, | |
| "grad_norm": 6.130117893218994, | |
| "learning_rate": 4.959951998213116e-05, | |
| "loss": 0.5163, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.41800113036925396, | |
| "grad_norm": 4.835055828094482, | |
| "learning_rate": 4.959516977265565e-05, | |
| "loss": 0.5302, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.4191785983421251, | |
| "grad_norm": 12.25149917602539, | |
| "learning_rate": 4.959079625643509e-05, | |
| "loss": 0.5259, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.4203560663149962, | |
| "grad_norm": 7.990649223327637, | |
| "learning_rate": 4.95863994376139e-05, | |
| "loss": 0.5243, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.4215335342878674, | |
| "grad_norm": 42.99150085449219, | |
| "learning_rate": 4.9581979320358564e-05, | |
| "loss": 0.5236, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.4227110022607385, | |
| "grad_norm": 6.2766571044921875, | |
| "learning_rate": 4.957753590885764e-05, | |
| "loss": 0.5204, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.42388847023360965, | |
| "grad_norm": 8.19412612915039, | |
| "learning_rate": 4.957306920732177e-05, | |
| "loss": 0.5238, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.4250659382064808, | |
| "grad_norm": 9.799030303955078, | |
| "learning_rate": 4.9568579219983693e-05, | |
| "loss": 0.5134, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.4262434061793519, | |
| "grad_norm": 7.384710311889648, | |
| "learning_rate": 4.956406595109816e-05, | |
| "loss": 0.5153, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.4274208741522231, | |
| "grad_norm": 9.234545707702637, | |
| "learning_rate": 4.9559529404942015e-05, | |
| "loss": 0.5196, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.4285983421250942, | |
| "grad_norm": 29.552440643310547, | |
| "learning_rate": 4.955496958581417e-05, | |
| "loss": 0.5069, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.42977581009796534, | |
| "grad_norm": 10.646990776062012, | |
| "learning_rate": 4.955038649803556e-05, | |
| "loss": 0.5188, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.4309532780708365, | |
| "grad_norm": 7.426240921020508, | |
| "learning_rate": 4.954578014594919e-05, | |
| "loss": 0.5046, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.4321307460437076, | |
| "grad_norm": 15.19766902923584, | |
| "learning_rate": 4.954115053392012e-05, | |
| "loss": 0.5008, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.43330821401657876, | |
| "grad_norm": 3.9134976863861084, | |
| "learning_rate": 4.953649766633543e-05, | |
| "loss": 0.5116, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.43448568198944987, | |
| "grad_norm": 28.57962417602539, | |
| "learning_rate": 4.953182154760424e-05, | |
| "loss": 0.5131, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.43566314996232103, | |
| "grad_norm": 9.201138496398926, | |
| "learning_rate": 4.952712218215772e-05, | |
| "loss": 0.514, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.4368406179351922, | |
| "grad_norm": 4.026820182800293, | |
| "learning_rate": 4.952239957444905e-05, | |
| "loss": 0.5141, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.4380180859080633, | |
| "grad_norm": 8.49820613861084, | |
| "learning_rate": 4.951765372895344e-05, | |
| "loss": 0.513, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.43919555388093445, | |
| "grad_norm": 11.013725280761719, | |
| "learning_rate": 4.951288465016813e-05, | |
| "loss": 0.5191, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.44037302185380556, | |
| "grad_norm": 14.165763854980469, | |
| "learning_rate": 4.9508092342612365e-05, | |
| "loss": 0.5192, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.4415504898266767, | |
| "grad_norm": 12.503982543945312, | |
| "learning_rate": 4.950327681082742e-05, | |
| "loss": 0.494, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.4427279577995479, | |
| "grad_norm": 19.506237030029297, | |
| "learning_rate": 4.949843805937654e-05, | |
| "loss": 0.4922, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.443905425772419, | |
| "grad_norm": 8.808703422546387, | |
| "learning_rate": 4.9493576092845014e-05, | |
| "loss": 0.5045, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.44508289374529014, | |
| "grad_norm": 20.078441619873047, | |
| "learning_rate": 4.948869091584011e-05, | |
| "loss": 0.5088, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.44626036171816125, | |
| "grad_norm": 7.974308490753174, | |
| "learning_rate": 4.9483782532991084e-05, | |
| "loss": 0.4935, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.4474378296910324, | |
| "grad_norm": 4.810613632202148, | |
| "learning_rate": 4.9478850948949207e-05, | |
| "loss": 0.5275, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.44861529766390357, | |
| "grad_norm": 8.379694938659668, | |
| "learning_rate": 4.9473896168387714e-05, | |
| "loss": 0.5155, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.44979276563677467, | |
| "grad_norm": 13.977643013000488, | |
| "learning_rate": 4.9468918196001824e-05, | |
| "loss": 0.497, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.45097023360964583, | |
| "grad_norm": 9.306808471679688, | |
| "learning_rate": 4.946391703650874e-05, | |
| "loss": 0.5096, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.45214770158251694, | |
| "grad_norm": 5.565212726593018, | |
| "learning_rate": 4.9458892694647634e-05, | |
| "loss": 0.5042, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.4533251695553881, | |
| "grad_norm": 10.773277282714844, | |
| "learning_rate": 4.945384517517965e-05, | |
| "loss": 0.5006, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.45450263752825926, | |
| "grad_norm": 14.982840538024902, | |
| "learning_rate": 4.944877448288789e-05, | |
| "loss": 0.4996, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.45568010550113036, | |
| "grad_norm": 41.28907775878906, | |
| "learning_rate": 4.9443680622577416e-05, | |
| "loss": 0.4888, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.4568575734740015, | |
| "grad_norm": 14.52718448638916, | |
| "learning_rate": 4.9438563599075236e-05, | |
| "loss": 0.4854, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.4580350414468726, | |
| "grad_norm": 17.74559783935547, | |
| "learning_rate": 4.943342341723034e-05, | |
| "loss": 0.5007, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.4592125094197438, | |
| "grad_norm": 4.745278835296631, | |
| "learning_rate": 4.9428260081913615e-05, | |
| "loss": 0.4956, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.46038997739261495, | |
| "grad_norm": 8.55624771118164, | |
| "learning_rate": 4.942307359801793e-05, | |
| "loss": 0.5078, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.46156744536548605, | |
| "grad_norm": 6.845993518829346, | |
| "learning_rate": 4.941786397045806e-05, | |
| "loss": 0.4827, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.4627449133383572, | |
| "grad_norm": 4.983789920806885, | |
| "learning_rate": 4.941263120417074e-05, | |
| "loss": 0.5063, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.4639223813112283, | |
| "grad_norm": 6.237537860870361, | |
| "learning_rate": 4.9407375304114605e-05, | |
| "loss": 0.5019, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.4650998492840995, | |
| "grad_norm": 9.849225044250488, | |
| "learning_rate": 4.9402096275270226e-05, | |
| "loss": 0.4905, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.46627731725697064, | |
| "grad_norm": 3.9349374771118164, | |
| "learning_rate": 4.9396794122640096e-05, | |
| "loss": 0.4815, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.46745478522984174, | |
| "grad_norm": 5.73204231262207, | |
| "learning_rate": 4.93914688512486e-05, | |
| "loss": 0.5013, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.4686322532027129, | |
| "grad_norm": 20.584959030151367, | |
| "learning_rate": 4.938612046614205e-05, | |
| "loss": 0.4816, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.469809721175584, | |
| "grad_norm": 6.290115833282471, | |
| "learning_rate": 4.938074897238866e-05, | |
| "loss": 0.4827, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.47098718914845517, | |
| "grad_norm": 4.5813469886779785, | |
| "learning_rate": 4.9375354375078524e-05, | |
| "loss": 0.4936, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.4721646571213263, | |
| "grad_norm": 5.614234447479248, | |
| "learning_rate": 4.936993667932366e-05, | |
| "loss": 0.491, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.47334212509419743, | |
| "grad_norm": 7.700331687927246, | |
| "learning_rate": 4.936449589025793e-05, | |
| "loss": 0.4854, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.4745195930670686, | |
| "grad_norm": 12.170330047607422, | |
| "learning_rate": 4.935903201303713e-05, | |
| "loss": 0.4785, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.4756970610399397, | |
| "grad_norm": 8.411639213562012, | |
| "learning_rate": 4.93535450528389e-05, | |
| "loss": 0.4917, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.47687452901281085, | |
| "grad_norm": 14.996103286743164, | |
| "learning_rate": 4.934803501486277e-05, | |
| "loss": 0.5034, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.478051996985682, | |
| "grad_norm": 20.404251098632812, | |
| "learning_rate": 4.9342501904330125e-05, | |
| "loss": 0.4828, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.4792294649585531, | |
| "grad_norm": 25.698162078857422, | |
| "learning_rate": 4.933694572648423e-05, | |
| "loss": 0.4932, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.4804069329314243, | |
| "grad_norm": 11.195846557617188, | |
| "learning_rate": 4.933136648659019e-05, | |
| "loss": 0.5025, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.4815844009042954, | |
| "grad_norm": 16.01174545288086, | |
| "learning_rate": 4.9325764189934985e-05, | |
| "loss": 0.4942, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.48276186887716654, | |
| "grad_norm": 13.14828109741211, | |
| "learning_rate": 4.932013884182743e-05, | |
| "loss": 0.489, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.4839393368500377, | |
| "grad_norm": 3.127265691757202, | |
| "learning_rate": 4.9314490447598186e-05, | |
| "loss": 0.486, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.4851168048229088, | |
| "grad_norm": 6.591541767120361, | |
| "learning_rate": 4.930881901259976e-05, | |
| "loss": 0.4918, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.48629427279577997, | |
| "grad_norm": 20.416730880737305, | |
| "learning_rate": 4.930312454220649e-05, | |
| "loss": 0.4707, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.4874717407686511, | |
| "grad_norm": 8.26778507232666, | |
| "learning_rate": 4.9297407041814526e-05, | |
| "loss": 0.5067, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.48864920874152223, | |
| "grad_norm": 13.52769660949707, | |
| "learning_rate": 4.929166651684186e-05, | |
| "loss": 0.477, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.4898266767143934, | |
| "grad_norm": 20.53351402282715, | |
| "learning_rate": 4.9285902972728314e-05, | |
| "loss": 0.4735, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.4910041446872645, | |
| "grad_norm": 8.244770050048828, | |
| "learning_rate": 4.928011641493549e-05, | |
| "loss": 0.4931, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.49218161266013566, | |
| "grad_norm": 7.644371509552002, | |
| "learning_rate": 4.9274306848946815e-05, | |
| "loss": 0.481, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.49335908063300676, | |
| "grad_norm": 9.137931823730469, | |
| "learning_rate": 4.926847428026753e-05, | |
| "loss": 0.4699, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.4945365486058779, | |
| "grad_norm": 76.88018798828125, | |
| "learning_rate": 4.9262618714424655e-05, | |
| "loss": 0.5037, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.4957140165787491, | |
| "grad_norm": 30.11381721496582, | |
| "learning_rate": 4.925674015696702e-05, | |
| "loss": 0.4775, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.4968914845516202, | |
| "grad_norm": 20.36177635192871, | |
| "learning_rate": 4.9250838613465215e-05, | |
| "loss": 0.4813, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.49806895252449135, | |
| "grad_norm": 8.58780288696289, | |
| "learning_rate": 4.924491408951165e-05, | |
| "loss": 0.4915, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.49924642049736245, | |
| "grad_norm": 9.879990577697754, | |
| "learning_rate": 4.923896659072047e-05, | |
| "loss": 0.4832, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.5004238884702336, | |
| "grad_norm": 11.694302558898926, | |
| "learning_rate": 4.923299612272764e-05, | |
| "loss": 0.481, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.5016013564431048, | |
| "grad_norm": 9.9400634765625, | |
| "learning_rate": 4.922700269119083e-05, | |
| "loss": 0.4629, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.5027788244159759, | |
| "grad_norm": 25.097944259643555, | |
| "learning_rate": 4.922098630178953e-05, | |
| "loss": 0.4682, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.503956292388847, | |
| "grad_norm": 3.444863796234131, | |
| "learning_rate": 4.921494696022495e-05, | |
| "loss": 0.4874, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.5051337603617182, | |
| "grad_norm": 31.27939224243164, | |
| "learning_rate": 4.920888467222006e-05, | |
| "loss": 0.4772, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.5063112283345893, | |
| "grad_norm": 11.116825103759766, | |
| "learning_rate": 4.920279944351956e-05, | |
| "loss": 0.4758, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.5074886963074604, | |
| "grad_norm": 7.495817184448242, | |
| "learning_rate": 4.919669127988993e-05, | |
| "loss": 0.473, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.5086661642803316, | |
| "grad_norm": 4.236988544464111, | |
| "learning_rate": 4.9190560187119336e-05, | |
| "loss": 0.4881, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.5098436322532027, | |
| "grad_norm": 42.83885955810547, | |
| "learning_rate": 4.9184406171017706e-05, | |
| "loss": 0.472, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.5110211002260738, | |
| "grad_norm": 5.7662882804870605, | |
| "learning_rate": 4.917822923741665e-05, | |
| "loss": 0.485, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.5121985681989449, | |
| "grad_norm": 18.703794479370117, | |
| "learning_rate": 4.917202939216955e-05, | |
| "loss": 0.4593, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.5133760361718162, | |
| "grad_norm": 37.928951263427734, | |
| "learning_rate": 4.916580664115146e-05, | |
| "loss": 0.488, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.5145535041446873, | |
| "grad_norm": 10.761280059814453, | |
| "learning_rate": 4.915956099025914e-05, | |
| "loss": 0.4611, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.5157309721175584, | |
| "grad_norm": 11.497634887695312, | |
| "learning_rate": 4.915329244541107e-05, | |
| "loss": 0.4699, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.5169084400904296, | |
| "grad_norm": 3.9913153648376465, | |
| "learning_rate": 4.914700101254742e-05, | |
| "loss": 0.4659, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.5180859080633007, | |
| "grad_norm": 16.224578857421875, | |
| "learning_rate": 4.914068669763005e-05, | |
| "loss": 0.4546, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.5192633760361718, | |
| "grad_norm": 6.127202987670898, | |
| "learning_rate": 4.913434950664247e-05, | |
| "loss": 0.4589, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.520440844009043, | |
| "grad_norm": 17.401851654052734, | |
| "learning_rate": 4.912798944558992e-05, | |
| "loss": 0.4709, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.5216183119819141, | |
| "grad_norm": 6.758654594421387, | |
| "learning_rate": 4.9121606520499283e-05, | |
| "loss": 0.4798, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.5227957799547852, | |
| "grad_norm": 20.36205291748047, | |
| "learning_rate": 4.911520073741911e-05, | |
| "loss": 0.4698, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.5239732479276563, | |
| "grad_norm": 9.44455337524414, | |
| "learning_rate": 4.910877210241961e-05, | |
| "loss": 0.4666, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.5251507159005275, | |
| "grad_norm": 8.453359603881836, | |
| "learning_rate": 4.910232062159267e-05, | |
| "loss": 0.4684, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.5263281838733986, | |
| "grad_norm": 8.231782913208008, | |
| "learning_rate": 4.9095846301051784e-05, | |
| "loss": 0.4557, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.5275056518462697, | |
| "grad_norm": 16.109474182128906, | |
| "learning_rate": 4.908934914693213e-05, | |
| "loss": 0.4799, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.528683119819141, | |
| "grad_norm": 30.345848083496094, | |
| "learning_rate": 4.90828291653905e-05, | |
| "loss": 0.4721, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.5298605877920121, | |
| "grad_norm": 9.078557014465332, | |
| "learning_rate": 4.907628636260533e-05, | |
| "loss": 0.4564, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.5310380557648832, | |
| "grad_norm": 7.780555248260498, | |
| "learning_rate": 4.9069720744776674e-05, | |
| "loss": 0.4643, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.5322155237377544, | |
| "grad_norm": 18.726869583129883, | |
| "learning_rate": 4.906313231812621e-05, | |
| "loss": 0.4786, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.5333929917106255, | |
| "grad_norm": 39.67422866821289, | |
| "learning_rate": 4.9056521088897224e-05, | |
| "loss": 0.4853, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.5345704596834966, | |
| "grad_norm": 21.54363441467285, | |
| "learning_rate": 4.904988706335461e-05, | |
| "loss": 0.469, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.5357479276563677, | |
| "grad_norm": 39.44266128540039, | |
| "learning_rate": 4.904323024778488e-05, | |
| "loss": 0.4798, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.5369253956292389, | |
| "grad_norm": 8.508508682250977, | |
| "learning_rate": 4.903655064849613e-05, | |
| "loss": 0.4676, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.53810286360211, | |
| "grad_norm": 65.33773040771484, | |
| "learning_rate": 4.9029848271818023e-05, | |
| "loss": 0.4595, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.5392803315749811, | |
| "grad_norm": 5.9413862228393555, | |
| "learning_rate": 4.9023123124101865e-05, | |
| "loss": 0.479, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.5404577995478523, | |
| "grad_norm": 4.099421501159668, | |
| "learning_rate": 4.9016375211720485e-05, | |
| "loss": 0.4575, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 0.5416352675207234, | |
| "grad_norm": 7.643558979034424, | |
| "learning_rate": 4.90096045410683e-05, | |
| "loss": 0.4619, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.5428127354935945, | |
| "grad_norm": 6.532565593719482, | |
| "learning_rate": 4.900281111856131e-05, | |
| "loss": 0.4664, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 0.5439902034664658, | |
| "grad_norm": 6.786928176879883, | |
| "learning_rate": 4.899599495063706e-05, | |
| "loss": 0.4615, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.5451676714393369, | |
| "grad_norm": 10.264178276062012, | |
| "learning_rate": 4.898915604375464e-05, | |
| "loss": 0.4576, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 0.546345139412208, | |
| "grad_norm": 224.33949279785156, | |
| "learning_rate": 4.8982294404394716e-05, | |
| "loss": 0.4588, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.5475226073850791, | |
| "grad_norm": 5.424437046051025, | |
| "learning_rate": 4.897541003905945e-05, | |
| "loss": 0.4789, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.5487000753579503, | |
| "grad_norm": 10.393671989440918, | |
| "learning_rate": 4.896850295427261e-05, | |
| "loss": 0.4446, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.5498775433308214, | |
| "grad_norm": 6.611886501312256, | |
| "learning_rate": 4.8961573156579416e-05, | |
| "loss": 0.4571, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 0.5510550113036925, | |
| "grad_norm": 6.91979455947876, | |
| "learning_rate": 4.895462065254666e-05, | |
| "loss": 0.4424, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.5522324792765637, | |
| "grad_norm": 4.5380635261535645, | |
| "learning_rate": 4.894764544876264e-05, | |
| "loss": 0.4694, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 0.5534099472494348, | |
| "grad_norm": 9.971095085144043, | |
| "learning_rate": 4.894064755183715e-05, | |
| "loss": 0.4444, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.5545874152223059, | |
| "grad_norm": 8.661789894104004, | |
| "learning_rate": 4.893362696840151e-05, | |
| "loss": 0.4607, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 0.5557648831951771, | |
| "grad_norm": 5.1170783042907715, | |
| "learning_rate": 4.892658370510853e-05, | |
| "loss": 0.4457, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.5569423511680482, | |
| "grad_norm": 13.117242813110352, | |
| "learning_rate": 4.8919517768632504e-05, | |
| "loss": 0.4646, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 0.5581198191409193, | |
| "grad_norm": 19.30152702331543, | |
| "learning_rate": 4.8912429165669225e-05, | |
| "loss": 0.4509, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.5592972871137905, | |
| "grad_norm": 10.446329116821289, | |
| "learning_rate": 4.890531790293595e-05, | |
| "loss": 0.4569, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.5604747550866617, | |
| "grad_norm": 11.556958198547363, | |
| "learning_rate": 4.889818398717142e-05, | |
| "loss": 0.4629, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.5616522230595328, | |
| "grad_norm": 44.43030548095703, | |
| "learning_rate": 4.889102742513583e-05, | |
| "loss": 0.4603, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 0.5628296910324039, | |
| "grad_norm": 3.154510974884033, | |
| "learning_rate": 4.888384822361085e-05, | |
| "loss": 0.4493, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.5640071590052751, | |
| "grad_norm": 61.21367263793945, | |
| "learning_rate": 4.88766463893996e-05, | |
| "loss": 0.455, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 0.5651846269781462, | |
| "grad_norm": 4.503913879394531, | |
| "learning_rate": 4.8869421929326644e-05, | |
| "loss": 0.4639, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.5663620949510173, | |
| "grad_norm": 8.775500297546387, | |
| "learning_rate": 4.886217485023799e-05, | |
| "loss": 0.4492, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 0.5675395629238885, | |
| "grad_norm": 11.14522933959961, | |
| "learning_rate": 4.885490515900105e-05, | |
| "loss": 0.4416, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.5687170308967596, | |
| "grad_norm": 10.5628080368042, | |
| "learning_rate": 4.884761286250473e-05, | |
| "loss": 0.4556, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 0.5698944988696307, | |
| "grad_norm": 17.35209083557129, | |
| "learning_rate": 4.88402979676593e-05, | |
| "loss": 0.451, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.5710719668425018, | |
| "grad_norm": 9.928131103515625, | |
| "learning_rate": 4.883296048139645e-05, | |
| "loss": 0.455, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.572249434815373, | |
| "grad_norm": 5.427646636962891, | |
| "learning_rate": 4.882560041066932e-05, | |
| "loss": 0.4672, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.5734269027882442, | |
| "grad_norm": 41.32688903808594, | |
| "learning_rate": 4.8818217762452384e-05, | |
| "loss": 0.4526, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 0.5746043707611153, | |
| "grad_norm": 6.402476787567139, | |
| "learning_rate": 4.8810812543741575e-05, | |
| "loss": 0.4404, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.5757818387339865, | |
| "grad_norm": 8.651934623718262, | |
| "learning_rate": 4.880338476155418e-05, | |
| "loss": 0.4527, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 0.5769593067068576, | |
| "grad_norm": 5.511447429656982, | |
| "learning_rate": 4.879593442292887e-05, | |
| "loss": 0.4388, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.5781367746797287, | |
| "grad_norm": 8.449271202087402, | |
| "learning_rate": 4.87884615349257e-05, | |
| "loss": 0.4508, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 0.5793142426525999, | |
| "grad_norm": 6.713787078857422, | |
| "learning_rate": 4.87809661046261e-05, | |
| "loss": 0.4646, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.580491710625471, | |
| "grad_norm": 7.550659656524658, | |
| "learning_rate": 4.8773448139132826e-05, | |
| "loss": 0.4515, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 0.5816691785983421, | |
| "grad_norm": 13.547931671142578, | |
| "learning_rate": 4.876590764557003e-05, | |
| "loss": 0.4564, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.5828466465712132, | |
| "grad_norm": 7.133912086486816, | |
| "learning_rate": 4.875834463108319e-05, | |
| "loss": 0.4412, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 0.5840241145440844, | |
| "grad_norm": 4.595999240875244, | |
| "learning_rate": 4.8750759102839126e-05, | |
| "loss": 0.4551, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.5852015825169555, | |
| "grad_norm": 5.551638603210449, | |
| "learning_rate": 4.8743151068026006e-05, | |
| "loss": 0.4594, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 0.5863790504898266, | |
| "grad_norm": 38.925514221191406, | |
| "learning_rate": 4.8735520533853305e-05, | |
| "loss": 0.4609, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.5875565184626979, | |
| "grad_norm": 8.806419372558594, | |
| "learning_rate": 4.872786750755184e-05, | |
| "loss": 0.4482, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 0.588733986435569, | |
| "grad_norm": 7.807914733886719, | |
| "learning_rate": 4.872019199637372e-05, | |
| "loss": 0.4597, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.5899114544084401, | |
| "grad_norm": 5.391265869140625, | |
| "learning_rate": 4.871249400759238e-05, | |
| "loss": 0.4446, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 0.5910889223813113, | |
| "grad_norm": 12.07422161102295, | |
| "learning_rate": 4.870477354850255e-05, | |
| "loss": 0.4613, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.5922663903541824, | |
| "grad_norm": 6.568973064422607, | |
| "learning_rate": 4.869703062642024e-05, | |
| "loss": 0.4487, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 0.5934438583270535, | |
| "grad_norm": 27.290000915527344, | |
| "learning_rate": 4.868926524868277e-05, | |
| "loss": 0.4487, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.5946213262999246, | |
| "grad_norm": 6.316644668579102, | |
| "learning_rate": 4.868147742264872e-05, | |
| "loss": 0.45, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 0.5957987942727958, | |
| "grad_norm": 7.125376224517822, | |
| "learning_rate": 4.867366715569794e-05, | |
| "loss": 0.4564, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.5969762622456669, | |
| "grad_norm": 7.223470211029053, | |
| "learning_rate": 4.866583445523157e-05, | |
| "loss": 0.4567, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 0.598153730218538, | |
| "grad_norm": 18.58697509765625, | |
| "learning_rate": 4.865797932867199e-05, | |
| "loss": 0.4459, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.5993311981914092, | |
| "grad_norm": 16.599380493164062, | |
| "learning_rate": 4.865010178346282e-05, | |
| "loss": 0.4415, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 0.6005086661642803, | |
| "grad_norm": 10.445894241333008, | |
| "learning_rate": 4.8642201827068946e-05, | |
| "loss": 0.4487, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.6016861341371514, | |
| "grad_norm": 12.73167896270752, | |
| "learning_rate": 4.8634279466976486e-05, | |
| "loss": 0.4354, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 0.6028636021100227, | |
| "grad_norm": 19.48681640625, | |
| "learning_rate": 4.862633471069278e-05, | |
| "loss": 0.4366, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.6040410700828938, | |
| "grad_norm": 4.970024108886719, | |
| "learning_rate": 4.86183675657464e-05, | |
| "loss": 0.4475, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 0.6052185380557649, | |
| "grad_norm": 8.190299987792969, | |
| "learning_rate": 4.861037803968713e-05, | |
| "loss": 0.4549, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.606396006028636, | |
| "grad_norm": 11.79710578918457, | |
| "learning_rate": 4.860236614008596e-05, | |
| "loss": 0.4281, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 0.6075734740015072, | |
| "grad_norm": 16.114788055419922, | |
| "learning_rate": 4.8594331874535085e-05, | |
| "loss": 0.4407, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.6087509419743783, | |
| "grad_norm": 5.199133396148682, | |
| "learning_rate": 4.8586275250647895e-05, | |
| "loss": 0.4341, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 0.6099284099472494, | |
| "grad_norm": 5.4275641441345215, | |
| "learning_rate": 4.8578196276058965e-05, | |
| "loss": 0.4425, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.6111058779201206, | |
| "grad_norm": 6.487822532653809, | |
| "learning_rate": 4.857009495842404e-05, | |
| "loss": 0.4387, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 0.6122833458929917, | |
| "grad_norm": 5.207398891448975, | |
| "learning_rate": 4.8561971305420065e-05, | |
| "loss": 0.4437, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.6134608138658628, | |
| "grad_norm": 4.550735950469971, | |
| "learning_rate": 4.8553825324745125e-05, | |
| "loss": 0.4356, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 0.614638281838734, | |
| "grad_norm": 35.63388442993164, | |
| "learning_rate": 4.8545657024118464e-05, | |
| "loss": 0.4423, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.6158157498116051, | |
| "grad_norm": 5.647826194763184, | |
| "learning_rate": 4.8537466411280494e-05, | |
| "loss": 0.444, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 0.6169932177844762, | |
| "grad_norm": 9.764333724975586, | |
| "learning_rate": 4.852925349399277e-05, | |
| "loss": 0.4414, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.6181706857573473, | |
| "grad_norm": 5.748869895935059, | |
| "learning_rate": 4.852101828003794e-05, | |
| "loss": 0.434, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 0.6193481537302186, | |
| "grad_norm": 17.17038917541504, | |
| "learning_rate": 4.8512760777219846e-05, | |
| "loss": 0.4251, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.6205256217030897, | |
| "grad_norm": 32.0035285949707, | |
| "learning_rate": 4.850448099336341e-05, | |
| "loss": 0.437, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 0.6217030896759608, | |
| "grad_norm": 5.867980480194092, | |
| "learning_rate": 4.849617893631468e-05, | |
| "loss": 0.4229, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.622880557648832, | |
| "grad_norm": 7.499533176422119, | |
| "learning_rate": 4.8487854613940784e-05, | |
| "loss": 0.4337, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 0.6240580256217031, | |
| "grad_norm": 6.576634407043457, | |
| "learning_rate": 4.8479508034130004e-05, | |
| "loss": 0.4427, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.6252354935945742, | |
| "grad_norm": 14.996600151062012, | |
| "learning_rate": 4.847113920479167e-05, | |
| "loss": 0.4332, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 0.6264129615674454, | |
| "grad_norm": 16.811450958251953, | |
| "learning_rate": 4.846274813385621e-05, | |
| "loss": 0.4378, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.6275904295403165, | |
| "grad_norm": 6.706115245819092, | |
| "learning_rate": 4.845433482927512e-05, | |
| "loss": 0.4384, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 0.6287678975131876, | |
| "grad_norm": 5.594850063323975, | |
| "learning_rate": 4.844589929902097e-05, | |
| "loss": 0.4367, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.6299453654860587, | |
| "grad_norm": 7.255009651184082, | |
| "learning_rate": 4.84374415510874e-05, | |
| "loss": 0.4176, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 0.6311228334589299, | |
| "grad_norm": 6.982823848724365, | |
| "learning_rate": 4.842896159348909e-05, | |
| "loss": 0.4294, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.632300301431801, | |
| "grad_norm": 7.431040287017822, | |
| "learning_rate": 4.842045943426178e-05, | |
| "loss": 0.4459, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 0.6334777694046722, | |
| "grad_norm": 6.041873931884766, | |
| "learning_rate": 4.841193508146225e-05, | |
| "loss": 0.4217, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.6346552373775434, | |
| "grad_norm": 8.257255554199219, | |
| "learning_rate": 4.840338854316827e-05, | |
| "loss": 0.4361, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 0.6358327053504145, | |
| "grad_norm": 17.32215690612793, | |
| "learning_rate": 4.83948198274787e-05, | |
| "loss": 0.432, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.6370101733232856, | |
| "grad_norm": 9.02050495147705, | |
| "learning_rate": 4.838622894251336e-05, | |
| "loss": 0.4342, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 0.6381876412961568, | |
| "grad_norm": 22.568437576293945, | |
| "learning_rate": 4.837761589641311e-05, | |
| "loss": 0.4218, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.6393651092690279, | |
| "grad_norm": 18.67146110534668, | |
| "learning_rate": 4.836898069733979e-05, | |
| "loss": 0.4229, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 0.640542577241899, | |
| "grad_norm": 14.506811141967773, | |
| "learning_rate": 4.836032335347625e-05, | |
| "loss": 0.4333, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.6417200452147701, | |
| "grad_norm": 4.083027362823486, | |
| "learning_rate": 4.835164387302631e-05, | |
| "loss": 0.4175, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 0.6428975131876413, | |
| "grad_norm": 15.342577934265137, | |
| "learning_rate": 4.8342942264214786e-05, | |
| "loss": 0.4329, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.6440749811605124, | |
| "grad_norm": 6.424405097961426, | |
| "learning_rate": 4.8334218535287436e-05, | |
| "loss": 0.4182, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 0.6452524491333835, | |
| "grad_norm": 3.555016040802002, | |
| "learning_rate": 4.8325472694511e-05, | |
| "loss": 0.444, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.6464299171062547, | |
| "grad_norm": 5.33071231842041, | |
| "learning_rate": 4.8316704750173166e-05, | |
| "loss": 0.4308, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 0.6476073850791259, | |
| "grad_norm": 10.168743133544922, | |
| "learning_rate": 4.830791471058257e-05, | |
| "loss": 0.4293, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.648784853051997, | |
| "grad_norm": 5.484958171844482, | |
| "learning_rate": 4.8299102584068776e-05, | |
| "loss": 0.4209, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 0.6499623210248682, | |
| "grad_norm": 7.4925312995910645, | |
| "learning_rate": 4.8290268378982287e-05, | |
| "loss": 0.4228, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.6511397889977393, | |
| "grad_norm": 61.65214157104492, | |
| "learning_rate": 4.828141210369453e-05, | |
| "loss": 0.4187, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 0.6523172569706104, | |
| "grad_norm": 8.267818450927734, | |
| "learning_rate": 4.827253376659783e-05, | |
| "loss": 0.4229, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.6534947249434815, | |
| "grad_norm": 8.555291175842285, | |
| "learning_rate": 4.8263633376105444e-05, | |
| "loss": 0.4082, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 0.6546721929163527, | |
| "grad_norm": 18.954345703125, | |
| "learning_rate": 4.825471094065151e-05, | |
| "loss": 0.4224, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.6558496608892238, | |
| "grad_norm": 4.276530742645264, | |
| "learning_rate": 4.8245766468691057e-05, | |
| "loss": 0.4354, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 0.6570271288620949, | |
| "grad_norm": 17.24860954284668, | |
| "learning_rate": 4.82367999687e-05, | |
| "loss": 0.4246, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.6582045968349661, | |
| "grad_norm": 9.74885368347168, | |
| "learning_rate": 4.822781144917512e-05, | |
| "loss": 0.4272, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 0.6593820648078372, | |
| "grad_norm": 12.988977432250977, | |
| "learning_rate": 4.821880091863408e-05, | |
| "loss": 0.4253, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.6605595327807083, | |
| "grad_norm": 5.453243255615234, | |
| "learning_rate": 4.820976838561538e-05, | |
| "loss": 0.4269, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 0.6617370007535796, | |
| "grad_norm": 4.44385290145874, | |
| "learning_rate": 4.82007138586784e-05, | |
| "loss": 0.4275, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.6629144687264507, | |
| "grad_norm": 4.186730861663818, | |
| "learning_rate": 4.819163734640332e-05, | |
| "loss": 0.424, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 0.6640919366993218, | |
| "grad_norm": 56.707759857177734, | |
| "learning_rate": 4.81825388573912e-05, | |
| "loss": 0.4231, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.6652694046721929, | |
| "grad_norm": 4.561465263366699, | |
| "learning_rate": 4.817341840026388e-05, | |
| "loss": 0.4196, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 0.6664468726450641, | |
| "grad_norm": 13.327962875366211, | |
| "learning_rate": 4.816427598366405e-05, | |
| "loss": 0.4259, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.6676243406179352, | |
| "grad_norm": 6.9228949546813965, | |
| "learning_rate": 4.81551116162552e-05, | |
| "loss": 0.4269, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 0.6688018085908063, | |
| "grad_norm": 4.576337814331055, | |
| "learning_rate": 4.814592530672162e-05, | |
| "loss": 0.4248, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.6699792765636775, | |
| "grad_norm": 6.842184066772461, | |
| "learning_rate": 4.813671706376839e-05, | |
| "loss": 0.4075, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 0.6711567445365486, | |
| "grad_norm": 7.599248886108398, | |
| "learning_rate": 4.8127486896121364e-05, | |
| "loss": 0.4205, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.6723342125094197, | |
| "grad_norm": 12.973711013793945, | |
| "learning_rate": 4.8118234812527206e-05, | |
| "loss": 0.4136, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 0.6735116804822909, | |
| "grad_norm": 62.3187141418457, | |
| "learning_rate": 4.8108960821753324e-05, | |
| "loss": 0.4156, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.674689148455162, | |
| "grad_norm": 12.37547492980957, | |
| "learning_rate": 4.8099664932587874e-05, | |
| "loss": 0.4139, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 0.6758666164280331, | |
| "grad_norm": 11.823864936828613, | |
| "learning_rate": 4.809034715383979e-05, | |
| "loss": 0.4311, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.6770440844009042, | |
| "grad_norm": 4.698902606964111, | |
| "learning_rate": 4.808100749433873e-05, | |
| "loss": 0.4067, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 0.6782215523737755, | |
| "grad_norm": 5.277897357940674, | |
| "learning_rate": 4.80716459629351e-05, | |
| "loss": 0.4195, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.6793990203466466, | |
| "grad_norm": 7.38442325592041, | |
| "learning_rate": 4.806226256850001e-05, | |
| "loss": 0.4178, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 0.6805764883195177, | |
| "grad_norm": 46.425537109375, | |
| "learning_rate": 4.805285731992532e-05, | |
| "loss": 0.4239, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.6817539562923889, | |
| "grad_norm": 11.643020629882812, | |
| "learning_rate": 4.804343022612357e-05, | |
| "loss": 0.417, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 0.68293142426526, | |
| "grad_norm": 23.75605583190918, | |
| "learning_rate": 4.8033981296028016e-05, | |
| "loss": 0.4239, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.6841088922381311, | |
| "grad_norm": 6.298062801361084, | |
| "learning_rate": 4.80245105385926e-05, | |
| "loss": 0.4106, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 0.6852863602110023, | |
| "grad_norm": 9.20297908782959, | |
| "learning_rate": 4.801501796279197e-05, | |
| "loss": 0.42, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.6864638281838734, | |
| "grad_norm": 8.227057456970215, | |
| "learning_rate": 4.8005503577621414e-05, | |
| "loss": 0.4127, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 0.6876412961567445, | |
| "grad_norm": 19.5969295501709, | |
| "learning_rate": 4.799596739209689e-05, | |
| "loss": 0.4172, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.6888187641296156, | |
| "grad_norm": 14.509115219116211, | |
| "learning_rate": 4.798640941525506e-05, | |
| "loss": 0.4243, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 0.6899962321024868, | |
| "grad_norm": 6.977189064025879, | |
| "learning_rate": 4.797682965615319e-05, | |
| "loss": 0.4154, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.6911737000753579, | |
| "grad_norm": 4.62774133682251, | |
| "learning_rate": 4.796722812386919e-05, | |
| "loss": 0.4216, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 0.692351168048229, | |
| "grad_norm": 4.500463485717773, | |
| "learning_rate": 4.795760482750162e-05, | |
| "loss": 0.4218, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.6935286360211003, | |
| "grad_norm": 29.660913467407227, | |
| "learning_rate": 4.7947959776169666e-05, | |
| "loss": 0.4239, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 0.6947061039939714, | |
| "grad_norm": 12.277323722839355, | |
| "learning_rate": 4.793829297901311e-05, | |
| "loss": 0.4136, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.6958835719668425, | |
| "grad_norm": 6.913842678070068, | |
| "learning_rate": 4.7928604445192357e-05, | |
| "loss": 0.4152, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 0.6970610399397137, | |
| "grad_norm": 66.11016082763672, | |
| "learning_rate": 4.7918894183888396e-05, | |
| "loss": 0.4163, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.6982385079125848, | |
| "grad_norm": 9.231396675109863, | |
| "learning_rate": 4.7909162204302824e-05, | |
| "loss": 0.4168, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 0.6994159758854559, | |
| "grad_norm": 8.67923355102539, | |
| "learning_rate": 4.789940851565781e-05, | |
| "loss": 0.4051, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.700593443858327, | |
| "grad_norm": 9.884023666381836, | |
| "learning_rate": 4.788963312719608e-05, | |
| "loss": 0.4121, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 0.7017709118311982, | |
| "grad_norm": 7.803267955780029, | |
| "learning_rate": 4.7879836048180935e-05, | |
| "loss": 0.4145, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.7029483798040693, | |
| "grad_norm": 14.009085655212402, | |
| "learning_rate": 4.7870017287896254e-05, | |
| "loss": 0.4159, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 0.7041258477769404, | |
| "grad_norm": 24.33967399597168, | |
| "learning_rate": 4.786017685564642e-05, | |
| "loss": 0.4127, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.7053033157498116, | |
| "grad_norm": 140.727783203125, | |
| "learning_rate": 4.785031476075638e-05, | |
| "loss": 0.402, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 0.7064807837226827, | |
| "grad_norm": 11.9456205368042, | |
| "learning_rate": 4.7840431012571583e-05, | |
| "loss": 0.4042, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.7076582516955539, | |
| "grad_norm": 7.010389804840088, | |
| "learning_rate": 4.7830525620458035e-05, | |
| "loss": 0.4113, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 0.7088357196684251, | |
| "grad_norm": 6.530120849609375, | |
| "learning_rate": 4.7820598593802224e-05, | |
| "loss": 0.4141, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.7100131876412962, | |
| "grad_norm": 6.79564905166626, | |
| "learning_rate": 4.7810649942011145e-05, | |
| "loss": 0.4163, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 0.7111906556141673, | |
| "grad_norm": 3.8069498538970947, | |
| "learning_rate": 4.7800679674512286e-05, | |
| "loss": 0.4032, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.7123681235870384, | |
| "grad_norm": 8.744211196899414, | |
| "learning_rate": 4.779068780075363e-05, | |
| "loss": 0.4271, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 0.7135455915599096, | |
| "grad_norm": 2.691483974456787, | |
| "learning_rate": 4.7780674330203614e-05, | |
| "loss": 0.416, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.7147230595327807, | |
| "grad_norm": 11.353119850158691, | |
| "learning_rate": 4.7770639272351145e-05, | |
| "loss": 0.4268, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 0.7159005275056518, | |
| "grad_norm": 9.705777168273926, | |
| "learning_rate": 4.7760582636705595e-05, | |
| "loss": 0.396, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.717077995478523, | |
| "grad_norm": 21.71885108947754, | |
| "learning_rate": 4.77505044327968e-05, | |
| "loss": 0.4142, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 0.7182554634513941, | |
| "grad_norm": 7.8633270263671875, | |
| "learning_rate": 4.7740404670174974e-05, | |
| "loss": 0.4039, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.7194329314242652, | |
| "grad_norm": 9.407065391540527, | |
| "learning_rate": 4.7730283358410844e-05, | |
| "loss": 0.4155, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 0.7206103993971364, | |
| "grad_norm": 7.942194938659668, | |
| "learning_rate": 4.772014050709549e-05, | |
| "loss": 0.4089, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.7217878673700076, | |
| "grad_norm": 7.428655624389648, | |
| "learning_rate": 4.770997612584043e-05, | |
| "loss": 0.4071, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 0.7229653353428787, | |
| "grad_norm": 4.3990278244018555, | |
| "learning_rate": 4.769979022427758e-05, | |
| "loss": 0.4121, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.7241428033157498, | |
| "grad_norm": 4.404142379760742, | |
| "learning_rate": 4.768958281205925e-05, | |
| "loss": 0.4004, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 0.725320271288621, | |
| "grad_norm": 3.742658853530884, | |
| "learning_rate": 4.767935389885815e-05, | |
| "loss": 0.4053, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.7264977392614921, | |
| "grad_norm": 4.433485507965088, | |
| "learning_rate": 4.7669103494367326e-05, | |
| "loss": 0.4077, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 0.7276752072343632, | |
| "grad_norm": 18.64955711364746, | |
| "learning_rate": 4.7658831608300225e-05, | |
| "loss": 0.4067, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.7288526752072344, | |
| "grad_norm": 68.18895721435547, | |
| "learning_rate": 4.764853825039064e-05, | |
| "loss": 0.3977, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 0.7300301431801055, | |
| "grad_norm": 7.118121147155762, | |
| "learning_rate": 4.76382234303927e-05, | |
| "loss": 0.4168, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.7312076111529766, | |
| "grad_norm": 4.834046363830566, | |
| "learning_rate": 4.762788715808088e-05, | |
| "loss": 0.4134, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 0.7323850791258478, | |
| "grad_norm": 8.732151985168457, | |
| "learning_rate": 4.761752944324999e-05, | |
| "loss": 0.3988, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.7335625470987189, | |
| "grad_norm": 12.013757705688477, | |
| "learning_rate": 4.760715029571515e-05, | |
| "loss": 0.4036, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 0.73474001507159, | |
| "grad_norm": 23.86073875427246, | |
| "learning_rate": 4.75967497253118e-05, | |
| "loss": 0.4058, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.7359174830444611, | |
| "grad_norm": 11.801138877868652, | |
| "learning_rate": 4.758632774189566e-05, | |
| "loss": 0.4057, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 0.7370949510173324, | |
| "grad_norm": 39.732666015625, | |
| "learning_rate": 4.757588435534277e-05, | |
| "loss": 0.4054, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.7382724189902035, | |
| "grad_norm": 5.140982151031494, | |
| "learning_rate": 4.756541957554942e-05, | |
| "loss": 0.3985, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 0.7394498869630746, | |
| "grad_norm": 32.54568099975586, | |
| "learning_rate": 4.75549334124322e-05, | |
| "loss": 0.4072, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.7406273549359458, | |
| "grad_norm": 4.446203231811523, | |
| "learning_rate": 4.754442587592796e-05, | |
| "loss": 0.4131, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 0.7418048229088169, | |
| "grad_norm": 5.91099214553833, | |
| "learning_rate": 4.7533896975993786e-05, | |
| "loss": 0.3979, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.742982290881688, | |
| "grad_norm": 29.59516143798828, | |
| "learning_rate": 4.752334672260701e-05, | |
| "loss": 0.3975, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 0.7441597588545592, | |
| "grad_norm": 9.375574111938477, | |
| "learning_rate": 4.751277512576523e-05, | |
| "loss": 0.3972, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.7453372268274303, | |
| "grad_norm": 44.80549240112305, | |
| "learning_rate": 4.7502182195486224e-05, | |
| "loss": 0.3981, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 0.7465146948003014, | |
| "grad_norm": 9.062840461730957, | |
| "learning_rate": 4.749156794180803e-05, | |
| "loss": 0.391, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.7476921627731725, | |
| "grad_norm": 3.556516408920288, | |
| "learning_rate": 4.748093237478885e-05, | |
| "loss": 0.399, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 0.7488696307460437, | |
| "grad_norm": 4.87206506729126, | |
| "learning_rate": 4.7470275504507125e-05, | |
| "loss": 0.3993, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.7500470987189148, | |
| "grad_norm": 9.916251182556152, | |
| "learning_rate": 4.7459597341061435e-05, | |
| "loss": 0.4091, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 0.7512245666917859, | |
| "grad_norm": 9.017475128173828, | |
| "learning_rate": 4.7448897894570595e-05, | |
| "loss": 0.4031, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.7524020346646572, | |
| "grad_norm": 16.49560546875, | |
| "learning_rate": 4.7438177175173535e-05, | |
| "loss": 0.3899, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 0.7535795026375283, | |
| "grad_norm": 5.768393516540527, | |
| "learning_rate": 4.742743519302939e-05, | |
| "loss": 0.4013, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.7547569706103994, | |
| "grad_norm": 2.916512966156006, | |
| "learning_rate": 4.741667195831739e-05, | |
| "loss": 0.4001, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 0.7559344385832706, | |
| "grad_norm": 5.852372646331787, | |
| "learning_rate": 4.740588748123697e-05, | |
| "loss": 0.4063, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.7571119065561417, | |
| "grad_norm": 22.347827911376953, | |
| "learning_rate": 4.7395081772007625e-05, | |
| "loss": 0.4026, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 0.7582893745290128, | |
| "grad_norm": 15.438483238220215, | |
| "learning_rate": 4.738425484086902e-05, | |
| "loss": 0.3867, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.7594668425018839, | |
| "grad_norm": 28.649736404418945, | |
| "learning_rate": 4.737340669808092e-05, | |
| "loss": 0.3883, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 0.7606443104747551, | |
| "grad_norm": 9.691723823547363, | |
| "learning_rate": 4.736253735392318e-05, | |
| "loss": 0.4035, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.7618217784476262, | |
| "grad_norm": 6.743752479553223, | |
| "learning_rate": 4.7351646818695746e-05, | |
| "loss": 0.3993, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 0.7629992464204973, | |
| "grad_norm": 14.10403823852539, | |
| "learning_rate": 4.734073510271866e-05, | |
| "loss": 0.3987, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.7641767143933685, | |
| "grad_norm": 44.799556732177734, | |
| "learning_rate": 4.7329802216332006e-05, | |
| "loss": 0.3951, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 0.7653541823662396, | |
| "grad_norm": 10.39458179473877, | |
| "learning_rate": 4.731884816989597e-05, | |
| "loss": 0.4178, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.7665316503391107, | |
| "grad_norm": 8.49219799041748, | |
| "learning_rate": 4.730787297379075e-05, | |
| "loss": 0.3939, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 0.767709118311982, | |
| "grad_norm": 8.608924865722656, | |
| "learning_rate": 4.729687663841661e-05, | |
| "loss": 0.4009, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.7688865862848531, | |
| "grad_norm": 6.803063869476318, | |
| "learning_rate": 4.7285859174193845e-05, | |
| "loss": 0.3955, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 0.7700640542577242, | |
| "grad_norm": 7.5847978591918945, | |
| "learning_rate": 4.727482059156276e-05, | |
| "loss": 0.3897, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.7712415222305953, | |
| "grad_norm": 26.286178588867188, | |
| "learning_rate": 4.726376090098369e-05, | |
| "loss": 0.3987, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 0.7724189902034665, | |
| "grad_norm": 10.330301284790039, | |
| "learning_rate": 4.7252680112936944e-05, | |
| "loss": 0.3955, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.7735964581763376, | |
| "grad_norm": 16.25479507446289, | |
| "learning_rate": 4.724157823792284e-05, | |
| "loss": 0.3971, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 0.7747739261492087, | |
| "grad_norm": 4.899224758148193, | |
| "learning_rate": 4.723045528646169e-05, | |
| "loss": 0.3999, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.7759513941220799, | |
| "grad_norm": 7.083283424377441, | |
| "learning_rate": 4.7219311269093755e-05, | |
| "loss": 0.4046, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 0.777128862094951, | |
| "grad_norm": 11.80024242401123, | |
| "learning_rate": 4.720814619637929e-05, | |
| "loss": 0.3905, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.7783063300678221, | |
| "grad_norm": 5.462294578552246, | |
| "learning_rate": 4.7196960078898455e-05, | |
| "loss": 0.3942, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 0.7794837980406933, | |
| "grad_norm": 30.12801170349121, | |
| "learning_rate": 4.7185752927251406e-05, | |
| "loss": 0.3915, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.7806612660135644, | |
| "grad_norm": 15.410928726196289, | |
| "learning_rate": 4.717452475205818e-05, | |
| "loss": 0.3969, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 0.7818387339864356, | |
| "grad_norm": 6.87001895904541, | |
| "learning_rate": 4.7163275563958786e-05, | |
| "loss": 0.3893, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.7830162019593067, | |
| "grad_norm": 8.446171760559082, | |
| "learning_rate": 4.715200537361309e-05, | |
| "loss": 0.3962, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 0.7841936699321779, | |
| "grad_norm": 35.13418960571289, | |
| "learning_rate": 4.714071419170093e-05, | |
| "loss": 0.404, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.785371137905049, | |
| "grad_norm": 13.51883602142334, | |
| "learning_rate": 4.712940202892196e-05, | |
| "loss": 0.394, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 0.7865486058779201, | |
| "grad_norm": 7.975137710571289, | |
| "learning_rate": 4.711806889599577e-05, | |
| "loss": 0.3949, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.7877260738507913, | |
| "grad_norm": 8.67740535736084, | |
| "learning_rate": 4.71067148036618e-05, | |
| "loss": 0.3932, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 0.7889035418236624, | |
| "grad_norm": 6.285601615905762, | |
| "learning_rate": 4.709533976267936e-05, | |
| "loss": 0.3875, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.7900810097965335, | |
| "grad_norm": 7.787820339202881, | |
| "learning_rate": 4.708394378382759e-05, | |
| "loss": 0.386, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 0.7912584777694047, | |
| "grad_norm": 20.8675537109375, | |
| "learning_rate": 4.707252687790551e-05, | |
| "loss": 0.3896, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.7924359457422758, | |
| "grad_norm": 2.7611262798309326, | |
| "learning_rate": 4.7061089055731934e-05, | |
| "loss": 0.3936, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 0.7936134137151469, | |
| "grad_norm": 45.79184341430664, | |
| "learning_rate": 4.704963032814551e-05, | |
| "loss": 0.3826, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.794790881688018, | |
| "grad_norm": 15.176276206970215, | |
| "learning_rate": 4.70381507060047e-05, | |
| "loss": 0.3917, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 0.7959683496608893, | |
| "grad_norm": 43.62869644165039, | |
| "learning_rate": 4.702665020018777e-05, | |
| "loss": 0.3928, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.7971458176337604, | |
| "grad_norm": 3.3066062927246094, | |
| "learning_rate": 4.701512882159276e-05, | |
| "loss": 0.3839, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 0.7983232856066315, | |
| "grad_norm": 10.182275772094727, | |
| "learning_rate": 4.7003586581137494e-05, | |
| "loss": 0.3997, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.7995007535795027, | |
| "grad_norm": 14.264429092407227, | |
| "learning_rate": 4.699202348975958e-05, | |
| "loss": 0.3917, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 0.8006782215523738, | |
| "grad_norm": 33.70845413208008, | |
| "learning_rate": 4.698043955841637e-05, | |
| "loss": 0.3913, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.8018556895252449, | |
| "grad_norm": 6.397038459777832, | |
| "learning_rate": 4.696883479808497e-05, | |
| "loss": 0.4038, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 0.8030331574981161, | |
| "grad_norm": 13.475255012512207, | |
| "learning_rate": 4.695720921976221e-05, | |
| "loss": 0.3922, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.8042106254709872, | |
| "grad_norm": 5.805014133453369, | |
| "learning_rate": 4.694556283446468e-05, | |
| "loss": 0.3969, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 0.8053880934438583, | |
| "grad_norm": 41.0355224609375, | |
| "learning_rate": 4.6933895653228645e-05, | |
| "loss": 0.394, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.8065655614167294, | |
| "grad_norm": 4.529848098754883, | |
| "learning_rate": 4.6922207687110107e-05, | |
| "loss": 0.4015, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 0.8077430293896006, | |
| "grad_norm": 4.76627254486084, | |
| "learning_rate": 4.691049894718475e-05, | |
| "loss": 0.3859, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.8089204973624717, | |
| "grad_norm": 6.644199848175049, | |
| "learning_rate": 4.689876944454797e-05, | |
| "loss": 0.3821, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 0.8100979653353428, | |
| "grad_norm": 8.427165031433105, | |
| "learning_rate": 4.6887019190314783e-05, | |
| "loss": 0.3886, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.8112754333082141, | |
| "grad_norm": 121.33244323730469, | |
| "learning_rate": 4.687524819561993e-05, | |
| "loss": 0.3968, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 0.8124529012810852, | |
| "grad_norm": 10.001495361328125, | |
| "learning_rate": 4.686345647161776e-05, | |
| "loss": 0.3882, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.8136303692539563, | |
| "grad_norm": 3.111377000808716, | |
| "learning_rate": 4.68516440294823e-05, | |
| "loss": 0.3858, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 0.8148078372268275, | |
| "grad_norm": 7.6306843757629395, | |
| "learning_rate": 4.683981088040719e-05, | |
| "loss": 0.3887, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.8159853051996986, | |
| "grad_norm": 5.915834426879883, | |
| "learning_rate": 4.682795703560568e-05, | |
| "loss": 0.3914, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 0.8171627731725697, | |
| "grad_norm": 7.867639541625977, | |
| "learning_rate": 4.681608250631066e-05, | |
| "loss": 0.3986, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.8183402411454408, | |
| "grad_norm": 4.4137444496154785, | |
| "learning_rate": 4.680418730377463e-05, | |
| "loss": 0.3892, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 0.819517709118312, | |
| "grad_norm": 7.099762439727783, | |
| "learning_rate": 4.6792271439269616e-05, | |
| "loss": 0.3927, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.8206951770911831, | |
| "grad_norm": 3.4745028018951416, | |
| "learning_rate": 4.678033492408731e-05, | |
| "loss": 0.3868, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 0.8218726450640542, | |
| "grad_norm": 18.559595108032227, | |
| "learning_rate": 4.6768377769538894e-05, | |
| "loss": 0.3928, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.8230501130369254, | |
| "grad_norm": 7.237882137298584, | |
| "learning_rate": 4.675639998695516e-05, | |
| "loss": 0.398, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 0.8242275810097965, | |
| "grad_norm": 6.579901218414307, | |
| "learning_rate": 4.6744401587686436e-05, | |
| "loss": 0.3797, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.8254050489826676, | |
| "grad_norm": 13.161747932434082, | |
| "learning_rate": 4.6732382583102574e-05, | |
| "loss": 0.3907, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 0.8265825169555389, | |
| "grad_norm": 5.063140392303467, | |
| "learning_rate": 4.672034298459296e-05, | |
| "loss": 0.393, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.82775998492841, | |
| "grad_norm": 9.866806983947754, | |
| "learning_rate": 4.6708282803566495e-05, | |
| "loss": 0.3794, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 0.8289374529012811, | |
| "grad_norm": 7.7420430183410645, | |
| "learning_rate": 4.669620205145159e-05, | |
| "loss": 0.3942, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.8301149208741522, | |
| "grad_norm": 5.4539408683776855, | |
| "learning_rate": 4.668410073969613e-05, | |
| "loss": 0.374, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 0.8312923888470234, | |
| "grad_norm": 4.6781392097473145, | |
| "learning_rate": 4.667197887976751e-05, | |
| "loss": 0.3763, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.8324698568198945, | |
| "grad_norm": 6.535099506378174, | |
| "learning_rate": 4.665983648315258e-05, | |
| "loss": 0.3948, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 0.8336473247927656, | |
| "grad_norm": 8.786108016967773, | |
| "learning_rate": 4.664767356135765e-05, | |
| "loss": 0.3852, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.8348247927656368, | |
| "grad_norm": 3.571674108505249, | |
| "learning_rate": 4.663549012590849e-05, | |
| "loss": 0.3802, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 0.8360022607385079, | |
| "grad_norm": 3.58697509765625, | |
| "learning_rate": 4.66232861883503e-05, | |
| "loss": 0.393, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.837179728711379, | |
| "grad_norm": 8.02945327758789, | |
| "learning_rate": 4.66110617602477e-05, | |
| "loss": 0.39, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 0.8383571966842502, | |
| "grad_norm": 6.256012916564941, | |
| "learning_rate": 4.659881685318475e-05, | |
| "loss": 0.3874, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.8395346646571213, | |
| "grad_norm": 3.2590229511260986, | |
| "learning_rate": 4.658655147876491e-05, | |
| "loss": 0.3822, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 0.8407121326299924, | |
| "grad_norm": 5.324990749359131, | |
| "learning_rate": 4.657426564861102e-05, | |
| "loss": 0.3904, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.8418896006028636, | |
| "grad_norm": 4.558837890625, | |
| "learning_rate": 4.656195937436531e-05, | |
| "loss": 0.3881, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 0.8430670685757348, | |
| "grad_norm": 7.039790630340576, | |
| "learning_rate": 4.654963266768939e-05, | |
| "loss": 0.393, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.8442445365486059, | |
| "grad_norm": 10.441879272460938, | |
| "learning_rate": 4.653728554026423e-05, | |
| "loss": 0.3884, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 0.845422004521477, | |
| "grad_norm": 16.346277236938477, | |
| "learning_rate": 4.652491800379015e-05, | |
| "loss": 0.3883, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.8465994724943482, | |
| "grad_norm": 5.829379081726074, | |
| "learning_rate": 4.6512530069986817e-05, | |
| "loss": 0.3853, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 0.8477769404672193, | |
| "grad_norm": 13.366453170776367, | |
| "learning_rate": 4.650012175059321e-05, | |
| "loss": 0.3837, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.8489544084400904, | |
| "grad_norm": 15.298567771911621, | |
| "learning_rate": 4.648769305736763e-05, | |
| "loss": 0.382, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 0.8501318764129616, | |
| "grad_norm": 9.239766120910645, | |
| "learning_rate": 4.6475244002087705e-05, | |
| "loss": 0.3829, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.8513093443858327, | |
| "grad_norm": 3.5200560092926025, | |
| "learning_rate": 4.646277459655034e-05, | |
| "loss": 0.389, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 0.8524868123587038, | |
| "grad_norm": 6.855247497558594, | |
| "learning_rate": 4.645028485257171e-05, | |
| "loss": 0.3873, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.8536642803315749, | |
| "grad_norm": 7.053743362426758, | |
| "learning_rate": 4.6437774781987295e-05, | |
| "loss": 0.3822, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 0.8548417483044461, | |
| "grad_norm": 22.360563278198242, | |
| "learning_rate": 4.6425244396651825e-05, | |
| "loss": 0.3853, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.8560192162773173, | |
| "grad_norm": 26.815019607543945, | |
| "learning_rate": 4.641269370843927e-05, | |
| "loss": 0.378, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 0.8571966842501884, | |
| "grad_norm": 8.894818305969238, | |
| "learning_rate": 4.640012272924285e-05, | |
| "loss": 0.38, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.8583741522230596, | |
| "grad_norm": 42.91030502319336, | |
| "learning_rate": 4.638753147097501e-05, | |
| "loss": 0.3741, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 0.8595516201959307, | |
| "grad_norm": 7.152801036834717, | |
| "learning_rate": 4.637491994556742e-05, | |
| "loss": 0.389, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.8607290881688018, | |
| "grad_norm": 5.190051555633545, | |
| "learning_rate": 4.6362288164970924e-05, | |
| "loss": 0.3794, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 0.861906556141673, | |
| "grad_norm": 8.604781150817871, | |
| "learning_rate": 4.634963614115561e-05, | |
| "loss": 0.3775, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.8630840241145441, | |
| "grad_norm": 29.41929054260254, | |
| "learning_rate": 4.6336963886110696e-05, | |
| "loss": 0.3819, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 0.8642614920874152, | |
| "grad_norm": 7.723423957824707, | |
| "learning_rate": 4.6324271411844624e-05, | |
| "loss": 0.3822, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.8654389600602863, | |
| "grad_norm": 9.10047435760498, | |
| "learning_rate": 4.631155873038495e-05, | |
| "loss": 0.3883, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 0.8666164280331575, | |
| "grad_norm": 8.435608863830566, | |
| "learning_rate": 4.6298825853778406e-05, | |
| "loss": 0.3811, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.8677938960060286, | |
| "grad_norm": 6.002137660980225, | |
| "learning_rate": 4.6286072794090854e-05, | |
| "loss": 0.3794, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 0.8689713639788997, | |
| "grad_norm": 4.113153457641602, | |
| "learning_rate": 4.627329956340727e-05, | |
| "loss": 0.3687, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.870148831951771, | |
| "grad_norm": 13.070047378540039, | |
| "learning_rate": 4.626050617383177e-05, | |
| "loss": 0.3814, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 0.8713262999246421, | |
| "grad_norm": 7.600546836853027, | |
| "learning_rate": 4.6247692637487566e-05, | |
| "loss": 0.381, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.8725037678975132, | |
| "grad_norm": 2.707479238510132, | |
| "learning_rate": 4.623485896651693e-05, | |
| "loss": 0.3673, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 0.8736812358703844, | |
| "grad_norm": 17.407522201538086, | |
| "learning_rate": 4.622200517308125e-05, | |
| "loss": 0.3841, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.8748587038432555, | |
| "grad_norm": 7.627296447753906, | |
| "learning_rate": 4.620913126936097e-05, | |
| "loss": 0.3761, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 0.8760361718161266, | |
| "grad_norm": 4.266987323760986, | |
| "learning_rate": 4.619623726755559e-05, | |
| "loss": 0.386, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.8772136397889977, | |
| "grad_norm": 11.322697639465332, | |
| "learning_rate": 4.6183323179883654e-05, | |
| "loss": 0.3866, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 0.8783911077618689, | |
| "grad_norm": 6.096189498901367, | |
| "learning_rate": 4.617038901858274e-05, | |
| "loss": 0.3655, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.87956857573474, | |
| "grad_norm": 3.697171688079834, | |
| "learning_rate": 4.615743479590946e-05, | |
| "loss": 0.3728, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 0.8807460437076111, | |
| "grad_norm": 4.448515892028809, | |
| "learning_rate": 4.6144460524139416e-05, | |
| "loss": 0.3794, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.8819235116804823, | |
| "grad_norm": 6.569329261779785, | |
| "learning_rate": 4.613146621556722e-05, | |
| "loss": 0.3818, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 0.8831009796533534, | |
| "grad_norm": 8.72360897064209, | |
| "learning_rate": 4.611845188250647e-05, | |
| "loss": 0.3782, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.8842784476262245, | |
| "grad_norm": 5.113489151000977, | |
| "learning_rate": 4.610541753728975e-05, | |
| "loss": 0.3722, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 0.8854559155990958, | |
| "grad_norm": 6.97896146774292, | |
| "learning_rate": 4.609236319226858e-05, | |
| "loss": 0.3936, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.8866333835719669, | |
| "grad_norm": 6.273303508758545, | |
| "learning_rate": 4.607928885981346e-05, | |
| "loss": 0.378, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 0.887810851544838, | |
| "grad_norm": 14.060749053955078, | |
| "learning_rate": 4.606619455231382e-05, | |
| "loss": 0.3763, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.8889883195177091, | |
| "grad_norm": 9.937809944152832, | |
| "learning_rate": 4.605308028217802e-05, | |
| "loss": 0.3825, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 0.8901657874905803, | |
| "grad_norm": 99.67310333251953, | |
| "learning_rate": 4.603994606183333e-05, | |
| "loss": 0.3726, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.8913432554634514, | |
| "grad_norm": 5.380475997924805, | |
| "learning_rate": 4.602679190372593e-05, | |
| "loss": 0.3728, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 0.8925207234363225, | |
| "grad_norm": 4.643420696258545, | |
| "learning_rate": 4.6013617820320905e-05, | |
| "loss": 0.3715, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.8936981914091937, | |
| "grad_norm": 3.417965888977051, | |
| "learning_rate": 4.6000423824102204e-05, | |
| "loss": 0.3736, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 0.8948756593820648, | |
| "grad_norm": 3.9035496711730957, | |
| "learning_rate": 4.598720992757264e-05, | |
| "loss": 0.3888, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.8960531273549359, | |
| "grad_norm": 18.530710220336914, | |
| "learning_rate": 4.597397614325391e-05, | |
| "loss": 0.3721, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 0.8972305953278071, | |
| "grad_norm": 6.487109184265137, | |
| "learning_rate": 4.5960722483686545e-05, | |
| "loss": 0.3733, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.8984080633006782, | |
| "grad_norm": 3.24798846244812, | |
| "learning_rate": 4.5947448961429895e-05, | |
| "loss": 0.3859, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 0.8995855312735493, | |
| "grad_norm": 5.06166410446167, | |
| "learning_rate": 4.593415558906215e-05, | |
| "loss": 0.3701, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.9007629992464204, | |
| "grad_norm": 5.312416076660156, | |
| "learning_rate": 4.592084237918033e-05, | |
| "loss": 0.3662, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 0.9019404672192917, | |
| "grad_norm": 3.8001291751861572, | |
| "learning_rate": 4.590750934440019e-05, | |
| "loss": 0.3748, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.9031179351921628, | |
| "grad_norm": 12.390177726745605, | |
| "learning_rate": 4.5894156497356325e-05, | |
| "loss": 0.3713, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 0.9042954031650339, | |
| "grad_norm": 8.299680709838867, | |
| "learning_rate": 4.5880783850702094e-05, | |
| "loss": 0.3692, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.9054728711379051, | |
| "grad_norm": 11.960047721862793, | |
| "learning_rate": 4.586739141710962e-05, | |
| "loss": 0.3762, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 0.9066503391107762, | |
| "grad_norm": 9.23426342010498, | |
| "learning_rate": 4.585397920926975e-05, | |
| "loss": 0.366, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.9078278070836473, | |
| "grad_norm": 13.51667308807373, | |
| "learning_rate": 4.58405472398921e-05, | |
| "loss": 0.3714, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 0.9090052750565185, | |
| "grad_norm": 4.549753665924072, | |
| "learning_rate": 4.582709552170501e-05, | |
| "loss": 0.3657, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.9101827430293896, | |
| "grad_norm": 4.02241849899292, | |
| "learning_rate": 4.581362406745552e-05, | |
| "loss": 0.3698, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 0.9113602110022607, | |
| "grad_norm": 11.28242015838623, | |
| "learning_rate": 4.580013288990937e-05, | |
| "loss": 0.3708, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.9125376789751318, | |
| "grad_norm": 4.79355525970459, | |
| "learning_rate": 4.578662200185102e-05, | |
| "loss": 0.3635, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 0.913715146948003, | |
| "grad_norm": 5.503510475158691, | |
| "learning_rate": 4.5773091416083555e-05, | |
| "loss": 0.3786, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.9148926149208741, | |
| "grad_norm": 65.38331604003906, | |
| "learning_rate": 4.575954114542879e-05, | |
| "loss": 0.374, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 0.9160700828937453, | |
| "grad_norm": 3.9852523803710938, | |
| "learning_rate": 4.574597120272714e-05, | |
| "loss": 0.3841, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.9172475508666165, | |
| "grad_norm": 5.05305814743042, | |
| "learning_rate": 4.5732381600837696e-05, | |
| "loss": 0.3805, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 0.9184250188394876, | |
| "grad_norm": 5.482520580291748, | |
| "learning_rate": 4.571877235263814e-05, | |
| "loss": 0.3798, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.9196024868123587, | |
| "grad_norm": 5.336310863494873, | |
| "learning_rate": 4.570514347102483e-05, | |
| "loss": 0.3742, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 0.9207799547852299, | |
| "grad_norm": 6.86510705947876, | |
| "learning_rate": 4.569149496891267e-05, | |
| "loss": 0.3636, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.921957422758101, | |
| "grad_norm": 25.996662139892578, | |
| "learning_rate": 4.56778268592352e-05, | |
| "loss": 0.3667, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 0.9231348907309721, | |
| "grad_norm": 21.86874008178711, | |
| "learning_rate": 4.56641391549445e-05, | |
| "loss": 0.3699, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.9243123587038432, | |
| "grad_norm": 15.313295364379883, | |
| "learning_rate": 4.5650431869011254e-05, | |
| "loss": 0.3694, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 0.9254898266767144, | |
| "grad_norm": 11.989869117736816, | |
| "learning_rate": 4.563670501442469e-05, | |
| "loss": 0.3708, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.9266672946495855, | |
| "grad_norm": 5.615723609924316, | |
| "learning_rate": 4.562295860419258e-05, | |
| "loss": 0.3689, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 0.9278447626224566, | |
| "grad_norm": 4.626934051513672, | |
| "learning_rate": 4.5609192651341206e-05, | |
| "loss": 0.3694, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.9290222305953278, | |
| "grad_norm": 6.918455600738525, | |
| "learning_rate": 4.5595407168915405e-05, | |
| "loss": 0.3724, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 0.930199698568199, | |
| "grad_norm": 14.303245544433594, | |
| "learning_rate": 4.55816021699785e-05, | |
| "loss": 0.3695, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.9313771665410701, | |
| "grad_norm": 7.935323238372803, | |
| "learning_rate": 4.556777766761231e-05, | |
| "loss": 0.3819, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 0.9325546345139413, | |
| "grad_norm": 4.901387691497803, | |
| "learning_rate": 4.5553933674917134e-05, | |
| "loss": 0.3719, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.9337321024868124, | |
| "grad_norm": 5.408039093017578, | |
| "learning_rate": 4.554007020501174e-05, | |
| "loss": 0.369, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 0.9349095704596835, | |
| "grad_norm": 12.067142486572266, | |
| "learning_rate": 4.5526187271033374e-05, | |
| "loss": 0.3793, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.9360870384325546, | |
| "grad_norm": 5.030888557434082, | |
| "learning_rate": 4.551228488613769e-05, | |
| "loss": 0.3738, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 0.9372645064054258, | |
| "grad_norm": 4.130500316619873, | |
| "learning_rate": 4.54983630634988e-05, | |
| "loss": 0.368, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.9384419743782969, | |
| "grad_norm": 18.96745491027832, | |
| "learning_rate": 4.5484421816309224e-05, | |
| "loss": 0.3618, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 0.939619442351168, | |
| "grad_norm": 3.345635414123535, | |
| "learning_rate": 4.54704611577799e-05, | |
| "loss": 0.3643, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.9407969103240392, | |
| "grad_norm": 3.7599053382873535, | |
| "learning_rate": 4.5456481101140154e-05, | |
| "loss": 0.371, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 0.9419743782969103, | |
| "grad_norm": 10.631580352783203, | |
| "learning_rate": 4.544248165963769e-05, | |
| "loss": 0.3737, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.9431518462697814, | |
| "grad_norm": 9.388734817504883, | |
| "learning_rate": 4.5428462846538575e-05, | |
| "loss": 0.3716, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 0.9443293142426527, | |
| "grad_norm": 8.07081127166748, | |
| "learning_rate": 4.541442467512726e-05, | |
| "loss": 0.374, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.9455067822155238, | |
| "grad_norm": 16.615015029907227, | |
| "learning_rate": 4.540036715870651e-05, | |
| "loss": 0.3718, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 0.9466842501883949, | |
| "grad_norm": 4.868950843811035, | |
| "learning_rate": 4.538629031059744e-05, | |
| "loss": 0.3699, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.947861718161266, | |
| "grad_norm": 6.033292770385742, | |
| "learning_rate": 4.537219414413949e-05, | |
| "loss": 0.3667, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 0.9490391861341372, | |
| "grad_norm": 3.052788257598877, | |
| "learning_rate": 4.535807867269037e-05, | |
| "loss": 0.3658, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.9502166541070083, | |
| "grad_norm": 3.774036169052124, | |
| "learning_rate": 4.534394390962613e-05, | |
| "loss": 0.3602, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 0.9513941220798794, | |
| "grad_norm": 6.746449947357178, | |
| "learning_rate": 4.5329789868341075e-05, | |
| "loss": 0.3728, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.9525715900527506, | |
| "grad_norm": 7.460921764373779, | |
| "learning_rate": 4.5315616562247766e-05, | |
| "loss": 0.3697, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 0.9537490580256217, | |
| "grad_norm": 10.803895950317383, | |
| "learning_rate": 4.530142400477706e-05, | |
| "loss": 0.368, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.9549265259984928, | |
| "grad_norm": 3.733963966369629, | |
| "learning_rate": 4.5287212209378015e-05, | |
| "loss": 0.3714, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 0.956103993971364, | |
| "grad_norm": 9.356433868408203, | |
| "learning_rate": 4.527298118951796e-05, | |
| "loss": 0.3658, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.9572814619442351, | |
| "grad_norm": 7.683218955993652, | |
| "learning_rate": 4.5258730958682396e-05, | |
| "loss": 0.3693, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 0.9584589299171062, | |
| "grad_norm": 15.705303192138672, | |
| "learning_rate": 4.524446153037506e-05, | |
| "loss": 0.3734, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.9596363978899773, | |
| "grad_norm": 20.39037322998047, | |
| "learning_rate": 4.523017291811787e-05, | |
| "loss": 0.3625, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 0.9608138658628486, | |
| "grad_norm": 20.0559024810791, | |
| "learning_rate": 4.5215865135450935e-05, | |
| "loss": 0.3643, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.9619913338357197, | |
| "grad_norm": 16.901758193969727, | |
| "learning_rate": 4.520153819593251e-05, | |
| "loss": 0.3613, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 0.9631688018085908, | |
| "grad_norm": 10.643461227416992, | |
| "learning_rate": 4.518719211313902e-05, | |
| "loss": 0.3719, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.964346269781462, | |
| "grad_norm": 24.11075782775879, | |
| "learning_rate": 4.517282690066502e-05, | |
| "loss": 0.3677, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 0.9655237377543331, | |
| "grad_norm": 4.633491039276123, | |
| "learning_rate": 4.5158442572123206e-05, | |
| "loss": 0.3651, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.9667012057272042, | |
| "grad_norm": 11.38755989074707, | |
| "learning_rate": 4.5144039141144366e-05, | |
| "loss": 0.3592, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 0.9678786737000754, | |
| "grad_norm": 6.12951135635376, | |
| "learning_rate": 4.512961662137741e-05, | |
| "loss": 0.3715, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.9690561416729465, | |
| "grad_norm": 14.67646312713623, | |
| "learning_rate": 4.511517502648933e-05, | |
| "loss": 0.3664, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 0.9702336096458176, | |
| "grad_norm": 7.611536026000977, | |
| "learning_rate": 4.51007143701652e-05, | |
| "loss": 0.3731, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.9714110776186887, | |
| "grad_norm": 8.646364212036133, | |
| "learning_rate": 4.508623466610814e-05, | |
| "loss": 0.364, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 0.9725885455915599, | |
| "grad_norm": 9.640769958496094, | |
| "learning_rate": 4.507173592803933e-05, | |
| "loss": 0.3676, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.973766013564431, | |
| "grad_norm": 11.874971389770508, | |
| "learning_rate": 4.5057218169698e-05, | |
| "loss": 0.3516, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 0.9749434815373021, | |
| "grad_norm": 16.078182220458984, | |
| "learning_rate": 4.504268140484138e-05, | |
| "loss": 0.3811, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.9761209495101734, | |
| "grad_norm": 4.882361888885498, | |
| "learning_rate": 4.5028125647244735e-05, | |
| "loss": 0.3641, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 0.9772984174830445, | |
| "grad_norm": 7.0901265144348145, | |
| "learning_rate": 4.50135509107013e-05, | |
| "loss": 0.36, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.9784758854559156, | |
| "grad_norm": 8.467730522155762, | |
| "learning_rate": 4.499895720902232e-05, | |
| "loss": 0.3628, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 0.9796533534287868, | |
| "grad_norm": 12.875937461853027, | |
| "learning_rate": 4.4984344556037003e-05, | |
| "loss": 0.3589, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.9808308214016579, | |
| "grad_norm": 11.278694152832031, | |
| "learning_rate": 4.4969712965592505e-05, | |
| "loss": 0.3562, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 0.982008289374529, | |
| "grad_norm": 11.084808349609375, | |
| "learning_rate": 4.4955062451553944e-05, | |
| "loss": 0.3578, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.9831857573474001, | |
| "grad_norm": 13.773730278015137, | |
| "learning_rate": 4.494039302780436e-05, | |
| "loss": 0.3531, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 0.9843632253202713, | |
| "grad_norm": 3.569322347640991, | |
| "learning_rate": 4.4925704708244715e-05, | |
| "loss": 0.3631, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.9855406932931424, | |
| "grad_norm": 3.8381340503692627, | |
| "learning_rate": 4.4910997506793876e-05, | |
| "loss": 0.3636, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 0.9867181612660135, | |
| "grad_norm": 6.162775039672852, | |
| "learning_rate": 4.489627143738861e-05, | |
| "loss": 0.3702, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.9878956292388847, | |
| "grad_norm": 8.147390365600586, | |
| "learning_rate": 4.4881526513983555e-05, | |
| "loss": 0.3502, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 0.9890730972117558, | |
| "grad_norm": 6.755366802215576, | |
| "learning_rate": 4.4866762750551204e-05, | |
| "loss": 0.3676, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.990250565184627, | |
| "grad_norm": 4.249057769775391, | |
| "learning_rate": 4.485198016108193e-05, | |
| "loss": 0.3649, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 0.9914280331574982, | |
| "grad_norm": 4.345348834991455, | |
| "learning_rate": 4.483717875958393e-05, | |
| "loss": 0.3549, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.9926055011303693, | |
| "grad_norm": 1.9621384143829346, | |
| "learning_rate": 4.482235856008324e-05, | |
| "loss": 0.3646, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 0.9937829691032404, | |
| "grad_norm": 3.9806275367736816, | |
| "learning_rate": 4.480751957662368e-05, | |
| "loss": 0.3528, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.9949604370761115, | |
| "grad_norm": 5.289800643920898, | |
| "learning_rate": 4.47926618232669e-05, | |
| "loss": 0.3591, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 0.9961379050489827, | |
| "grad_norm": 8.356411933898926, | |
| "learning_rate": 4.477778531409232e-05, | |
| "loss": 0.3653, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.9973153730218538, | |
| "grad_norm": 16.573802947998047, | |
| "learning_rate": 4.476289006319715e-05, | |
| "loss": 0.3704, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 0.9984928409947249, | |
| "grad_norm": 5.761173248291016, | |
| "learning_rate": 4.474797608469634e-05, | |
| "loss": 0.3704, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.9996703089675961, | |
| "grad_norm": 10.71335220336914, | |
| "learning_rate": 4.47330433927226e-05, | |
| "loss": 0.3649, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.29507139325141907, | |
| "eval_runtime": 609.0505, | |
| "eval_samples_per_second": 247.897, | |
| "eval_steps_per_second": 30.988, | |
| "step": 42464 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 169856, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.416683370203136e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |