| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1182, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0008460236886632825, | |
| "grad_norm": 4.550535678863525, | |
| "learning_rate": 0.0, | |
| "loss": 1.2052, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001692047377326565, | |
| "grad_norm": 4.09000301361084, | |
| "learning_rate": 1.3888888888888888e-07, | |
| "loss": 1.0467, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0025380710659898475, | |
| "grad_norm": 4.39274787902832, | |
| "learning_rate": 2.7777777777777776e-07, | |
| "loss": 1.1306, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00338409475465313, | |
| "grad_norm": 4.3457722663879395, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "loss": 1.0388, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.004230118443316413, | |
| "grad_norm": 4.247500419616699, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 1.1666, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.005076142131979695, | |
| "grad_norm": 4.16987943649292, | |
| "learning_rate": 6.944444444444446e-07, | |
| "loss": 1.1084, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.005922165820642978, | |
| "grad_norm": 3.504650592803955, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.8725, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00676818950930626, | |
| "grad_norm": 4.1873297691345215, | |
| "learning_rate": 9.722222222222224e-07, | |
| "loss": 1.129, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.007614213197969543, | |
| "grad_norm": 4.281223773956299, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.0329, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.008460236886632826, | |
| "grad_norm": 3.8109793663024902, | |
| "learning_rate": 1.25e-06, | |
| "loss": 1.0024, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009306260575296108, | |
| "grad_norm": 4.020341873168945, | |
| "learning_rate": 1.3888888888888892e-06, | |
| "loss": 1.054, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01015228426395939, | |
| "grad_norm": 3.5737178325653076, | |
| "learning_rate": 1.527777777777778e-06, | |
| "loss": 1.0726, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.010998307952622674, | |
| "grad_norm": 3.3822622299194336, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.9914, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.011844331641285956, | |
| "grad_norm": 3.4175751209259033, | |
| "learning_rate": 1.8055555555555557e-06, | |
| "loss": 1.1581, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.012690355329949238, | |
| "grad_norm": 3.0857129096984863, | |
| "learning_rate": 1.944444444444445e-06, | |
| "loss": 0.9684, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01353637901861252, | |
| "grad_norm": 3.017608165740967, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 1.0433, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.014382402707275803, | |
| "grad_norm": 3.069457530975342, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 1.0798, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.015228426395939087, | |
| "grad_norm": 2.6652209758758545, | |
| "learning_rate": 2.361111111111111e-06, | |
| "loss": 0.9204, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.016074450084602367, | |
| "grad_norm": 2.924373149871826, | |
| "learning_rate": 2.5e-06, | |
| "loss": 1.0251, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.01692047377326565, | |
| "grad_norm": 2.1823043823242188, | |
| "learning_rate": 2.6388888888888893e-06, | |
| "loss": 0.8016, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.017766497461928935, | |
| "grad_norm": 2.238309860229492, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 0.8186, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.018612521150592216, | |
| "grad_norm": 2.507589817047119, | |
| "learning_rate": 2.916666666666667e-06, | |
| "loss": 0.9278, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0194585448392555, | |
| "grad_norm": 2.268179416656494, | |
| "learning_rate": 3.055555555555556e-06, | |
| "loss": 0.8594, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02030456852791878, | |
| "grad_norm": 2.1308953762054443, | |
| "learning_rate": 3.1944444444444443e-06, | |
| "loss": 0.9286, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.021150592216582064, | |
| "grad_norm": 2.1231722831726074, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.7292, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.021996615905245348, | |
| "grad_norm": 2.203334331512451, | |
| "learning_rate": 3.4722222222222224e-06, | |
| "loss": 0.806, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02284263959390863, | |
| "grad_norm": 2.161112070083618, | |
| "learning_rate": 3.6111111111111115e-06, | |
| "loss": 0.8608, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.023688663282571912, | |
| "grad_norm": 2.1429355144500732, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.7729, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.024534686971235193, | |
| "grad_norm": 2.123563051223755, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 0.7233, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.025380710659898477, | |
| "grad_norm": 2.211416482925415, | |
| "learning_rate": 4.027777777777779e-06, | |
| "loss": 0.796, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02622673434856176, | |
| "grad_norm": 2.374946355819702, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.7406, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.02707275803722504, | |
| "grad_norm": 1.9759607315063477, | |
| "learning_rate": 4.305555555555556e-06, | |
| "loss": 0.7946, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.027918781725888325, | |
| "grad_norm": 2.052825689315796, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.8375, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.028764805414551606, | |
| "grad_norm": 1.8392261266708374, | |
| "learning_rate": 4.583333333333333e-06, | |
| "loss": 0.7907, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02961082910321489, | |
| "grad_norm": 1.8615978956222534, | |
| "learning_rate": 4.722222222222222e-06, | |
| "loss": 0.7694, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.030456852791878174, | |
| "grad_norm": 1.7482495307922363, | |
| "learning_rate": 4.861111111111111e-06, | |
| "loss": 0.7296, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.031302876480541454, | |
| "grad_norm": 1.808103322982788, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7169, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.032148900169204735, | |
| "grad_norm": 1.6232045888900757, | |
| "learning_rate": 4.999990606222893e-06, | |
| "loss": 0.6382, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03299492385786802, | |
| "grad_norm": 1.79764986038208, | |
| "learning_rate": 4.9999624249621655e-06, | |
| "loss": 0.7091, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0338409475465313, | |
| "grad_norm": 1.9869894981384277, | |
| "learning_rate": 4.999915456429602e-06, | |
| "loss": 0.7723, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03468697123519458, | |
| "grad_norm": 1.8739275932312012, | |
| "learning_rate": 4.99984970097817e-06, | |
| "loss": 0.7096, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.03553299492385787, | |
| "grad_norm": 1.8806772232055664, | |
| "learning_rate": 4.999765159102025e-06, | |
| "loss": 0.7704, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.03637901861252115, | |
| "grad_norm": 1.8095641136169434, | |
| "learning_rate": 4.999661831436499e-06, | |
| "loss": 0.6302, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.03722504230118443, | |
| "grad_norm": 1.986670970916748, | |
| "learning_rate": 4.9995397187581026e-06, | |
| "loss": 0.6971, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03807106598984772, | |
| "grad_norm": 1.7087007761001587, | |
| "learning_rate": 4.9993988219845155e-06, | |
| "loss": 0.7039, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.038917089678511, | |
| "grad_norm": 1.7987544536590576, | |
| "learning_rate": 4.999239142174581e-06, | |
| "loss": 0.7259, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.03976311336717428, | |
| "grad_norm": 2.194416046142578, | |
| "learning_rate": 4.999060680528294e-06, | |
| "loss": 0.7221, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04060913705583756, | |
| "grad_norm": 1.895753026008606, | |
| "learning_rate": 4.9988634383867995e-06, | |
| "loss": 0.712, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04145516074450085, | |
| "grad_norm": 1.4732792377471924, | |
| "learning_rate": 4.998647417232375e-06, | |
| "loss": 0.636, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.04230118443316413, | |
| "grad_norm": 1.7982600927352905, | |
| "learning_rate": 4.998412618688426e-06, | |
| "loss": 0.6754, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04314720812182741, | |
| "grad_norm": 1.6426688432693481, | |
| "learning_rate": 4.9981590445194675e-06, | |
| "loss": 0.6804, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.043993231810490696, | |
| "grad_norm": 1.800573706626892, | |
| "learning_rate": 4.997886696631115e-06, | |
| "loss": 0.6273, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.04483925549915398, | |
| "grad_norm": 1.5397448539733887, | |
| "learning_rate": 4.997595577070068e-06, | |
| "loss": 0.6667, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.04568527918781726, | |
| "grad_norm": 1.6163142919540405, | |
| "learning_rate": 4.997285688024097e-06, | |
| "loss": 0.6126, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.046531302876480544, | |
| "grad_norm": 1.696107268333435, | |
| "learning_rate": 4.996957031822026e-06, | |
| "loss": 0.6148, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.047377326565143825, | |
| "grad_norm": 1.809167742729187, | |
| "learning_rate": 4.996609610933713e-06, | |
| "loss": 0.6086, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.048223350253807105, | |
| "grad_norm": 1.7550158500671387, | |
| "learning_rate": 4.996243427970032e-06, | |
| "loss": 0.661, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.049069373942470386, | |
| "grad_norm": 1.5835374593734741, | |
| "learning_rate": 4.995858485682857e-06, | |
| "loss": 0.6386, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.04991539763113367, | |
| "grad_norm": 1.7450281381607056, | |
| "learning_rate": 4.995454786965037e-06, | |
| "loss": 0.6046, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.050761421319796954, | |
| "grad_norm": 1.586624026298523, | |
| "learning_rate": 4.995032334850378e-06, | |
| "loss": 0.6807, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.051607445008460234, | |
| "grad_norm": 1.6673095226287842, | |
| "learning_rate": 4.994591132513616e-06, | |
| "loss": 0.6778, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.05245346869712352, | |
| "grad_norm": 1.4863988161087036, | |
| "learning_rate": 4.994131183270396e-06, | |
| "loss": 0.5943, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0532994923857868, | |
| "grad_norm": 1.789526343345642, | |
| "learning_rate": 4.9936524905772466e-06, | |
| "loss": 0.6049, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.05414551607445008, | |
| "grad_norm": 1.6632400751113892, | |
| "learning_rate": 4.993155058031554e-06, | |
| "loss": 0.7222, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.05499153976311337, | |
| "grad_norm": 1.4629849195480347, | |
| "learning_rate": 4.992638889371534e-06, | |
| "loss": 0.5864, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05583756345177665, | |
| "grad_norm": 1.3855196237564087, | |
| "learning_rate": 4.992103988476206e-06, | |
| "loss": 0.6227, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05668358714043993, | |
| "grad_norm": 1.7248979806900024, | |
| "learning_rate": 4.99155035936536e-06, | |
| "loss": 0.6841, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.05752961082910321, | |
| "grad_norm": 1.5749865770339966, | |
| "learning_rate": 4.990978006199534e-06, | |
| "loss": 0.6157, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0583756345177665, | |
| "grad_norm": 1.5531669855117798, | |
| "learning_rate": 4.990386933279973e-06, | |
| "loss": 0.5916, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.05922165820642978, | |
| "grad_norm": 1.590692400932312, | |
| "learning_rate": 4.989777145048601e-06, | |
| "loss": 0.612, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06006768189509306, | |
| "grad_norm": 1.673030138015747, | |
| "learning_rate": 4.989148646087992e-06, | |
| "loss": 0.6037, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.06091370558375635, | |
| "grad_norm": 1.7553675174713135, | |
| "learning_rate": 4.988501441121328e-06, | |
| "loss": 0.6356, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.06175972927241963, | |
| "grad_norm": 1.5859638452529907, | |
| "learning_rate": 4.987835535012371e-06, | |
| "loss": 0.5881, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.06260575296108291, | |
| "grad_norm": 1.5672540664672852, | |
| "learning_rate": 4.987150932765415e-06, | |
| "loss": 0.6047, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.06345177664974619, | |
| "grad_norm": 1.6025596857070923, | |
| "learning_rate": 4.986447639525266e-06, | |
| "loss": 0.6815, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06429780033840947, | |
| "grad_norm": 1.5830960273742676, | |
| "learning_rate": 4.985725660577184e-06, | |
| "loss": 0.6036, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06514382402707276, | |
| "grad_norm": 1.7896863222122192, | |
| "learning_rate": 4.984985001346859e-06, | |
| "loss": 0.6463, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.06598984771573604, | |
| "grad_norm": 1.6958873271942139, | |
| "learning_rate": 4.984225667400359e-06, | |
| "loss": 0.6724, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06683587140439932, | |
| "grad_norm": 1.622676968574524, | |
| "learning_rate": 4.983447664444097e-06, | |
| "loss": 0.6548, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.0676818950930626, | |
| "grad_norm": 1.4418054819107056, | |
| "learning_rate": 4.982650998324781e-06, | |
| "loss": 0.5953, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06852791878172589, | |
| "grad_norm": 1.5336499214172363, | |
| "learning_rate": 4.981835675029375e-06, | |
| "loss": 0.6232, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.06937394247038917, | |
| "grad_norm": 1.6581315994262695, | |
| "learning_rate": 4.981001700685051e-06, | |
| "loss": 0.6987, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.07021996615905245, | |
| "grad_norm": 1.5908305644989014, | |
| "learning_rate": 4.980149081559142e-06, | |
| "loss": 0.5882, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.07106598984771574, | |
| "grad_norm": 1.8210628032684326, | |
| "learning_rate": 4.979277824059103e-06, | |
| "loss": 0.6913, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.07191201353637902, | |
| "grad_norm": 1.5042943954467773, | |
| "learning_rate": 4.978387934732451e-06, | |
| "loss": 0.693, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.0727580372250423, | |
| "grad_norm": 1.7726975679397583, | |
| "learning_rate": 4.9774794202667236e-06, | |
| "loss": 0.7089, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.07360406091370558, | |
| "grad_norm": 1.7052823305130005, | |
| "learning_rate": 4.976552287489427e-06, | |
| "loss": 0.6448, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.07445008460236886, | |
| "grad_norm": 1.5747593641281128, | |
| "learning_rate": 4.975606543367983e-06, | |
| "loss": 0.5367, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.07529610829103214, | |
| "grad_norm": 1.4632954597473145, | |
| "learning_rate": 4.974642195009681e-06, | |
| "loss": 0.5494, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.07614213197969544, | |
| "grad_norm": 1.7577087879180908, | |
| "learning_rate": 4.97365924966162e-06, | |
| "loss": 0.6178, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07698815566835872, | |
| "grad_norm": 1.7583465576171875, | |
| "learning_rate": 4.972657714710653e-06, | |
| "loss": 0.622, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.077834179357022, | |
| "grad_norm": 1.6962776184082031, | |
| "learning_rate": 4.9716375976833395e-06, | |
| "loss": 0.5397, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07868020304568528, | |
| "grad_norm": 1.7553181648254395, | |
| "learning_rate": 4.9705989062458805e-06, | |
| "loss": 0.5369, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.07952622673434856, | |
| "grad_norm": 1.4741019010543823, | |
| "learning_rate": 4.969541648204064e-06, | |
| "loss": 0.5877, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.08037225042301184, | |
| "grad_norm": 1.5855069160461426, | |
| "learning_rate": 4.968465831503207e-06, | |
| "loss": 0.7098, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.08121827411167512, | |
| "grad_norm": 1.7892258167266846, | |
| "learning_rate": 4.967371464228096e-06, | |
| "loss": 0.6401, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.08206429780033841, | |
| "grad_norm": 1.4679720401763916, | |
| "learning_rate": 4.966258554602924e-06, | |
| "loss": 0.5463, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.0829103214890017, | |
| "grad_norm": 1.7447293996810913, | |
| "learning_rate": 4.965127110991232e-06, | |
| "loss": 0.6407, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.08375634517766498, | |
| "grad_norm": 1.4224369525909424, | |
| "learning_rate": 4.9639771418958434e-06, | |
| "loss": 0.568, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.08460236886632826, | |
| "grad_norm": 1.539178729057312, | |
| "learning_rate": 4.9628086559588e-06, | |
| "loss": 0.6125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08544839255499154, | |
| "grad_norm": 1.6318973302841187, | |
| "learning_rate": 4.961621661961299e-06, | |
| "loss": 0.5793, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.08629441624365482, | |
| "grad_norm": 1.7556897401809692, | |
| "learning_rate": 4.960416168823626e-06, | |
| "loss": 0.5352, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08714043993231811, | |
| "grad_norm": 1.5747413635253906, | |
| "learning_rate": 4.959192185605089e-06, | |
| "loss": 0.6511, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.08798646362098139, | |
| "grad_norm": 1.4685758352279663, | |
| "learning_rate": 4.957949721503947e-06, | |
| "loss": 0.5377, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08883248730964467, | |
| "grad_norm": 1.4151921272277832, | |
| "learning_rate": 4.956688785857345e-06, | |
| "loss": 0.5788, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08967851099830795, | |
| "grad_norm": 1.6794856786727905, | |
| "learning_rate": 4.955409388141243e-06, | |
| "loss": 0.6054, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.09052453468697123, | |
| "grad_norm": 1.4817862510681152, | |
| "learning_rate": 4.954111537970342e-06, | |
| "loss": 0.6027, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.09137055837563451, | |
| "grad_norm": 1.728560209274292, | |
| "learning_rate": 4.952795245098013e-06, | |
| "loss": 0.552, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.0922165820642978, | |
| "grad_norm": 1.6162961721420288, | |
| "learning_rate": 4.951460519416228e-06, | |
| "loss": 0.6239, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.09306260575296109, | |
| "grad_norm": 1.5593509674072266, | |
| "learning_rate": 4.950107370955477e-06, | |
| "loss": 0.6413, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09390862944162437, | |
| "grad_norm": 1.680822491645813, | |
| "learning_rate": 4.948735809884701e-06, | |
| "loss": 0.5699, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.09475465313028765, | |
| "grad_norm": 1.745162010192871, | |
| "learning_rate": 4.94734584651121e-06, | |
| "loss": 0.5752, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.09560067681895093, | |
| "grad_norm": 1.4649637937545776, | |
| "learning_rate": 4.945937491280611e-06, | |
| "loss": 0.5046, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.09644670050761421, | |
| "grad_norm": 1.5797159671783447, | |
| "learning_rate": 4.944510754776724e-06, | |
| "loss": 0.6037, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09729272419627749, | |
| "grad_norm": 1.8420116901397705, | |
| "learning_rate": 4.9430656477215016e-06, | |
| "loss": 0.647, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.09813874788494077, | |
| "grad_norm": 1.5039803981781006, | |
| "learning_rate": 4.941602180974958e-06, | |
| "loss": 0.6207, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.09898477157360407, | |
| "grad_norm": 1.636516809463501, | |
| "learning_rate": 4.940120365535076e-06, | |
| "loss": 0.5839, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.09983079526226735, | |
| "grad_norm": 1.7785464525222778, | |
| "learning_rate": 4.938620212537733e-06, | |
| "loss": 0.5822, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.10067681895093063, | |
| "grad_norm": 1.5129812955856323, | |
| "learning_rate": 4.937101733256608e-06, | |
| "loss": 0.6025, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.10152284263959391, | |
| "grad_norm": 1.603428602218628, | |
| "learning_rate": 4.9355649391031066e-06, | |
| "loss": 0.6247, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.10236886632825719, | |
| "grad_norm": 1.5037899017333984, | |
| "learning_rate": 4.934009841626272e-06, | |
| "loss": 0.5521, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.10321489001692047, | |
| "grad_norm": 1.5160306692123413, | |
| "learning_rate": 4.932436452512693e-06, | |
| "loss": 0.6395, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.10406091370558376, | |
| "grad_norm": 1.4215550422668457, | |
| "learning_rate": 4.930844783586424e-06, | |
| "loss": 0.4997, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.10490693739424704, | |
| "grad_norm": 1.7557592391967773, | |
| "learning_rate": 4.929234846808893e-06, | |
| "loss": 0.6924, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.10575296108291032, | |
| "grad_norm": 1.5873064994812012, | |
| "learning_rate": 4.927606654278809e-06, | |
| "loss": 0.5761, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1065989847715736, | |
| "grad_norm": 1.5326778888702393, | |
| "learning_rate": 4.925960218232073e-06, | |
| "loss": 0.5501, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.10744500846023688, | |
| "grad_norm": 1.5582760572433472, | |
| "learning_rate": 4.924295551041688e-06, | |
| "loss": 0.5711, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.10829103214890017, | |
| "grad_norm": 1.5439339876174927, | |
| "learning_rate": 4.922612665217664e-06, | |
| "loss": 0.5736, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.10913705583756345, | |
| "grad_norm": 1.3591135740280151, | |
| "learning_rate": 4.920911573406925e-06, | |
| "loss": 0.5472, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.10998307952622674, | |
| "grad_norm": 1.655671238899231, | |
| "learning_rate": 4.919192288393213e-06, | |
| "loss": 0.5782, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.11082910321489002, | |
| "grad_norm": 1.5855729579925537, | |
| "learning_rate": 4.917454823096991e-06, | |
| "loss": 0.6764, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1116751269035533, | |
| "grad_norm": 1.4577858448028564, | |
| "learning_rate": 4.915699190575349e-06, | |
| "loss": 0.4923, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.11252115059221658, | |
| "grad_norm": 1.5771598815917969, | |
| "learning_rate": 4.913925404021905e-06, | |
| "loss": 0.548, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.11336717428087986, | |
| "grad_norm": 1.7482346296310425, | |
| "learning_rate": 4.912133476766701e-06, | |
| "loss": 0.629, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.11421319796954314, | |
| "grad_norm": 1.6378353834152222, | |
| "learning_rate": 4.91032342227611e-06, | |
| "loss": 0.6119, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.11505922165820642, | |
| "grad_norm": 1.4870620965957642, | |
| "learning_rate": 4.9084952541527315e-06, | |
| "loss": 0.5078, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.11590524534686972, | |
| "grad_norm": 1.6145250797271729, | |
| "learning_rate": 4.9066489861352875e-06, | |
| "loss": 0.608, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.116751269035533, | |
| "grad_norm": 1.616117000579834, | |
| "learning_rate": 4.904784632098523e-06, | |
| "loss": 0.5443, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.11759729272419628, | |
| "grad_norm": 1.6198755502700806, | |
| "learning_rate": 4.902902206053099e-06, | |
| "loss": 0.6141, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.11844331641285956, | |
| "grad_norm": 1.4478083848953247, | |
| "learning_rate": 4.9010017221454875e-06, | |
| "loss": 0.5369, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11928934010152284, | |
| "grad_norm": 1.6716082096099854, | |
| "learning_rate": 4.899083194657867e-06, | |
| "loss": 0.5421, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.12013536379018612, | |
| "grad_norm": 1.6240705251693726, | |
| "learning_rate": 4.897146638008012e-06, | |
| "loss": 0.5594, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.12098138747884941, | |
| "grad_norm": 1.4059849977493286, | |
| "learning_rate": 4.89519206674919e-06, | |
| "loss": 0.5416, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1218274111675127, | |
| "grad_norm": 1.4964284896850586, | |
| "learning_rate": 4.893219495570043e-06, | |
| "loss": 0.5634, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.12267343485617598, | |
| "grad_norm": 1.5191673040390015, | |
| "learning_rate": 4.891228939294489e-06, | |
| "loss": 0.5912, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.12351945854483926, | |
| "grad_norm": 1.652429223060608, | |
| "learning_rate": 4.8892204128816e-06, | |
| "loss": 0.6106, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.12436548223350254, | |
| "grad_norm": 1.706398606300354, | |
| "learning_rate": 4.8871939314254965e-06, | |
| "loss": 0.6298, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.12521150592216582, | |
| "grad_norm": 1.6904054880142212, | |
| "learning_rate": 4.88514951015523e-06, | |
| "loss": 0.5076, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1260575296108291, | |
| "grad_norm": 1.4144283533096313, | |
| "learning_rate": 4.883087164434672e-06, | |
| "loss": 0.5625, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.12690355329949238, | |
| "grad_norm": 1.6963647603988647, | |
| "learning_rate": 4.881006909762394e-06, | |
| "loss": 0.7107, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12774957698815567, | |
| "grad_norm": 1.7292715311050415, | |
| "learning_rate": 4.878908761771555e-06, | |
| "loss": 0.5773, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.12859560067681894, | |
| "grad_norm": 1.321929693222046, | |
| "learning_rate": 4.876792736229782e-06, | |
| "loss": 0.5283, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.12944162436548223, | |
| "grad_norm": 1.5928312540054321, | |
| "learning_rate": 4.874658849039054e-06, | |
| "loss": 0.5278, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.13028764805414553, | |
| "grad_norm": 1.5850692987442017, | |
| "learning_rate": 4.8725071162355805e-06, | |
| "loss": 0.6298, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1311336717428088, | |
| "grad_norm": 1.534232497215271, | |
| "learning_rate": 4.870337553989678e-06, | |
| "loss": 0.5157, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1319796954314721, | |
| "grad_norm": 1.4662110805511475, | |
| "learning_rate": 4.8681501786056545e-06, | |
| "loss": 0.4884, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.13282571912013535, | |
| "grad_norm": 1.4880340099334717, | |
| "learning_rate": 4.865945006521684e-06, | |
| "loss": 0.6217, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.13367174280879865, | |
| "grad_norm": 1.578230857849121, | |
| "learning_rate": 4.863722054309682e-06, | |
| "loss": 0.6814, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.13451776649746192, | |
| "grad_norm": 1.6861345767974854, | |
| "learning_rate": 4.861481338675183e-06, | |
| "loss": 0.5715, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.1353637901861252, | |
| "grad_norm": 1.561371922492981, | |
| "learning_rate": 4.8592228764572135e-06, | |
| "loss": 0.5708, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1362098138747885, | |
| "grad_norm": 1.7995845079421997, | |
| "learning_rate": 4.856946684628167e-06, | |
| "loss": 0.6436, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.13705583756345177, | |
| "grad_norm": 1.5640236139297485, | |
| "learning_rate": 4.854652780293672e-06, | |
| "loss": 0.6295, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.13790186125211507, | |
| "grad_norm": 1.3940402269363403, | |
| "learning_rate": 4.852341180692471e-06, | |
| "loss": 0.5547, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.13874788494077833, | |
| "grad_norm": 1.512726068496704, | |
| "learning_rate": 4.8500119031962845e-06, | |
| "loss": 0.5077, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.13959390862944163, | |
| "grad_norm": 1.684700608253479, | |
| "learning_rate": 4.847664965309684e-06, | |
| "loss": 0.5076, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1404399323181049, | |
| "grad_norm": 1.5767724514007568, | |
| "learning_rate": 4.845300384669958e-06, | |
| "loss": 0.6229, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1412859560067682, | |
| "grad_norm": 1.6619733572006226, | |
| "learning_rate": 4.842918179046982e-06, | |
| "loss": 0.555, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.14213197969543148, | |
| "grad_norm": 1.633802056312561, | |
| "learning_rate": 4.840518366343083e-06, | |
| "loss": 0.592, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.14297800338409475, | |
| "grad_norm": 1.6347830295562744, | |
| "learning_rate": 4.8381009645929044e-06, | |
| "loss": 0.6016, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.14382402707275804, | |
| "grad_norm": 1.4932504892349243, | |
| "learning_rate": 4.835665991963274e-06, | |
| "loss": 0.5356, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1446700507614213, | |
| "grad_norm": 1.5529918670654297, | |
| "learning_rate": 4.833213466753063e-06, | |
| "loss": 0.503, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.1455160744500846, | |
| "grad_norm": 1.5487970113754272, | |
| "learning_rate": 4.830743407393052e-06, | |
| "loss": 0.5763, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1463620981387479, | |
| "grad_norm": 1.521419644355774, | |
| "learning_rate": 4.82825583244579e-06, | |
| "loss": 0.5377, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.14720812182741116, | |
| "grad_norm": 1.516845703125, | |
| "learning_rate": 4.825750760605458e-06, | |
| "loss": 0.6147, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.14805414551607446, | |
| "grad_norm": 1.7243348360061646, | |
| "learning_rate": 4.823228210697723e-06, | |
| "loss": 0.5545, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14890016920473773, | |
| "grad_norm": 1.5753135681152344, | |
| "learning_rate": 4.820688201679605e-06, | |
| "loss": 0.5235, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.14974619289340102, | |
| "grad_norm": 1.7663754224777222, | |
| "learning_rate": 4.818130752639326e-06, | |
| "loss": 0.6196, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.1505922165820643, | |
| "grad_norm": 1.7618986368179321, | |
| "learning_rate": 4.815555882796169e-06, | |
| "loss": 0.6838, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.15143824027072758, | |
| "grad_norm": 1.4118471145629883, | |
| "learning_rate": 4.8129636115003396e-06, | |
| "loss": 0.5275, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.15228426395939088, | |
| "grad_norm": 1.6629770994186401, | |
| "learning_rate": 4.810353958232811e-06, | |
| "loss": 0.5783, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.15313028764805414, | |
| "grad_norm": 1.4159945249557495, | |
| "learning_rate": 4.807726942605184e-06, | |
| "loss": 0.508, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.15397631133671744, | |
| "grad_norm": 1.6594223976135254, | |
| "learning_rate": 4.8050825843595395e-06, | |
| "loss": 0.5711, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1548223350253807, | |
| "grad_norm": 1.604934811592102, | |
| "learning_rate": 4.802420903368286e-06, | |
| "loss": 0.5777, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.155668358714044, | |
| "grad_norm": 1.6225334405899048, | |
| "learning_rate": 4.7997419196340136e-06, | |
| "loss": 0.5079, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.15651438240270726, | |
| "grad_norm": 1.7522289752960205, | |
| "learning_rate": 4.797045653289343e-06, | |
| "loss": 0.587, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.15736040609137056, | |
| "grad_norm": 1.5537394285202026, | |
| "learning_rate": 4.794332124596775e-06, | |
| "loss": 0.5068, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.15820642978003385, | |
| "grad_norm": 1.5667427778244019, | |
| "learning_rate": 4.791601353948537e-06, | |
| "loss": 0.6201, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.15905245346869712, | |
| "grad_norm": 1.667194128036499, | |
| "learning_rate": 4.788853361866429e-06, | |
| "loss": 0.5411, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.1598984771573604, | |
| "grad_norm": 1.434585452079773, | |
| "learning_rate": 4.786088169001671e-06, | |
| "loss": 0.5377, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.16074450084602368, | |
| "grad_norm": 1.4391204118728638, | |
| "learning_rate": 4.7833057961347476e-06, | |
| "loss": 0.5865, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.16159052453468697, | |
| "grad_norm": 1.5104649066925049, | |
| "learning_rate": 4.78050626417525e-06, | |
| "loss": 0.4837, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.16243654822335024, | |
| "grad_norm": 1.483412742614746, | |
| "learning_rate": 4.777689594161724e-06, | |
| "loss": 0.5627, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.16328257191201354, | |
| "grad_norm": 1.9065080881118774, | |
| "learning_rate": 4.774855807261504e-06, | |
| "loss": 0.611, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.16412859560067683, | |
| "grad_norm": 1.5537798404693604, | |
| "learning_rate": 4.77200492477056e-06, | |
| "loss": 0.5078, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.1649746192893401, | |
| "grad_norm": 1.5670864582061768, | |
| "learning_rate": 4.769136968113337e-06, | |
| "loss": 0.5509, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.1658206429780034, | |
| "grad_norm": 1.5053881406784058, | |
| "learning_rate": 4.766251958842589e-06, | |
| "loss": 0.5504, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 1.4908168315887451, | |
| "learning_rate": 4.763349918639228e-06, | |
| "loss": 0.5645, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.16751269035532995, | |
| "grad_norm": 1.4086905717849731, | |
| "learning_rate": 4.760430869312144e-06, | |
| "loss": 0.4633, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.16835871404399322, | |
| "grad_norm": 1.4943495988845825, | |
| "learning_rate": 4.757494832798057e-06, | |
| "loss": 0.5893, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.1692047377326565, | |
| "grad_norm": 1.524116039276123, | |
| "learning_rate": 4.7545418311613485e-06, | |
| "loss": 0.5761, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1700507614213198, | |
| "grad_norm": 1.6232064962387085, | |
| "learning_rate": 4.751571886593886e-06, | |
| "loss": 0.5514, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.17089678510998307, | |
| "grad_norm": 1.592065453529358, | |
| "learning_rate": 4.748585021414869e-06, | |
| "loss": 0.586, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.17174280879864637, | |
| "grad_norm": 1.39573335647583, | |
| "learning_rate": 4.745581258070654e-06, | |
| "loss": 0.538, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.17258883248730963, | |
| "grad_norm": 1.5501444339752197, | |
| "learning_rate": 4.742560619134587e-06, | |
| "loss": 0.6041, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.17343485617597293, | |
| "grad_norm": 1.645410418510437, | |
| "learning_rate": 4.739523127306837e-06, | |
| "loss": 0.5364, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.17428087986463622, | |
| "grad_norm": 1.623639464378357, | |
| "learning_rate": 4.736468805414218e-06, | |
| "loss": 0.6014, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1751269035532995, | |
| "grad_norm": 1.521011471748352, | |
| "learning_rate": 4.733397676410027e-06, | |
| "loss": 0.5821, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.17597292724196278, | |
| "grad_norm": 1.590009331703186, | |
| "learning_rate": 4.730309763373866e-06, | |
| "loss": 0.5419, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.17681895093062605, | |
| "grad_norm": 1.5016567707061768, | |
| "learning_rate": 4.727205089511466e-06, | |
| "loss": 0.5539, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.17766497461928935, | |
| "grad_norm": 1.3730658292770386, | |
| "learning_rate": 4.7240836781545205e-06, | |
| "loss": 0.5356, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1785109983079526, | |
| "grad_norm": 1.5500950813293457, | |
| "learning_rate": 4.720945552760503e-06, | |
| "loss": 0.5907, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.1793570219966159, | |
| "grad_norm": 1.5380445718765259, | |
| "learning_rate": 4.717790736912493e-06, | |
| "loss": 0.5068, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.1802030456852792, | |
| "grad_norm": 1.4724454879760742, | |
| "learning_rate": 4.7146192543190005e-06, | |
| "loss": 0.5491, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.18104906937394247, | |
| "grad_norm": 1.558326244354248, | |
| "learning_rate": 4.711431128813787e-06, | |
| "loss": 0.5843, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.18189509306260576, | |
| "grad_norm": 1.6515816450119019, | |
| "learning_rate": 4.708226384355684e-06, | |
| "loss": 0.5486, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.18274111675126903, | |
| "grad_norm": 1.4587641954421997, | |
| "learning_rate": 4.705005045028415e-06, | |
| "loss": 0.6305, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.18358714043993232, | |
| "grad_norm": 1.5102735757827759, | |
| "learning_rate": 4.701767135040415e-06, | |
| "loss": 0.4159, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.1844331641285956, | |
| "grad_norm": 1.6922459602355957, | |
| "learning_rate": 4.698512678724649e-06, | |
| "loss": 0.5456, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.18527918781725888, | |
| "grad_norm": 1.7045525312423706, | |
| "learning_rate": 4.695241700538425e-06, | |
| "loss": 0.5584, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.18612521150592218, | |
| "grad_norm": 1.6846798658370972, | |
| "learning_rate": 4.691954225063218e-06, | |
| "loss": 0.5568, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18697123519458544, | |
| "grad_norm": 1.4572404623031616, | |
| "learning_rate": 4.688650277004474e-06, | |
| "loss": 0.5408, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.18781725888324874, | |
| "grad_norm": 1.7677491903305054, | |
| "learning_rate": 4.685329881191436e-06, | |
| "loss": 0.6165, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.188663282571912, | |
| "grad_norm": 1.613861322402954, | |
| "learning_rate": 4.68199306257695e-06, | |
| "loss": 0.5826, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1895093062605753, | |
| "grad_norm": 1.4331867694854736, | |
| "learning_rate": 4.678639846237281e-06, | |
| "loss": 0.54, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.19035532994923857, | |
| "grad_norm": 1.5444533824920654, | |
| "learning_rate": 4.675270257371922e-06, | |
| "loss": 0.5222, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.19120135363790186, | |
| "grad_norm": 1.3472926616668701, | |
| "learning_rate": 4.671884321303407e-06, | |
| "loss": 0.5211, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.19204737732656516, | |
| "grad_norm": 1.4535293579101562, | |
| "learning_rate": 4.668482063477118e-06, | |
| "loss": 0.5718, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.19289340101522842, | |
| "grad_norm": 1.6504758596420288, | |
| "learning_rate": 4.665063509461098e-06, | |
| "loss": 0.5433, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.19373942470389172, | |
| "grad_norm": 1.2919827699661255, | |
| "learning_rate": 4.661628684945851e-06, | |
| "loss": 0.4668, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.19458544839255498, | |
| "grad_norm": 1.582503318786621, | |
| "learning_rate": 4.658177615744162e-06, | |
| "loss": 0.5492, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19543147208121828, | |
| "grad_norm": 1.4641120433807373, | |
| "learning_rate": 4.654710327790889e-06, | |
| "loss": 0.556, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.19627749576988154, | |
| "grad_norm": 1.5751663446426392, | |
| "learning_rate": 4.651226847142774e-06, | |
| "loss": 0.5209, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.19712351945854484, | |
| "grad_norm": 1.361720085144043, | |
| "learning_rate": 4.647727199978255e-06, | |
| "loss": 0.5849, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.19796954314720813, | |
| "grad_norm": 1.4193429946899414, | |
| "learning_rate": 4.644211412597251e-06, | |
| "loss": 0.4808, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.1988155668358714, | |
| "grad_norm": 1.641695261001587, | |
| "learning_rate": 4.640679511420983e-06, | |
| "loss": 0.5782, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1996615905245347, | |
| "grad_norm": 1.509385347366333, | |
| "learning_rate": 4.6371315229917644e-06, | |
| "loss": 0.5397, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.20050761421319796, | |
| "grad_norm": 1.431472897529602, | |
| "learning_rate": 4.6335674739728055e-06, | |
| "loss": 0.5817, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.20135363790186125, | |
| "grad_norm": 1.4169070720672607, | |
| "learning_rate": 4.629987391148012e-06, | |
| "loss": 0.536, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.20219966159052452, | |
| "grad_norm": 1.3376789093017578, | |
| "learning_rate": 4.6263913014217826e-06, | |
| "loss": 0.4489, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.20304568527918782, | |
| "grad_norm": 1.4299821853637695, | |
| "learning_rate": 4.622779231818811e-06, | |
| "loss": 0.555, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2038917089678511, | |
| "grad_norm": 1.5063016414642334, | |
| "learning_rate": 4.619151209483879e-06, | |
| "loss": 0.5898, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.20473773265651438, | |
| "grad_norm": 1.5440738201141357, | |
| "learning_rate": 4.6155072616816515e-06, | |
| "loss": 0.5395, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.20558375634517767, | |
| "grad_norm": 1.6442499160766602, | |
| "learning_rate": 4.611847415796476e-06, | |
| "loss": 0.5488, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.20642978003384094, | |
| "grad_norm": 1.5802503824234009, | |
| "learning_rate": 4.608171699332174e-06, | |
| "loss": 0.5221, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.20727580372250423, | |
| "grad_norm": 1.5823266506195068, | |
| "learning_rate": 4.604480139911836e-06, | |
| "loss": 0.5678, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.20812182741116753, | |
| "grad_norm": 1.3660752773284912, | |
| "learning_rate": 4.600772765277607e-06, | |
| "loss": 0.5177, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2089678510998308, | |
| "grad_norm": 1.495895266532898, | |
| "learning_rate": 4.597049603290491e-06, | |
| "loss": 0.5982, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2098138747884941, | |
| "grad_norm": 1.5751233100891113, | |
| "learning_rate": 4.59331068193013e-06, | |
| "loss": 0.5461, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.21065989847715735, | |
| "grad_norm": 1.4056577682495117, | |
| "learning_rate": 4.5895560292946e-06, | |
| "loss": 0.5657, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.21150592216582065, | |
| "grad_norm": 1.6429760456085205, | |
| "learning_rate": 4.585785673600196e-06, | |
| "loss": 0.6208, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.21235194585448391, | |
| "grad_norm": 1.6389528512954712, | |
| "learning_rate": 4.581999643181223e-06, | |
| "loss": 0.5263, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.2131979695431472, | |
| "grad_norm": 1.4250948429107666, | |
| "learning_rate": 4.578197966489782e-06, | |
| "loss": 0.514, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2140439932318105, | |
| "grad_norm": 1.6212941408157349, | |
| "learning_rate": 4.574380672095555e-06, | |
| "loss": 0.4906, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.21489001692047377, | |
| "grad_norm": 1.450196385383606, | |
| "learning_rate": 4.5705477886855925e-06, | |
| "loss": 0.4833, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.21573604060913706, | |
| "grad_norm": 1.454309344291687, | |
| "learning_rate": 4.566699345064097e-06, | |
| "loss": 0.5631, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.21658206429780033, | |
| "grad_norm": 1.706693410873413, | |
| "learning_rate": 4.562835370152206e-06, | |
| "loss": 0.548, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.21742808798646363, | |
| "grad_norm": 1.4211560487747192, | |
| "learning_rate": 4.558955892987774e-06, | |
| "loss": 0.5947, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.2182741116751269, | |
| "grad_norm": 1.4968616962432861, | |
| "learning_rate": 4.555060942725156e-06, | |
| "loss": 0.5693, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.21912013536379019, | |
| "grad_norm": 1.4211528301239014, | |
| "learning_rate": 4.551150548634987e-06, | |
| "loss": 0.5976, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.21996615905245348, | |
| "grad_norm": 1.4713115692138672, | |
| "learning_rate": 4.547224740103966e-06, | |
| "loss": 0.4759, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.22081218274111675, | |
| "grad_norm": 1.6762306690216064, | |
| "learning_rate": 4.543283546634626e-06, | |
| "loss": 0.5732, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.22165820642978004, | |
| "grad_norm": 1.266420841217041, | |
| "learning_rate": 4.539326997845124e-06, | |
| "loss": 0.4456, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2225042301184433, | |
| "grad_norm": 1.568297266960144, | |
| "learning_rate": 4.535355123469009e-06, | |
| "loss": 0.5853, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2233502538071066, | |
| "grad_norm": 1.5746086835861206, | |
| "learning_rate": 4.531367953355002e-06, | |
| "loss": 0.5569, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.22419627749576987, | |
| "grad_norm": 1.4679572582244873, | |
| "learning_rate": 4.527365517466775e-06, | |
| "loss": 0.4425, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.22504230118443316, | |
| "grad_norm": 1.5745289325714111, | |
| "learning_rate": 4.523347845882718e-06, | |
| "loss": 0.5316, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.22588832487309646, | |
| "grad_norm": 1.5450767278671265, | |
| "learning_rate": 4.519314968795722e-06, | |
| "loss": 0.5353, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.22673434856175972, | |
| "grad_norm": 1.850501537322998, | |
| "learning_rate": 4.515266916512945e-06, | |
| "loss": 0.6068, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.22758037225042302, | |
| "grad_norm": 1.3289093971252441, | |
| "learning_rate": 4.511203719455588e-06, | |
| "loss": 0.529, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.22842639593908629, | |
| "grad_norm": 1.6736395359039307, | |
| "learning_rate": 4.507125408158665e-06, | |
| "loss": 0.6073, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22927241962774958, | |
| "grad_norm": 1.3271763324737549, | |
| "learning_rate": 4.503032013270774e-06, | |
| "loss": 0.5165, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.23011844331641285, | |
| "grad_norm": 1.4673720598220825, | |
| "learning_rate": 4.498923565553866e-06, | |
| "loss": 0.495, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.23096446700507614, | |
| "grad_norm": 1.5206209421157837, | |
| "learning_rate": 4.494800095883014e-06, | |
| "loss": 0.5659, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.23181049069373943, | |
| "grad_norm": 1.5659880638122559, | |
| "learning_rate": 4.490661635246183e-06, | |
| "loss": 0.5876, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2326565143824027, | |
| "grad_norm": 1.4979114532470703, | |
| "learning_rate": 4.4865082147439945e-06, | |
| "loss": 0.5988, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.233502538071066, | |
| "grad_norm": 1.5441852807998657, | |
| "learning_rate": 4.482339865589492e-06, | |
| "loss": 0.5215, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.23434856175972926, | |
| "grad_norm": 1.476711630821228, | |
| "learning_rate": 4.478156619107912e-06, | |
| "loss": 0.5019, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.23519458544839256, | |
| "grad_norm": 1.4779571294784546, | |
| "learning_rate": 4.4739585067364425e-06, | |
| "loss": 0.6033, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.23604060913705585, | |
| "grad_norm": 1.5338369607925415, | |
| "learning_rate": 4.469745560023987e-06, | |
| "loss": 0.5089, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.23688663282571912, | |
| "grad_norm": 1.536832332611084, | |
| "learning_rate": 4.465517810630933e-06, | |
| "loss": 0.4967, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2377326565143824, | |
| "grad_norm": 1.5541664361953735, | |
| "learning_rate": 4.461275290328908e-06, | |
| "loss": 0.5869, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.23857868020304568, | |
| "grad_norm": 1.7338745594024658, | |
| "learning_rate": 4.457018031000544e-06, | |
| "loss": 0.5288, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.23942470389170897, | |
| "grad_norm": 1.345275640487671, | |
| "learning_rate": 4.452746064639239e-06, | |
| "loss": 0.4971, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.24027072758037224, | |
| "grad_norm": 1.4979954957962036, | |
| "learning_rate": 4.448459423348911e-06, | |
| "loss": 0.5437, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.24111675126903553, | |
| "grad_norm": 1.5409454107284546, | |
| "learning_rate": 4.444158139343763e-06, | |
| "loss": 0.5521, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.24196277495769883, | |
| "grad_norm": 1.4069455862045288, | |
| "learning_rate": 4.439842244948036e-06, | |
| "loss": 0.5609, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.2428087986463621, | |
| "grad_norm": 1.5055841207504272, | |
| "learning_rate": 4.435511772595773e-06, | |
| "loss": 0.5308, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.2436548223350254, | |
| "grad_norm": 1.7902872562408447, | |
| "learning_rate": 4.4311667548305644e-06, | |
| "loss": 0.613, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.24450084602368866, | |
| "grad_norm": 1.5679998397827148, | |
| "learning_rate": 4.426807224305315e-06, | |
| "loss": 0.5521, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.24534686971235195, | |
| "grad_norm": 1.5276457071304321, | |
| "learning_rate": 4.422433213781991e-06, | |
| "loss": 0.5454, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.24619289340101522, | |
| "grad_norm": 1.563459873199463, | |
| "learning_rate": 4.4180447561313765e-06, | |
| "loss": 0.508, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.2470389170896785, | |
| "grad_norm": 1.575172781944275, | |
| "learning_rate": 4.413641884332825e-06, | |
| "loss": 0.5726, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2478849407783418, | |
| "grad_norm": 1.6327418088912964, | |
| "learning_rate": 4.409224631474014e-06, | |
| "loss": 0.6188, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.24873096446700507, | |
| "grad_norm": 1.4867552518844604, | |
| "learning_rate": 4.404793030750695e-06, | |
| "loss": 0.5364, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.24957698815566837, | |
| "grad_norm": 1.512021541595459, | |
| "learning_rate": 4.400347115466442e-06, | |
| "loss": 0.5083, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.25042301184433163, | |
| "grad_norm": 1.3881378173828125, | |
| "learning_rate": 4.395886919032406e-06, | |
| "loss": 0.5789, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.2512690355329949, | |
| "grad_norm": 1.4656717777252197, | |
| "learning_rate": 4.39141247496706e-06, | |
| "loss": 0.5213, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.2521150592216582, | |
| "grad_norm": 1.5013030767440796, | |
| "learning_rate": 4.3869238168959485e-06, | |
| "loss": 0.5245, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.2529610829103215, | |
| "grad_norm": 1.535556435585022, | |
| "learning_rate": 4.382420978551433e-06, | |
| "loss": 0.5541, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.25380710659898476, | |
| "grad_norm": 1.4609181880950928, | |
| "learning_rate": 4.377903993772442e-06, | |
| "loss": 0.5155, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2546531302876481, | |
| "grad_norm": 1.4020256996154785, | |
| "learning_rate": 4.373372896504215e-06, | |
| "loss": 0.5553, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.25549915397631134, | |
| "grad_norm": 1.5317964553833008, | |
| "learning_rate": 4.368827720798044e-06, | |
| "loss": 0.5547, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2563451776649746, | |
| "grad_norm": 1.6519298553466797, | |
| "learning_rate": 4.364268500811025e-06, | |
| "loss": 0.5305, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.2571912013536379, | |
| "grad_norm": 1.511772632598877, | |
| "learning_rate": 4.359695270805795e-06, | |
| "loss": 0.449, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.2580372250423012, | |
| "grad_norm": 1.3912087678909302, | |
| "learning_rate": 4.3551080651502755e-06, | |
| "loss": 0.5184, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.25888324873096447, | |
| "grad_norm": 1.5851740837097168, | |
| "learning_rate": 4.350506918317416e-06, | |
| "loss": 0.5489, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.25972927241962773, | |
| "grad_norm": 1.4331227540969849, | |
| "learning_rate": 4.345891864884937e-06, | |
| "loss": 0.5338, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.26057529610829105, | |
| "grad_norm": 1.487821102142334, | |
| "learning_rate": 4.341262939535063e-06, | |
| "loss": 0.5155, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.2614213197969543, | |
| "grad_norm": 1.5234811305999756, | |
| "learning_rate": 4.336620177054269e-06, | |
| "loss": 0.5026, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.2622673434856176, | |
| "grad_norm": 1.3057184219360352, | |
| "learning_rate": 4.331963612333017e-06, | |
| "loss": 0.5378, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.26311336717428085, | |
| "grad_norm": 1.5772360563278198, | |
| "learning_rate": 4.327293280365491e-06, | |
| "loss": 0.6281, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.2639593908629442, | |
| "grad_norm": 1.5020463466644287, | |
| "learning_rate": 4.322609216249336e-06, | |
| "loss": 0.6181, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.26480541455160744, | |
| "grad_norm": 1.4958893060684204, | |
| "learning_rate": 4.317911455185396e-06, | |
| "loss": 0.5468, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.2656514382402707, | |
| "grad_norm": 1.5898452997207642, | |
| "learning_rate": 4.3132000324774485e-06, | |
| "loss": 0.5702, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.26649746192893403, | |
| "grad_norm": 1.8296725749969482, | |
| "learning_rate": 4.308474983531936e-06, | |
| "loss": 0.7021, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2673434856175973, | |
| "grad_norm": 1.6258792877197266, | |
| "learning_rate": 4.303736343857704e-06, | |
| "loss": 0.557, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.26818950930626057, | |
| "grad_norm": 1.581331729888916, | |
| "learning_rate": 4.298984149065732e-06, | |
| "loss": 0.5027, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.26903553299492383, | |
| "grad_norm": 1.3485808372497559, | |
| "learning_rate": 4.294218434868869e-06, | |
| "loss": 0.4756, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.26988155668358715, | |
| "grad_norm": 1.4658807516098022, | |
| "learning_rate": 4.289439237081557e-06, | |
| "loss": 0.5321, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.2707275803722504, | |
| "grad_norm": 1.616141438484192, | |
| "learning_rate": 4.284646591619575e-06, | |
| "loss": 0.5233, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2715736040609137, | |
| "grad_norm": 1.5891221761703491, | |
| "learning_rate": 4.2798405344997545e-06, | |
| "loss": 0.5821, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.272419627749577, | |
| "grad_norm": 1.4427226781845093, | |
| "learning_rate": 4.2750211018397204e-06, | |
| "loss": 0.4998, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2732656514382403, | |
| "grad_norm": 1.6634961366653442, | |
| "learning_rate": 4.270188329857613e-06, | |
| "loss": 0.6044, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.27411167512690354, | |
| "grad_norm": 1.424747109413147, | |
| "learning_rate": 4.2653422548718195e-06, | |
| "loss": 0.5953, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.2749576988155668, | |
| "grad_norm": 1.5241210460662842, | |
| "learning_rate": 4.260482913300697e-06, | |
| "loss": 0.576, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.27580372250423013, | |
| "grad_norm": 1.4235581159591675, | |
| "learning_rate": 4.255610341662304e-06, | |
| "loss": 0.5074, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.2766497461928934, | |
| "grad_norm": 1.5771161317825317, | |
| "learning_rate": 4.2507245765741215e-06, | |
| "loss": 0.5325, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.27749576988155666, | |
| "grad_norm": 1.3609672784805298, | |
| "learning_rate": 4.245825654752781e-06, | |
| "loss": 0.5146, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.27834179357022, | |
| "grad_norm": 1.4064686298370361, | |
| "learning_rate": 4.240913613013785e-06, | |
| "loss": 0.5279, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.27918781725888325, | |
| "grad_norm": 1.3830342292785645, | |
| "learning_rate": 4.235988488271235e-06, | |
| "loss": 0.492, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2800338409475465, | |
| "grad_norm": 1.4581482410430908, | |
| "learning_rate": 4.231050317537548e-06, | |
| "loss": 0.5313, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.2808798646362098, | |
| "grad_norm": 1.6080012321472168, | |
| "learning_rate": 4.226099137923186e-06, | |
| "loss": 0.5134, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.2817258883248731, | |
| "grad_norm": 1.5522176027297974, | |
| "learning_rate": 4.221134986636371e-06, | |
| "loss": 0.562, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2825719120135364, | |
| "grad_norm": 1.5215039253234863, | |
| "learning_rate": 4.216157900982808e-06, | |
| "loss": 0.6292, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.28341793570219964, | |
| "grad_norm": 1.5160623788833618, | |
| "learning_rate": 4.211167918365402e-06, | |
| "loss": 0.5636, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.28426395939086296, | |
| "grad_norm": 1.3579323291778564, | |
| "learning_rate": 4.206165076283983e-06, | |
| "loss": 0.5459, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.28510998307952623, | |
| "grad_norm": 1.6921268701553345, | |
| "learning_rate": 4.201149412335015e-06, | |
| "loss": 0.5202, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2859560067681895, | |
| "grad_norm": 1.5123941898345947, | |
| "learning_rate": 4.196120964211322e-06, | |
| "loss": 0.6074, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2868020304568528, | |
| "grad_norm": 1.4805959463119507, | |
| "learning_rate": 4.1910797697018026e-06, | |
| "loss": 0.5621, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.2876480541455161, | |
| "grad_norm": 1.5556724071502686, | |
| "learning_rate": 4.1860258666911415e-06, | |
| "loss": 0.5228, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.28849407783417935, | |
| "grad_norm": 1.430757999420166, | |
| "learning_rate": 4.180959293159529e-06, | |
| "loss": 0.5698, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2893401015228426, | |
| "grad_norm": 1.419783115386963, | |
| "learning_rate": 4.175880087182376e-06, | |
| "loss": 0.5296, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.29018612521150594, | |
| "grad_norm": 1.7747116088867188, | |
| "learning_rate": 4.170788286930024e-06, | |
| "loss": 0.5129, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.2910321489001692, | |
| "grad_norm": 1.5256282091140747, | |
| "learning_rate": 4.165683930667464e-06, | |
| "loss": 0.6247, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2918781725888325, | |
| "grad_norm": 1.5945425033569336, | |
| "learning_rate": 4.160567056754044e-06, | |
| "loss": 0.555, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2927241962774958, | |
| "grad_norm": 1.4873846769332886, | |
| "learning_rate": 4.155437703643182e-06, | |
| "loss": 0.576, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.29357021996615906, | |
| "grad_norm": 1.5128968954086304, | |
| "learning_rate": 4.1502959098820774e-06, | |
| "loss": 0.5328, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.29441624365482233, | |
| "grad_norm": 1.6180955171585083, | |
| "learning_rate": 4.145141714111421e-06, | |
| "loss": 0.568, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2952622673434856, | |
| "grad_norm": 1.605104923248291, | |
| "learning_rate": 4.139975155065109e-06, | |
| "loss": 0.5578, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2961082910321489, | |
| "grad_norm": 1.6930770874023438, | |
| "learning_rate": 4.134796271569942e-06, | |
| "loss": 0.4953, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2969543147208122, | |
| "grad_norm": 1.4728012084960938, | |
| "learning_rate": 4.129605102545341e-06, | |
| "loss": 0.4959, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.29780033840947545, | |
| "grad_norm": 1.610328197479248, | |
| "learning_rate": 4.124401687003057e-06, | |
| "loss": 0.5775, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2986463620981388, | |
| "grad_norm": 1.7682827711105347, | |
| "learning_rate": 4.119186064046868e-06, | |
| "loss": 0.548, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.29949238578680204, | |
| "grad_norm": 1.525848150253296, | |
| "learning_rate": 4.113958272872294e-06, | |
| "loss": 0.5324, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3003384094754653, | |
| "grad_norm": 1.4608542919158936, | |
| "learning_rate": 4.1087183527663e-06, | |
| "loss": 0.434, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3011844331641286, | |
| "grad_norm": 1.5706309080123901, | |
| "learning_rate": 4.103466343106999e-06, | |
| "loss": 0.4819, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3020304568527919, | |
| "grad_norm": 1.5604802370071411, | |
| "learning_rate": 4.098202283363356e-06, | |
| "loss": 0.6517, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.30287648054145516, | |
| "grad_norm": 1.406082272529602, | |
| "learning_rate": 4.092926213094897e-06, | |
| "loss": 0.4477, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.30372250423011843, | |
| "grad_norm": 1.4490677118301392, | |
| "learning_rate": 4.087638171951401e-06, | |
| "loss": 0.5426, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.30456852791878175, | |
| "grad_norm": 1.472643256187439, | |
| "learning_rate": 4.082338199672615e-06, | |
| "loss": 0.5259, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.305414551607445, | |
| "grad_norm": 1.5344111919403076, | |
| "learning_rate": 4.077026336087944e-06, | |
| "loss": 0.5191, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.3062605752961083, | |
| "grad_norm": 1.4391659498214722, | |
| "learning_rate": 4.071702621116158e-06, | |
| "loss": 0.5136, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.30710659898477155, | |
| "grad_norm": 1.6155903339385986, | |
| "learning_rate": 4.066367094765091e-06, | |
| "loss": 0.5033, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3079526226734349, | |
| "grad_norm": 1.7278281450271606, | |
| "learning_rate": 4.0610197971313395e-06, | |
| "loss": 0.5509, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.30879864636209814, | |
| "grad_norm": 1.4298288822174072, | |
| "learning_rate": 4.0556607683999605e-06, | |
| "loss": 0.5406, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3096446700507614, | |
| "grad_norm": 1.405069351196289, | |
| "learning_rate": 4.050290048844171e-06, | |
| "loss": 0.5179, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.31049069373942473, | |
| "grad_norm": 1.6136541366577148, | |
| "learning_rate": 4.044907678825045e-06, | |
| "loss": 0.5519, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.311336717428088, | |
| "grad_norm": 1.4015752077102661, | |
| "learning_rate": 4.03951369879121e-06, | |
| "loss": 0.5596, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.31218274111675126, | |
| "grad_norm": 1.5532114505767822, | |
| "learning_rate": 4.034108149278544e-06, | |
| "loss": 0.5302, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.3130287648054145, | |
| "grad_norm": 1.4932410717010498, | |
| "learning_rate": 4.028691070909867e-06, | |
| "loss": 0.5539, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.31387478849407785, | |
| "grad_norm": 1.5183504819869995, | |
| "learning_rate": 4.0232625043946416e-06, | |
| "loss": 0.5717, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3147208121827411, | |
| "grad_norm": 1.4620509147644043, | |
| "learning_rate": 4.017822490528664e-06, | |
| "loss": 0.5102, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3155668358714044, | |
| "grad_norm": 1.389592170715332, | |
| "learning_rate": 4.012371070193753e-06, | |
| "loss": 0.5388, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.3164128595600677, | |
| "grad_norm": 1.6649476289749146, | |
| "learning_rate": 4.006908284357453e-06, | |
| "loss": 0.6184, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.31725888324873097, | |
| "grad_norm": 1.5024865865707397, | |
| "learning_rate": 4.001434174072718e-06, | |
| "loss": 0.5858, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.31810490693739424, | |
| "grad_norm": 1.4840333461761475, | |
| "learning_rate": 3.995948780477605e-06, | |
| "loss": 0.528, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3189509306260575, | |
| "grad_norm": 1.407777190208435, | |
| "learning_rate": 3.990452144794966e-06, | |
| "loss": 0.5988, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.3197969543147208, | |
| "grad_norm": 1.409677505493164, | |
| "learning_rate": 3.984944308332138e-06, | |
| "loss": 0.5369, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3206429780033841, | |
| "grad_norm": 1.607074499130249, | |
| "learning_rate": 3.97942531248063e-06, | |
| "loss": 0.5795, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.32148900169204736, | |
| "grad_norm": 1.291720986366272, | |
| "learning_rate": 3.973895198715816e-06, | |
| "loss": 0.5359, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3223350253807107, | |
| "grad_norm": 1.5747040510177612, | |
| "learning_rate": 3.968354008596621e-06, | |
| "loss": 0.5902, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.32318104906937395, | |
| "grad_norm": 1.5991111993789673, | |
| "learning_rate": 3.96280178376521e-06, | |
| "loss": 0.5414, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.3240270727580372, | |
| "grad_norm": 1.3984931707382202, | |
| "learning_rate": 3.957238565946672e-06, | |
| "loss": 0.4185, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.3248730964467005, | |
| "grad_norm": 1.4653754234313965, | |
| "learning_rate": 3.951664396948709e-06, | |
| "loss": 0.6029, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3257191201353638, | |
| "grad_norm": 1.4449567794799805, | |
| "learning_rate": 3.9460793186613235e-06, | |
| "loss": 0.4723, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.32656514382402707, | |
| "grad_norm": 1.3438575267791748, | |
| "learning_rate": 3.9404833730564975e-06, | |
| "loss": 0.4138, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.32741116751269034, | |
| "grad_norm": 1.423072099685669, | |
| "learning_rate": 3.934876602187886e-06, | |
| "loss": 0.5623, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.32825719120135366, | |
| "grad_norm": 1.52696692943573, | |
| "learning_rate": 3.929259048190492e-06, | |
| "loss": 0.4729, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.3291032148900169, | |
| "grad_norm": 1.3745728731155396, | |
| "learning_rate": 3.923630753280358e-06, | |
| "loss": 0.5163, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3299492385786802, | |
| "grad_norm": 1.48550546169281, | |
| "learning_rate": 3.917991759754239e-06, | |
| "loss": 0.5379, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.33079526226734346, | |
| "grad_norm": 1.4326492547988892, | |
| "learning_rate": 3.9123421099892955e-06, | |
| "loss": 0.5736, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3316412859560068, | |
| "grad_norm": 1.6504223346710205, | |
| "learning_rate": 3.906681846442768e-06, | |
| "loss": 0.5303, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.33248730964467005, | |
| "grad_norm": 1.511051893234253, | |
| "learning_rate": 3.9010110116516595e-06, | |
| "loss": 0.5524, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 1.604653000831604, | |
| "learning_rate": 3.895329648232416e-06, | |
| "loss": 0.5777, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.33417935702199664, | |
| "grad_norm": 1.5326584577560425, | |
| "learning_rate": 3.889637798880608e-06, | |
| "loss": 0.5853, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3350253807106599, | |
| "grad_norm": 1.4475630521774292, | |
| "learning_rate": 3.883935506370605e-06, | |
| "loss": 0.4671, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.33587140439932317, | |
| "grad_norm": 1.4971731901168823, | |
| "learning_rate": 3.8782228135552615e-06, | |
| "loss": 0.6189, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.33671742808798644, | |
| "grad_norm": 1.8308625221252441, | |
| "learning_rate": 3.872499763365585e-06, | |
| "loss": 0.518, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.33756345177664976, | |
| "grad_norm": 1.6570651531219482, | |
| "learning_rate": 3.8667663988104245e-06, | |
| "loss": 0.5731, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.338409475465313, | |
| "grad_norm": 1.411230444908142, | |
| "learning_rate": 3.861022762976136e-06, | |
| "loss": 0.5218, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3392554991539763, | |
| "grad_norm": 1.4464528560638428, | |
| "learning_rate": 3.85526889902627e-06, | |
| "loss": 0.5282, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.3401015228426396, | |
| "grad_norm": 1.4024648666381836, | |
| "learning_rate": 3.849504850201238e-06, | |
| "loss": 0.529, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3409475465313029, | |
| "grad_norm": 1.5347040891647339, | |
| "learning_rate": 3.84373065981799e-06, | |
| "loss": 0.5267, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.34179357021996615, | |
| "grad_norm": 1.5913515090942383, | |
| "learning_rate": 3.837946371269696e-06, | |
| "loss": 0.5788, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.3426395939086294, | |
| "grad_norm": 1.4288476705551147, | |
| "learning_rate": 3.832152028025406e-06, | |
| "loss": 0.5769, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.34348561759729274, | |
| "grad_norm": 1.577329397201538, | |
| "learning_rate": 3.826347673629738e-06, | |
| "loss": 0.6016, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.344331641285956, | |
| "grad_norm": 1.6311994791030884, | |
| "learning_rate": 3.820533351702538e-06, | |
| "loss": 0.5265, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.34517766497461927, | |
| "grad_norm": 1.445512294769287, | |
| "learning_rate": 3.8147091059385646e-06, | |
| "loss": 0.4741, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3460236886632826, | |
| "grad_norm": 1.338087558746338, | |
| "learning_rate": 3.80887498010715e-06, | |
| "loss": 0.4751, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.34686971235194586, | |
| "grad_norm": 1.413231611251831, | |
| "learning_rate": 3.8030310180518748e-06, | |
| "loss": 0.4685, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3477157360406091, | |
| "grad_norm": 1.547485113143921, | |
| "learning_rate": 3.7971772636902425e-06, | |
| "loss": 0.5329, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.34856175972927245, | |
| "grad_norm": 1.6648271083831787, | |
| "learning_rate": 3.791313761013343e-06, | |
| "loss": 0.4467, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3494077834179357, | |
| "grad_norm": 1.4436287879943848, | |
| "learning_rate": 3.7854405540855268e-06, | |
| "loss": 0.4634, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.350253807106599, | |
| "grad_norm": 1.4640777111053467, | |
| "learning_rate": 3.77955768704407e-06, | |
| "loss": 0.574, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.35109983079526225, | |
| "grad_norm": 1.6034146547317505, | |
| "learning_rate": 3.7736652040988474e-06, | |
| "loss": 0.5958, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.35194585448392557, | |
| "grad_norm": 1.3981318473815918, | |
| "learning_rate": 3.7677631495319953e-06, | |
| "loss": 0.5729, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.35279187817258884, | |
| "grad_norm": 1.4371929168701172, | |
| "learning_rate": 3.761851567697583e-06, | |
| "loss": 0.4701, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.3536379018612521, | |
| "grad_norm": 1.4483686685562134, | |
| "learning_rate": 3.7559305030212746e-06, | |
| "loss": 0.6109, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.3544839255499154, | |
| "grad_norm": 1.5899584293365479, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.5399, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.3553299492385787, | |
| "grad_norm": 1.4690099954605103, | |
| "learning_rate": 3.744060103201619e-06, | |
| "loss": 0.5434, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.35617597292724196, | |
| "grad_norm": 1.5741721391677856, | |
| "learning_rate": 3.7381108572645836e-06, | |
| "loss": 0.5081, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.3570219966159052, | |
| "grad_norm": 1.4619410037994385, | |
| "learning_rate": 3.7321523068976068e-06, | |
| "loss": 0.4984, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.35786802030456855, | |
| "grad_norm": 1.5194240808486938, | |
| "learning_rate": 3.726184496879323e-06, | |
| "loss": 0.5694, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.3587140439932318, | |
| "grad_norm": 1.4686346054077148, | |
| "learning_rate": 3.7202074720579544e-06, | |
| "loss": 0.4949, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.3595600676818951, | |
| "grad_norm": 1.7008684873580933, | |
| "learning_rate": 3.7142212773509727e-06, | |
| "loss": 0.577, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.3604060913705584, | |
| "grad_norm": 1.681458592414856, | |
| "learning_rate": 3.7082259577447604e-06, | |
| "loss": 0.5924, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.36125211505922167, | |
| "grad_norm": 1.490136981010437, | |
| "learning_rate": 3.702221558294274e-06, | |
| "loss": 0.4885, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.36209813874788493, | |
| "grad_norm": 1.41923189163208, | |
| "learning_rate": 3.696208124122706e-06, | |
| "loss": 0.4789, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.3629441624365482, | |
| "grad_norm": 1.781841516494751, | |
| "learning_rate": 3.690185700421145e-06, | |
| "loss": 0.5364, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.3637901861252115, | |
| "grad_norm": 1.4928046464920044, | |
| "learning_rate": 3.6841543324482356e-06, | |
| "loss": 0.4776, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3646362098138748, | |
| "grad_norm": 1.4005343914031982, | |
| "learning_rate": 3.6781140655298374e-06, | |
| "loss": 0.4997, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.36548223350253806, | |
| "grad_norm": 1.7181766033172607, | |
| "learning_rate": 3.6720649450586885e-06, | |
| "loss": 0.5811, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3663282571912014, | |
| "grad_norm": 1.6100702285766602, | |
| "learning_rate": 3.6660070164940614e-06, | |
| "loss": 0.571, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.36717428087986465, | |
| "grad_norm": 1.5206496715545654, | |
| "learning_rate": 3.659940325361419e-06, | |
| "loss": 0.4898, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.3680203045685279, | |
| "grad_norm": 1.4749397039413452, | |
| "learning_rate": 3.6538649172520774e-06, | |
| "loss": 0.5013, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.3688663282571912, | |
| "grad_norm": 1.568749189376831, | |
| "learning_rate": 3.64778083782286e-06, | |
| "loss": 0.5309, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3697123519458545, | |
| "grad_norm": 1.6498618125915527, | |
| "learning_rate": 3.641688132795757e-06, | |
| "loss": 0.6235, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.37055837563451777, | |
| "grad_norm": 1.494895577430725, | |
| "learning_rate": 3.635586847957577e-06, | |
| "loss": 0.6193, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.37140439932318103, | |
| "grad_norm": 1.5606284141540527, | |
| "learning_rate": 3.6294770291596083e-06, | |
| "loss": 0.5419, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.37225042301184436, | |
| "grad_norm": 1.5283830165863037, | |
| "learning_rate": 3.6233587223172717e-06, | |
| "loss": 0.5235, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3730964467005076, | |
| "grad_norm": 1.5230801105499268, | |
| "learning_rate": 3.6172319734097764e-06, | |
| "loss": 0.6246, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.3739424703891709, | |
| "grad_norm": 1.4073126316070557, | |
| "learning_rate": 3.611096828479773e-06, | |
| "loss": 0.5065, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.37478849407783416, | |
| "grad_norm": 1.568429946899414, | |
| "learning_rate": 3.604953333633009e-06, | |
| "loss": 0.5097, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.3756345177664975, | |
| "grad_norm": 1.5261017084121704, | |
| "learning_rate": 3.59880153503798e-06, | |
| "loss": 0.5877, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.37648054145516074, | |
| "grad_norm": 1.5816318988800049, | |
| "learning_rate": 3.5926414789255877e-06, | |
| "loss": 0.5142, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.377326565143824, | |
| "grad_norm": 1.4384475946426392, | |
| "learning_rate": 3.586473211588787e-06, | |
| "loss": 0.482, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.37817258883248733, | |
| "grad_norm": 1.500186800956726, | |
| "learning_rate": 3.5802967793822386e-06, | |
| "loss": 0.5422, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.3790186125211506, | |
| "grad_norm": 1.3965848684310913, | |
| "learning_rate": 3.5741122287219665e-06, | |
| "loss": 0.4749, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.37986463620981387, | |
| "grad_norm": 1.4717751741409302, | |
| "learning_rate": 3.567919606085004e-06, | |
| "loss": 0.5679, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.38071065989847713, | |
| "grad_norm": 1.4586070775985718, | |
| "learning_rate": 3.561718958009042e-06, | |
| "loss": 0.4985, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.38155668358714045, | |
| "grad_norm": 1.437875747680664, | |
| "learning_rate": 3.555510331092087e-06, | |
| "loss": 0.4932, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.3824027072758037, | |
| "grad_norm": 1.5008926391601562, | |
| "learning_rate": 3.549293771992104e-06, | |
| "loss": 0.5441, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.383248730964467, | |
| "grad_norm": 1.5105735063552856, | |
| "learning_rate": 3.5430693274266694e-06, | |
| "loss": 0.5371, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3840947546531303, | |
| "grad_norm": 1.4934806823730469, | |
| "learning_rate": 3.5368370441726197e-06, | |
| "loss": 0.5097, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3849407783417936, | |
| "grad_norm": 1.5622752904891968, | |
| "learning_rate": 3.5305969690656985e-06, | |
| "loss": 0.5208, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.38578680203045684, | |
| "grad_norm": 1.5283855199813843, | |
| "learning_rate": 3.5243491490002056e-06, | |
| "loss": 0.5266, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3866328257191201, | |
| "grad_norm": 1.4231075048446655, | |
| "learning_rate": 3.5180936309286444e-06, | |
| "loss": 0.5582, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.38747884940778343, | |
| "grad_norm": 1.4727346897125244, | |
| "learning_rate": 3.5118304618613684e-06, | |
| "loss": 0.5029, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3883248730964467, | |
| "grad_norm": 1.4854283332824707, | |
| "learning_rate": 3.5055596888662295e-06, | |
| "loss": 0.5081, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.38917089678510997, | |
| "grad_norm": 1.3666778802871704, | |
| "learning_rate": 3.4992813590682225e-06, | |
| "loss": 0.4756, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3900169204737733, | |
| "grad_norm": 1.7599936723709106, | |
| "learning_rate": 3.4929955196491315e-06, | |
| "loss": 0.498, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.39086294416243655, | |
| "grad_norm": 1.5538440942764282, | |
| "learning_rate": 3.4867022178471764e-06, | |
| "loss": 0.6403, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.3917089678510998, | |
| "grad_norm": 1.4417282342910767, | |
| "learning_rate": 3.4804015009566573e-06, | |
| "loss": 0.5727, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.3925549915397631, | |
| "grad_norm": 1.512620449066162, | |
| "learning_rate": 3.4740934163275974e-06, | |
| "loss": 0.6629, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.3934010152284264, | |
| "grad_norm": 1.446155071258545, | |
| "learning_rate": 3.46777801136539e-06, | |
| "loss": 0.5477, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3942470389170897, | |
| "grad_norm": 1.4650105237960815, | |
| "learning_rate": 3.4614553335304407e-06, | |
| "loss": 0.5874, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.39509306260575294, | |
| "grad_norm": 1.7039233446121216, | |
| "learning_rate": 3.455125430337809e-06, | |
| "loss": 0.5101, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.39593908629441626, | |
| "grad_norm": 1.5379666090011597, | |
| "learning_rate": 3.4487883493568566e-06, | |
| "loss": 0.5835, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.39678510998307953, | |
| "grad_norm": 1.4904314279556274, | |
| "learning_rate": 3.442444138210883e-06, | |
| "loss": 0.5217, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.3976311336717428, | |
| "grad_norm": 1.5643348693847656, | |
| "learning_rate": 3.436092844576774e-06, | |
| "loss": 0.5183, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.39847715736040606, | |
| "grad_norm": 1.4762258529663086, | |
| "learning_rate": 3.4297345161846373e-06, | |
| "loss": 0.5368, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3993231810490694, | |
| "grad_norm": 1.496219277381897, | |
| "learning_rate": 3.4233692008174497e-06, | |
| "loss": 0.4962, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.40016920473773265, | |
| "grad_norm": 1.5690780878067017, | |
| "learning_rate": 3.416996946310694e-06, | |
| "loss": 0.4984, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4010152284263959, | |
| "grad_norm": 1.4688186645507812, | |
| "learning_rate": 3.4106178005520006e-06, | |
| "loss": 0.5794, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.40186125211505924, | |
| "grad_norm": 1.5053479671478271, | |
| "learning_rate": 3.4042318114807893e-06, | |
| "loss": 0.4901, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.4027072758037225, | |
| "grad_norm": 1.3780537843704224, | |
| "learning_rate": 3.3978390270879056e-06, | |
| "loss": 0.558, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.4035532994923858, | |
| "grad_norm": 1.382036566734314, | |
| "learning_rate": 3.3914394954152635e-06, | |
| "loss": 0.5202, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.40439932318104904, | |
| "grad_norm": 1.469938039779663, | |
| "learning_rate": 3.385033264555482e-06, | |
| "loss": 0.5, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.40524534686971236, | |
| "grad_norm": 1.4906823635101318, | |
| "learning_rate": 3.3786203826515235e-06, | |
| "loss": 0.4972, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 1.448851466178894, | |
| "learning_rate": 3.3722008978963365e-06, | |
| "loss": 0.5561, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4069373942470389, | |
| "grad_norm": 1.4323585033416748, | |
| "learning_rate": 3.3657748585324874e-06, | |
| "loss": 0.5233, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.4077834179357022, | |
| "grad_norm": 1.6246329545974731, | |
| "learning_rate": 3.3593423128518017e-06, | |
| "loss": 0.6112, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4086294416243655, | |
| "grad_norm": 1.393144965171814, | |
| "learning_rate": 3.352903309194999e-06, | |
| "loss": 0.4907, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.40947546531302875, | |
| "grad_norm": 1.4356099367141724, | |
| "learning_rate": 3.3464578959513322e-06, | |
| "loss": 0.5602, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4103214890016921, | |
| "grad_norm": 1.3799232244491577, | |
| "learning_rate": 3.3400061215582213e-06, | |
| "loss": 0.4997, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.41116751269035534, | |
| "grad_norm": 1.570279836654663, | |
| "learning_rate": 3.3335480345008907e-06, | |
| "loss": 0.4928, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4120135363790186, | |
| "grad_norm": 1.4619395732879639, | |
| "learning_rate": 3.3270836833120047e-06, | |
| "loss": 0.4955, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.4128595600676819, | |
| "grad_norm": 1.5027363300323486, | |
| "learning_rate": 3.3206131165713023e-06, | |
| "loss": 0.4928, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.4137055837563452, | |
| "grad_norm": 1.5609545707702637, | |
| "learning_rate": 3.314136382905234e-06, | |
| "loss": 0.5996, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.41455160744500846, | |
| "grad_norm": 1.6889272928237915, | |
| "learning_rate": 3.3076535309865925e-06, | |
| "loss": 0.604, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.41539763113367173, | |
| "grad_norm": 1.5524094104766846, | |
| "learning_rate": 3.301164609534151e-06, | |
| "loss": 0.5188, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.41624365482233505, | |
| "grad_norm": 1.5471911430358887, | |
| "learning_rate": 3.2946696673122953e-06, | |
| "loss": 0.56, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4170896785109983, | |
| "grad_norm": 1.515896201133728, | |
| "learning_rate": 3.288168753130657e-06, | |
| "loss": 0.5333, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.4179357021996616, | |
| "grad_norm": 1.5967165231704712, | |
| "learning_rate": 3.2816619158437463e-06, | |
| "loss": 0.5538, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.41878172588832485, | |
| "grad_norm": 1.4772549867630005, | |
| "learning_rate": 3.2751492043505873e-06, | |
| "loss": 0.5723, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.4196277495769882, | |
| "grad_norm": 1.6248629093170166, | |
| "learning_rate": 3.268630667594348e-06, | |
| "loss": 0.5796, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.42047377326565144, | |
| "grad_norm": 1.639542579650879, | |
| "learning_rate": 3.2621063545619734e-06, | |
| "loss": 0.5001, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.4213197969543147, | |
| "grad_norm": 1.406894564628601, | |
| "learning_rate": 3.2555763142838175e-06, | |
| "loss": 0.414, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.42216582064297803, | |
| "grad_norm": 1.5696587562561035, | |
| "learning_rate": 3.2490405958332743e-06, | |
| "loss": 0.4809, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.4230118443316413, | |
| "grad_norm": 1.6583582162857056, | |
| "learning_rate": 3.24249924832641e-06, | |
| "loss": 0.4467, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.42385786802030456, | |
| "grad_norm": 1.4358595609664917, | |
| "learning_rate": 3.2359523209215933e-06, | |
| "loss": 0.5417, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.42470389170896783, | |
| "grad_norm": 1.497576355934143, | |
| "learning_rate": 3.2293998628191246e-06, | |
| "loss": 0.5172, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.42554991539763115, | |
| "grad_norm": 1.3067649602890015, | |
| "learning_rate": 3.2228419232608692e-06, | |
| "loss": 0.488, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.4263959390862944, | |
| "grad_norm": 1.7320537567138672, | |
| "learning_rate": 3.2162785515298854e-06, | |
| "loss": 0.5953, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.4272419627749577, | |
| "grad_norm": 1.4349291324615479, | |
| "learning_rate": 3.2097097969500545e-06, | |
| "loss": 0.4994, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.428087986463621, | |
| "grad_norm": 1.5768568515777588, | |
| "learning_rate": 3.2031357088857083e-06, | |
| "loss": 0.6069, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4289340101522843, | |
| "grad_norm": 1.8132436275482178, | |
| "learning_rate": 3.196556336741261e-06, | |
| "loss": 0.5251, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.42978003384094754, | |
| "grad_norm": 1.570563793182373, | |
| "learning_rate": 3.1899717299608384e-06, | |
| "loss": 0.5625, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4306260575296108, | |
| "grad_norm": 1.51616632938385, | |
| "learning_rate": 3.1833819380279028e-06, | |
| "loss": 0.5653, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.43147208121827413, | |
| "grad_norm": 1.4574007987976074, | |
| "learning_rate": 3.1767870104648834e-06, | |
| "loss": 0.4707, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4323181049069374, | |
| "grad_norm": 1.5099055767059326, | |
| "learning_rate": 3.1701869968328036e-06, | |
| "loss": 0.4206, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.43316412859560066, | |
| "grad_norm": 1.533036708831787, | |
| "learning_rate": 3.1635819467309094e-06, | |
| "loss": 0.5782, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.434010152284264, | |
| "grad_norm": 1.3681944608688354, | |
| "learning_rate": 3.156971909796295e-06, | |
| "loss": 0.5213, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.43485617597292725, | |
| "grad_norm": 1.522709846496582, | |
| "learning_rate": 3.150356935703531e-06, | |
| "loss": 0.496, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4357021996615905, | |
| "grad_norm": 1.6741329431533813, | |
| "learning_rate": 3.143737074164292e-06, | |
| "loss": 0.5972, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4365482233502538, | |
| "grad_norm": 1.6000012159347534, | |
| "learning_rate": 3.1371123749269804e-06, | |
| "loss": 0.5715, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.4373942470389171, | |
| "grad_norm": 1.5590283870697021, | |
| "learning_rate": 3.1304828877763567e-06, | |
| "loss": 0.5033, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.43824027072758037, | |
| "grad_norm": 1.471935749053955, | |
| "learning_rate": 3.123848662533157e-06, | |
| "loss": 0.4956, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.43908629441624364, | |
| "grad_norm": 1.4592459201812744, | |
| "learning_rate": 3.1172097490537308e-06, | |
| "loss": 0.5009, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.43993231810490696, | |
| "grad_norm": 1.4220460653305054, | |
| "learning_rate": 3.110566197229655e-06, | |
| "loss": 0.5181, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4407783417935702, | |
| "grad_norm": 1.4611165523529053, | |
| "learning_rate": 3.1039180569873667e-06, | |
| "loss": 0.5199, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.4416243654822335, | |
| "grad_norm": 1.3216363191604614, | |
| "learning_rate": 3.0972653782877836e-06, | |
| "loss": 0.4938, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.44247038917089676, | |
| "grad_norm": 1.5299708843231201, | |
| "learning_rate": 3.0906082111259313e-06, | |
| "loss": 0.6092, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.4433164128595601, | |
| "grad_norm": 1.4188978672027588, | |
| "learning_rate": 3.083946605530564e-06, | |
| "loss": 0.4906, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.44416243654822335, | |
| "grad_norm": 1.493012547492981, | |
| "learning_rate": 3.0772806115637934e-06, | |
| "loss": 0.6105, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4450084602368866, | |
| "grad_norm": 1.3560352325439453, | |
| "learning_rate": 3.070610279320708e-06, | |
| "loss": 0.465, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.44585448392554994, | |
| "grad_norm": 1.5919266939163208, | |
| "learning_rate": 3.063935658928998e-06, | |
| "loss": 0.4673, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.4467005076142132, | |
| "grad_norm": 1.4098035097122192, | |
| "learning_rate": 3.0572568005485825e-06, | |
| "loss": 0.5447, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.44754653130287647, | |
| "grad_norm": 1.4572504758834839, | |
| "learning_rate": 3.050573754371228e-06, | |
| "loss": 0.5234, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.44839255499153974, | |
| "grad_norm": 1.505883812904358, | |
| "learning_rate": 3.0438865706201683e-06, | |
| "loss": 0.5126, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.44923857868020306, | |
| "grad_norm": 1.484840750694275, | |
| "learning_rate": 3.0371952995497357e-06, | |
| "loss": 0.5136, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.4500846023688663, | |
| "grad_norm": 1.516889214515686, | |
| "learning_rate": 3.0304999914449774e-06, | |
| "loss": 0.5783, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4509306260575296, | |
| "grad_norm": 1.399756669998169, | |
| "learning_rate": 3.02380069662128e-06, | |
| "loss": 0.501, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.4517766497461929, | |
| "grad_norm": 1.6473559141159058, | |
| "learning_rate": 3.0170974654239877e-06, | |
| "loss": 0.5147, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.4526226734348562, | |
| "grad_norm": 1.333022117614746, | |
| "learning_rate": 3.0103903482280295e-06, | |
| "loss": 0.4848, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.45346869712351945, | |
| "grad_norm": 1.3775697946548462, | |
| "learning_rate": 3.0036793954375358e-06, | |
| "loss": 0.4997, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.4543147208121827, | |
| "grad_norm": 1.4264084100723267, | |
| "learning_rate": 2.9969646574854632e-06, | |
| "loss": 0.4977, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.45516074450084604, | |
| "grad_norm": 1.5796583890914917, | |
| "learning_rate": 2.9902461848332128e-06, | |
| "loss": 0.6589, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.4560067681895093, | |
| "grad_norm": 1.5686849355697632, | |
| "learning_rate": 2.9835240279702516e-06, | |
| "loss": 0.4683, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.45685279187817257, | |
| "grad_norm": 1.3858373165130615, | |
| "learning_rate": 2.9767982374137344e-06, | |
| "loss": 0.5051, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4576988155668359, | |
| "grad_norm": 1.6889417171478271, | |
| "learning_rate": 2.9700688637081233e-06, | |
| "loss": 0.5072, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.45854483925549916, | |
| "grad_norm": 1.6027723550796509, | |
| "learning_rate": 2.9633359574248077e-06, | |
| "loss": 0.5958, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.4593908629441624, | |
| "grad_norm": 1.4570367336273193, | |
| "learning_rate": 2.9565995691617242e-06, | |
| "loss": 0.5182, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.4602368866328257, | |
| "grad_norm": 1.4474014043807983, | |
| "learning_rate": 2.9498597495429773e-06, | |
| "loss": 0.523, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.461082910321489, | |
| "grad_norm": 1.5590665340423584, | |
| "learning_rate": 2.943116549218457e-06, | |
| "loss": 0.5413, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.4619289340101523, | |
| "grad_norm": 1.539737582206726, | |
| "learning_rate": 2.9363700188634597e-06, | |
| "loss": 0.6038, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.46277495769881555, | |
| "grad_norm": 1.543686032295227, | |
| "learning_rate": 2.929620209178307e-06, | |
| "loss": 0.4771, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.46362098138747887, | |
| "grad_norm": 1.4283702373504639, | |
| "learning_rate": 2.9228671708879664e-06, | |
| "loss": 0.5311, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.46446700507614214, | |
| "grad_norm": 1.365286946296692, | |
| "learning_rate": 2.916110954741667e-06, | |
| "loss": 0.485, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.4653130287648054, | |
| "grad_norm": 1.4654829502105713, | |
| "learning_rate": 2.909351611512518e-06, | |
| "loss": 0.4788, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.46615905245346867, | |
| "grad_norm": 1.5056861639022827, | |
| "learning_rate": 2.902589191997132e-06, | |
| "loss": 0.5171, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.467005076142132, | |
| "grad_norm": 1.3876997232437134, | |
| "learning_rate": 2.8958237470152374e-06, | |
| "loss": 0.5373, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.46785109983079526, | |
| "grad_norm": 1.370692253112793, | |
| "learning_rate": 2.889055327409301e-06, | |
| "loss": 0.4746, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.4686971235194585, | |
| "grad_norm": 1.5535578727722168, | |
| "learning_rate": 2.882283984044141e-06, | |
| "loss": 0.4739, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.46954314720812185, | |
| "grad_norm": 1.6460717916488647, | |
| "learning_rate": 2.8755097678065513e-06, | |
| "loss": 0.5865, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.4703891708967851, | |
| "grad_norm": 1.4789174795150757, | |
| "learning_rate": 2.8687327296049126e-06, | |
| "loss": 0.5395, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.4712351945854484, | |
| "grad_norm": 1.4689819812774658, | |
| "learning_rate": 2.861952920368816e-06, | |
| "loss": 0.592, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.4720812182741117, | |
| "grad_norm": 1.719758152961731, | |
| "learning_rate": 2.8551703910486735e-06, | |
| "loss": 0.5949, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.47292724196277497, | |
| "grad_norm": 1.5856834650039673, | |
| "learning_rate": 2.8483851926153396e-06, | |
| "loss": 0.4885, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.47377326565143824, | |
| "grad_norm": 1.5992249250411987, | |
| "learning_rate": 2.8415973760597284e-06, | |
| "loss": 0.5733, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4746192893401015, | |
| "grad_norm": 1.512696385383606, | |
| "learning_rate": 2.8348069923924277e-06, | |
| "loss": 0.5093, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.4754653130287648, | |
| "grad_norm": 1.4138331413269043, | |
| "learning_rate": 2.828014092643319e-06, | |
| "loss": 0.4628, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.4763113367174281, | |
| "grad_norm": 1.6273956298828125, | |
| "learning_rate": 2.8212187278611907e-06, | |
| "loss": 0.6473, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.47715736040609136, | |
| "grad_norm": 1.4268630743026733, | |
| "learning_rate": 2.8144209491133573e-06, | |
| "loss": 0.4941, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4780033840947547, | |
| "grad_norm": 1.5218658447265625, | |
| "learning_rate": 2.807620807485273e-06, | |
| "loss": 0.5629, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.47884940778341795, | |
| "grad_norm": 1.5016785860061646, | |
| "learning_rate": 2.8008183540801486e-06, | |
| "loss": 0.5488, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4796954314720812, | |
| "grad_norm": 1.6292673349380493, | |
| "learning_rate": 2.7940136400185697e-06, | |
| "loss": 0.4968, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.4805414551607445, | |
| "grad_norm": 1.7516484260559082, | |
| "learning_rate": 2.7872067164381113e-06, | |
| "loss": 0.6068, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4813874788494078, | |
| "grad_norm": 1.6018465757369995, | |
| "learning_rate": 2.7803976344929497e-06, | |
| "loss": 0.515, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.48223350253807107, | |
| "grad_norm": 1.4703809022903442, | |
| "learning_rate": 2.7735864453534845e-06, | |
| "loss": 0.4804, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.48307952622673433, | |
| "grad_norm": 1.6393179893493652, | |
| "learning_rate": 2.7667732002059494e-06, | |
| "loss": 0.5815, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.48392554991539766, | |
| "grad_norm": 1.3294684886932373, | |
| "learning_rate": 2.7599579502520295e-06, | |
| "loss": 0.4847, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4847715736040609, | |
| "grad_norm": 1.5507575273513794, | |
| "learning_rate": 2.753140746708477e-06, | |
| "loss": 0.6029, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.4856175972927242, | |
| "grad_norm": 1.458669662475586, | |
| "learning_rate": 2.746321640806722e-06, | |
| "loss": 0.5659, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.48646362098138746, | |
| "grad_norm": 1.5378319025039673, | |
| "learning_rate": 2.7395006837924953e-06, | |
| "loss": 0.5321, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4873096446700508, | |
| "grad_norm": 1.6430675983428955, | |
| "learning_rate": 2.7326779269254363e-06, | |
| "loss": 0.4837, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.48815566835871405, | |
| "grad_norm": 1.6236116886138916, | |
| "learning_rate": 2.7258534214787108e-06, | |
| "loss": 0.4962, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.4890016920473773, | |
| "grad_norm": 1.4811713695526123, | |
| "learning_rate": 2.7190272187386246e-06, | |
| "loss": 0.4433, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.48984771573604063, | |
| "grad_norm": 1.4098522663116455, | |
| "learning_rate": 2.7121993700042403e-06, | |
| "loss": 0.5793, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.4906937394247039, | |
| "grad_norm": 1.4402129650115967, | |
| "learning_rate": 2.7053699265869883e-06, | |
| "loss": 0.4585, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.49153976311336717, | |
| "grad_norm": 1.5286259651184082, | |
| "learning_rate": 2.6985389398102844e-06, | |
| "loss": 0.502, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.49238578680203043, | |
| "grad_norm": 1.4347509145736694, | |
| "learning_rate": 2.6917064610091425e-06, | |
| "loss": 0.4995, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.49323181049069376, | |
| "grad_norm": 1.3990005254745483, | |
| "learning_rate": 2.6848725415297888e-06, | |
| "loss": 0.4727, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.494077834179357, | |
| "grad_norm": 1.4724209308624268, | |
| "learning_rate": 2.6780372327292763e-06, | |
| "loss": 0.5381, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.4949238578680203, | |
| "grad_norm": 1.631493330001831, | |
| "learning_rate": 2.6712005859751e-06, | |
| "loss": 0.5569, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4957698815566836, | |
| "grad_norm": 1.513969898223877, | |
| "learning_rate": 2.6643626526448063e-06, | |
| "loss": 0.4989, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4966159052453469, | |
| "grad_norm": 1.542619228363037, | |
| "learning_rate": 2.6575234841256137e-06, | |
| "loss": 0.5313, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.49746192893401014, | |
| "grad_norm": 1.4393746852874756, | |
| "learning_rate": 2.6506831318140226e-06, | |
| "loss": 0.5492, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4983079526226734, | |
| "grad_norm": 1.3358962535858154, | |
| "learning_rate": 2.6438416471154277e-06, | |
| "loss": 0.478, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.49915397631133673, | |
| "grad_norm": 1.6701815128326416, | |
| "learning_rate": 2.636999081443736e-06, | |
| "loss": 0.5219, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.4150053262710571, | |
| "learning_rate": 2.6301554862209756e-06, | |
| "loss": 0.4718, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.5008460236886633, | |
| "grad_norm": 1.3618898391723633, | |
| "learning_rate": 2.6233109128769134e-06, | |
| "loss": 0.5055, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5016920473773265, | |
| "grad_norm": 1.6331762075424194, | |
| "learning_rate": 2.6164654128486683e-06, | |
| "loss": 0.5177, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5025380710659898, | |
| "grad_norm": 1.5203489065170288, | |
| "learning_rate": 2.6096190375803183e-06, | |
| "loss": 0.5066, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5033840947546532, | |
| "grad_norm": 1.6266874074935913, | |
| "learning_rate": 2.602771838522525e-06, | |
| "loss": 0.4494, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5042301184433164, | |
| "grad_norm": 1.3882555961608887, | |
| "learning_rate": 2.595923867132136e-06, | |
| "loss": 0.5231, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5050761421319797, | |
| "grad_norm": 1.4567385911941528, | |
| "learning_rate": 2.5890751748718055e-06, | |
| "loss": 0.5295, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.505922165820643, | |
| "grad_norm": 1.4776872396469116, | |
| "learning_rate": 2.5822258132096038e-06, | |
| "loss": 0.505, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5067681895093062, | |
| "grad_norm": 1.404428243637085, | |
| "learning_rate": 2.575375833618633e-06, | |
| "loss": 0.4914, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.5076142131979695, | |
| "grad_norm": 1.4569783210754395, | |
| "learning_rate": 2.568525287576638e-06, | |
| "loss": 0.4488, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5084602368866328, | |
| "grad_norm": 1.4980030059814453, | |
| "learning_rate": 2.561674226565621e-06, | |
| "loss": 0.5389, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.5093062605752962, | |
| "grad_norm": 1.4138386249542236, | |
| "learning_rate": 2.5548227020714532e-06, | |
| "loss": 0.5175, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5101522842639594, | |
| "grad_norm": 1.7734383344650269, | |
| "learning_rate": 2.547970765583491e-06, | |
| "loss": 0.5559, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.5109983079526227, | |
| "grad_norm": 1.5217783451080322, | |
| "learning_rate": 2.541118468594185e-06, | |
| "loss": 0.4747, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.511844331641286, | |
| "grad_norm": 1.696345329284668, | |
| "learning_rate": 2.5342658625986965e-06, | |
| "loss": 0.5078, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.5126903553299492, | |
| "grad_norm": 1.5817019939422607, | |
| "learning_rate": 2.527412999094507e-06, | |
| "loss": 0.5418, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5135363790186125, | |
| "grad_norm": 1.3751314878463745, | |
| "learning_rate": 2.520559929581034e-06, | |
| "loss": 0.4278, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.5143824027072758, | |
| "grad_norm": 1.4618191719055176, | |
| "learning_rate": 2.5137067055592457e-06, | |
| "loss": 0.5491, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5152284263959391, | |
| "grad_norm": 1.4312200546264648, | |
| "learning_rate": 2.5068533785312673e-06, | |
| "loss": 0.5052, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.5160744500846024, | |
| "grad_norm": 1.6040703058242798, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.4421, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5169204737732657, | |
| "grad_norm": 1.7869144678115845, | |
| "learning_rate": 2.4931466214687336e-06, | |
| "loss": 0.4662, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.5177664974619289, | |
| "grad_norm": 1.4954723119735718, | |
| "learning_rate": 2.486293294440755e-06, | |
| "loss": 0.482, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5186125211505922, | |
| "grad_norm": 1.504907488822937, | |
| "learning_rate": 2.479440070418967e-06, | |
| "loss": 0.4846, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.5194585448392555, | |
| "grad_norm": 1.5628070831298828, | |
| "learning_rate": 2.4725870009054944e-06, | |
| "loss": 0.5379, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5203045685279187, | |
| "grad_norm": 1.3009322881698608, | |
| "learning_rate": 2.4657341374013047e-06, | |
| "loss": 0.4173, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5211505922165821, | |
| "grad_norm": 1.4733883142471313, | |
| "learning_rate": 2.4588815314058155e-06, | |
| "loss": 0.4481, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.5219966159052454, | |
| "grad_norm": 1.511818528175354, | |
| "learning_rate": 2.4520292344165093e-06, | |
| "loss": 0.4529, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.5228426395939086, | |
| "grad_norm": 1.5413731336593628, | |
| "learning_rate": 2.4451772979285468e-06, | |
| "loss": 0.5215, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5236886632825719, | |
| "grad_norm": 1.4342833757400513, | |
| "learning_rate": 2.4383257734343795e-06, | |
| "loss": 0.4864, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.5245346869712352, | |
| "grad_norm": 1.339322566986084, | |
| "learning_rate": 2.431474712423363e-06, | |
| "loss": 0.4954, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5253807106598984, | |
| "grad_norm": 1.6234588623046875, | |
| "learning_rate": 2.4246241663813675e-06, | |
| "loss": 0.4753, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.5262267343485617, | |
| "grad_norm": 1.3801982402801514, | |
| "learning_rate": 2.4177741867903966e-06, | |
| "loss": 0.4836, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5270727580372251, | |
| "grad_norm": 1.5722270011901855, | |
| "learning_rate": 2.4109248251281953e-06, | |
| "loss": 0.5472, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.5279187817258884, | |
| "grad_norm": 1.5523285865783691, | |
| "learning_rate": 2.4040761328678647e-06, | |
| "loss": 0.5056, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5287648054145516, | |
| "grad_norm": 1.481810212135315, | |
| "learning_rate": 2.3972281614774764e-06, | |
| "loss": 0.5493, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.5296108291032149, | |
| "grad_norm": 1.4845212697982788, | |
| "learning_rate": 2.3903809624196826e-06, | |
| "loss": 0.5125, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5304568527918782, | |
| "grad_norm": 1.3311069011688232, | |
| "learning_rate": 2.3835345871513334e-06, | |
| "loss": 0.4476, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.5313028764805414, | |
| "grad_norm": 1.6216577291488647, | |
| "learning_rate": 2.376689087123087e-06, | |
| "loss": 0.5174, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5321489001692047, | |
| "grad_norm": 1.6146211624145508, | |
| "learning_rate": 2.369844513779026e-06, | |
| "loss": 0.5302, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.5329949238578681, | |
| "grad_norm": 1.3406670093536377, | |
| "learning_rate": 2.3630009185562646e-06, | |
| "loss": 0.571, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5338409475465313, | |
| "grad_norm": 1.4292736053466797, | |
| "learning_rate": 2.3561583528845723e-06, | |
| "loss": 0.5067, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.5346869712351946, | |
| "grad_norm": 1.4825644493103027, | |
| "learning_rate": 2.3493168681859782e-06, | |
| "loss": 0.5024, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5355329949238579, | |
| "grad_norm": 1.6885347366333008, | |
| "learning_rate": 2.3424765158743867e-06, | |
| "loss": 0.5526, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.5363790186125211, | |
| "grad_norm": 1.4368715286254883, | |
| "learning_rate": 2.335637347355194e-06, | |
| "loss": 0.5817, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5372250423011844, | |
| "grad_norm": 1.5050628185272217, | |
| "learning_rate": 2.3287994140249005e-06, | |
| "loss": 0.5771, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5380710659898477, | |
| "grad_norm": 1.43318510055542, | |
| "learning_rate": 2.321962767270724e-06, | |
| "loss": 0.4677, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.538917089678511, | |
| "grad_norm": 1.4823578596115112, | |
| "learning_rate": 2.315127458470212e-06, | |
| "loss": 0.496, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5397631133671743, | |
| "grad_norm": 1.4760611057281494, | |
| "learning_rate": 2.308293538990858e-06, | |
| "loss": 0.5526, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5406091370558376, | |
| "grad_norm": 1.4542453289031982, | |
| "learning_rate": 2.301461060189716e-06, | |
| "loss": 0.4325, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.5414551607445008, | |
| "grad_norm": 1.4895241260528564, | |
| "learning_rate": 2.2946300734130126e-06, | |
| "loss": 0.5135, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5423011844331641, | |
| "grad_norm": 1.3324871063232422, | |
| "learning_rate": 2.2878006299957613e-06, | |
| "loss": 0.4435, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5431472081218274, | |
| "grad_norm": 1.5044846534729004, | |
| "learning_rate": 2.2809727812613767e-06, | |
| "loss": 0.5371, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5439932318104906, | |
| "grad_norm": 1.3756799697875977, | |
| "learning_rate": 2.2741465785212905e-06, | |
| "loss": 0.4914, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.544839255499154, | |
| "grad_norm": 1.5922890901565552, | |
| "learning_rate": 2.267322073074564e-06, | |
| "loss": 0.4904, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5456852791878173, | |
| "grad_norm": 1.377279281616211, | |
| "learning_rate": 2.260499316207505e-06, | |
| "loss": 0.4736, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5465313028764806, | |
| "grad_norm": 1.5589686632156372, | |
| "learning_rate": 2.2536783591932786e-06, | |
| "loss": 0.5133, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5473773265651438, | |
| "grad_norm": 1.617056965827942, | |
| "learning_rate": 2.246859253291524e-06, | |
| "loss": 0.4293, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.5482233502538071, | |
| "grad_norm": 1.2803261280059814, | |
| "learning_rate": 2.2400420497479713e-06, | |
| "loss": 0.4212, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5490693739424704, | |
| "grad_norm": 1.705077886581421, | |
| "learning_rate": 2.2332267997940514e-06, | |
| "loss": 0.5949, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.5499153976311336, | |
| "grad_norm": 1.2756019830703735, | |
| "learning_rate": 2.2264135546465163e-06, | |
| "loss": 0.4538, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.550761421319797, | |
| "grad_norm": 1.4529634714126587, | |
| "learning_rate": 2.219602365507051e-06, | |
| "loss": 0.5181, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.5516074450084603, | |
| "grad_norm": 1.4561599493026733, | |
| "learning_rate": 2.21279328356189e-06, | |
| "loss": 0.5149, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5524534686971235, | |
| "grad_norm": 1.526835560798645, | |
| "learning_rate": 2.205986359981431e-06, | |
| "loss": 0.4777, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.5532994923857868, | |
| "grad_norm": 1.572192907333374, | |
| "learning_rate": 2.1991816459198526e-06, | |
| "loss": 0.4954, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5541455160744501, | |
| "grad_norm": 1.3265060186386108, | |
| "learning_rate": 2.1923791925147287e-06, | |
| "loss": 0.5218, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5549915397631133, | |
| "grad_norm": 1.4780526161193848, | |
| "learning_rate": 2.1855790508866435e-06, | |
| "loss": 0.5365, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5558375634517766, | |
| "grad_norm": 1.5056841373443604, | |
| "learning_rate": 2.1787812721388093e-06, | |
| "loss": 0.5579, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.55668358714044, | |
| "grad_norm": 1.5058797597885132, | |
| "learning_rate": 2.1719859073566813e-06, | |
| "loss": 0.5154, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5575296108291032, | |
| "grad_norm": 1.3792710304260254, | |
| "learning_rate": 2.1651930076075727e-06, | |
| "loss": 0.4752, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.5583756345177665, | |
| "grad_norm": 1.4559630155563354, | |
| "learning_rate": 2.158402623940273e-06, | |
| "loss": 0.5478, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5592216582064298, | |
| "grad_norm": 1.4395264387130737, | |
| "learning_rate": 2.1516148073846613e-06, | |
| "loss": 0.4919, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.560067681895093, | |
| "grad_norm": 1.4851564168930054, | |
| "learning_rate": 2.1448296089513273e-06, | |
| "loss": 0.458, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5609137055837563, | |
| "grad_norm": 1.4913303852081299, | |
| "learning_rate": 2.1380470796311843e-06, | |
| "loss": 0.539, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.5617597292724196, | |
| "grad_norm": 1.4618514776229858, | |
| "learning_rate": 2.131267270395088e-06, | |
| "loss": 0.4823, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.562605752961083, | |
| "grad_norm": 1.4132062196731567, | |
| "learning_rate": 2.1244902321934495e-06, | |
| "loss": 0.4256, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5634517766497462, | |
| "grad_norm": 1.6368167400360107, | |
| "learning_rate": 2.11771601595586e-06, | |
| "loss": 0.5763, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5642978003384095, | |
| "grad_norm": 1.5602619647979736, | |
| "learning_rate": 2.1109446725907003e-06, | |
| "loss": 0.5281, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.5651438240270727, | |
| "grad_norm": 1.4556282758712769, | |
| "learning_rate": 2.104176252984763e-06, | |
| "loss": 0.542, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.565989847715736, | |
| "grad_norm": 1.4794425964355469, | |
| "learning_rate": 2.097410808002869e-06, | |
| "loss": 0.5617, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.5668358714043993, | |
| "grad_norm": 1.653266429901123, | |
| "learning_rate": 2.0906483884874816e-06, | |
| "loss": 0.58, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5676818950930627, | |
| "grad_norm": 1.421467661857605, | |
| "learning_rate": 2.0838890452583337e-06, | |
| "loss": 0.5255, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.5685279187817259, | |
| "grad_norm": 1.327575922012329, | |
| "learning_rate": 2.0771328291120336e-06, | |
| "loss": 0.4885, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5693739424703892, | |
| "grad_norm": 1.7012816667556763, | |
| "learning_rate": 2.070379790821693e-06, | |
| "loss": 0.5295, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.5702199661590525, | |
| "grad_norm": 1.7111144065856934, | |
| "learning_rate": 2.063629981136541e-06, | |
| "loss": 0.4796, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.5710659898477157, | |
| "grad_norm": 1.4859437942504883, | |
| "learning_rate": 2.0568834507815434e-06, | |
| "loss": 0.4998, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.571912013536379, | |
| "grad_norm": 1.4416285753250122, | |
| "learning_rate": 2.050140250457023e-06, | |
| "loss": 0.5211, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5727580372250423, | |
| "grad_norm": 1.4626954793930054, | |
| "learning_rate": 2.043400430838276e-06, | |
| "loss": 0.5127, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.5736040609137056, | |
| "grad_norm": 1.4203089475631714, | |
| "learning_rate": 2.036664042575193e-06, | |
| "loss": 0.4599, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5744500846023689, | |
| "grad_norm": 1.3037195205688477, | |
| "learning_rate": 2.0299311362918775e-06, | |
| "loss": 0.4848, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.5752961082910322, | |
| "grad_norm": 1.6444575786590576, | |
| "learning_rate": 2.0232017625862664e-06, | |
| "loss": 0.5882, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5761421319796954, | |
| "grad_norm": 1.4801158905029297, | |
| "learning_rate": 2.01647597202975e-06, | |
| "loss": 0.5177, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.5769881556683587, | |
| "grad_norm": 1.3590668439865112, | |
| "learning_rate": 2.0097538151667885e-06, | |
| "loss": 0.5236, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.577834179357022, | |
| "grad_norm": 1.7626949548721313, | |
| "learning_rate": 2.0030353425145376e-06, | |
| "loss": 0.5392, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.5786802030456852, | |
| "grad_norm": 1.493628740310669, | |
| "learning_rate": 1.9963206045624647e-06, | |
| "loss": 0.5182, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5795262267343486, | |
| "grad_norm": 1.515743374824524, | |
| "learning_rate": 1.989609651771971e-06, | |
| "loss": 0.5648, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5803722504230119, | |
| "grad_norm": 1.511932134628296, | |
| "learning_rate": 1.9829025345760127e-06, | |
| "loss": 0.4885, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5812182741116751, | |
| "grad_norm": 1.457202434539795, | |
| "learning_rate": 1.9761993033787206e-06, | |
| "loss": 0.5903, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.5820642978003384, | |
| "grad_norm": 1.4985764026641846, | |
| "learning_rate": 1.969500008555023e-06, | |
| "loss": 0.5411, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.5829103214890017, | |
| "grad_norm": 1.451522707939148, | |
| "learning_rate": 1.962804700450265e-06, | |
| "loss": 0.5094, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.583756345177665, | |
| "grad_norm": 1.3009876012802124, | |
| "learning_rate": 1.956113429379833e-06, | |
| "loss": 0.4272, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5846023688663282, | |
| "grad_norm": 1.2623531818389893, | |
| "learning_rate": 1.9494262456287735e-06, | |
| "loss": 0.4011, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.5854483925549916, | |
| "grad_norm": 1.3815149068832397, | |
| "learning_rate": 1.942743199451418e-06, | |
| "loss": 0.5189, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5862944162436549, | |
| "grad_norm": 1.6201854944229126, | |
| "learning_rate": 1.9360643410710027e-06, | |
| "loss": 0.6014, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.5871404399323181, | |
| "grad_norm": 1.3804255723953247, | |
| "learning_rate": 1.929389720679294e-06, | |
| "loss": 0.4803, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5879864636209814, | |
| "grad_norm": 1.566095232963562, | |
| "learning_rate": 1.922719388436208e-06, | |
| "loss": 0.5526, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5888324873096447, | |
| "grad_norm": 1.5268288850784302, | |
| "learning_rate": 1.916053394469437e-06, | |
| "loss": 0.4897, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5896785109983079, | |
| "grad_norm": 1.3224598169326782, | |
| "learning_rate": 1.909391788874069e-06, | |
| "loss": 0.4971, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.5905245346869712, | |
| "grad_norm": 1.6039224863052368, | |
| "learning_rate": 1.9027346217122161e-06, | |
| "loss": 0.6325, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5913705583756346, | |
| "grad_norm": 1.4165875911712646, | |
| "learning_rate": 1.8960819430126337e-06, | |
| "loss": 0.49, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.5922165820642978, | |
| "grad_norm": 1.480425238609314, | |
| "learning_rate": 1.8894338027703456e-06, | |
| "loss": 0.5507, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5930626057529611, | |
| "grad_norm": 1.3495324850082397, | |
| "learning_rate": 1.88279025094627e-06, | |
| "loss": 0.4775, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.5939086294416244, | |
| "grad_norm": 1.507717251777649, | |
| "learning_rate": 1.8761513374668434e-06, | |
| "loss": 0.5575, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5947546531302876, | |
| "grad_norm": 1.415955662727356, | |
| "learning_rate": 1.8695171122236443e-06, | |
| "loss": 0.4595, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.5956006768189509, | |
| "grad_norm": 1.4151737689971924, | |
| "learning_rate": 1.8628876250730198e-06, | |
| "loss": 0.484, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5964467005076142, | |
| "grad_norm": 1.379846215248108, | |
| "learning_rate": 1.8562629258357087e-06, | |
| "loss": 0.4786, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5972927241962775, | |
| "grad_norm": 1.6034132242202759, | |
| "learning_rate": 1.8496430642964698e-06, | |
| "loss": 0.5037, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5981387478849408, | |
| "grad_norm": 1.4743245840072632, | |
| "learning_rate": 1.8430280902037061e-06, | |
| "loss": 0.4941, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.5989847715736041, | |
| "grad_norm": 1.4026728868484497, | |
| "learning_rate": 1.8364180532690916e-06, | |
| "loss": 0.4978, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5998307952622673, | |
| "grad_norm": 1.5930570363998413, | |
| "learning_rate": 1.8298130031671974e-06, | |
| "loss": 0.5155, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.6006768189509306, | |
| "grad_norm": 1.4963091611862183, | |
| "learning_rate": 1.8232129895351164e-06, | |
| "loss": 0.4775, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6015228426395939, | |
| "grad_norm": 1.3382537364959717, | |
| "learning_rate": 1.8166180619720974e-06, | |
| "loss": 0.4759, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.6023688663282571, | |
| "grad_norm": 1.5729764699935913, | |
| "learning_rate": 1.8100282700391616e-06, | |
| "loss": 0.4431, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.6032148900169205, | |
| "grad_norm": 1.6764527559280396, | |
| "learning_rate": 1.8034436632587394e-06, | |
| "loss": 0.4979, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.6040609137055838, | |
| "grad_norm": 1.3650676012039185, | |
| "learning_rate": 1.7968642911142926e-06, | |
| "loss": 0.5107, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6049069373942471, | |
| "grad_norm": 1.4936357736587524, | |
| "learning_rate": 1.7902902030499463e-06, | |
| "loss": 0.5537, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6057529610829103, | |
| "grad_norm": 1.4027994871139526, | |
| "learning_rate": 1.7837214484701154e-06, | |
| "loss": 0.487, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6065989847715736, | |
| "grad_norm": 1.433212161064148, | |
| "learning_rate": 1.7771580767391314e-06, | |
| "loss": 0.5041, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.6074450084602369, | |
| "grad_norm": 1.3107362985610962, | |
| "learning_rate": 1.7706001371808763e-06, | |
| "loss": 0.4696, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6082910321489001, | |
| "grad_norm": 1.5976382493972778, | |
| "learning_rate": 1.7640476790784077e-06, | |
| "loss": 0.5224, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.6091370558375635, | |
| "grad_norm": 1.513655185699463, | |
| "learning_rate": 1.7575007516735909e-06, | |
| "loss": 0.5048, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6099830795262268, | |
| "grad_norm": 1.4939072132110596, | |
| "learning_rate": 1.7509594041667265e-06, | |
| "loss": 0.4744, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.61082910321489, | |
| "grad_norm": 1.4942210912704468, | |
| "learning_rate": 1.7444236857161837e-06, | |
| "loss": 0.548, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.6116751269035533, | |
| "grad_norm": 1.6308627128601074, | |
| "learning_rate": 1.7378936454380277e-06, | |
| "loss": 0.555, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.6125211505922166, | |
| "grad_norm": 1.5539953708648682, | |
| "learning_rate": 1.7313693324056523e-06, | |
| "loss": 0.4423, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6133671742808798, | |
| "grad_norm": 1.5067429542541504, | |
| "learning_rate": 1.724850795649413e-06, | |
| "loss": 0.5053, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.6142131979695431, | |
| "grad_norm": 1.4803341627120972, | |
| "learning_rate": 1.718338084156254e-06, | |
| "loss": 0.5284, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6150592216582065, | |
| "grad_norm": 1.3545798063278198, | |
| "learning_rate": 1.7118312468693437e-06, | |
| "loss": 0.4296, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.6159052453468697, | |
| "grad_norm": 1.3966692686080933, | |
| "learning_rate": 1.7053303326877051e-06, | |
| "loss": 0.5169, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.616751269035533, | |
| "grad_norm": 1.5924454927444458, | |
| "learning_rate": 1.6988353904658495e-06, | |
| "loss": 0.5, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.6175972927241963, | |
| "grad_norm": 1.3930429220199585, | |
| "learning_rate": 1.692346469013408e-06, | |
| "loss": 0.5098, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6184433164128595, | |
| "grad_norm": 1.2582634687423706, | |
| "learning_rate": 1.6858636170947668e-06, | |
| "loss": 0.4755, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.6192893401015228, | |
| "grad_norm": 1.777503252029419, | |
| "learning_rate": 1.6793868834286985e-06, | |
| "loss": 0.578, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.6201353637901861, | |
| "grad_norm": 1.353458285331726, | |
| "learning_rate": 1.6729163166879964e-06, | |
| "loss": 0.4851, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.6209813874788495, | |
| "grad_norm": 1.5160582065582275, | |
| "learning_rate": 1.6664519654991101e-06, | |
| "loss": 0.6046, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.6218274111675127, | |
| "grad_norm": 1.39556086063385, | |
| "learning_rate": 1.6599938784417796e-06, | |
| "loss": 0.5351, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.622673434856176, | |
| "grad_norm": 1.260372281074524, | |
| "learning_rate": 1.6535421040486686e-06, | |
| "loss": 0.4827, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.6235194585448393, | |
| "grad_norm": 1.6492784023284912, | |
| "learning_rate": 1.6470966908050012e-06, | |
| "loss": 0.5938, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.6243654822335025, | |
| "grad_norm": 1.360202670097351, | |
| "learning_rate": 1.6406576871481985e-06, | |
| "loss": 0.484, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.6252115059221658, | |
| "grad_norm": 1.4238383769989014, | |
| "learning_rate": 1.634225141467513e-06, | |
| "loss": 0.4169, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.626057529610829, | |
| "grad_norm": 1.4238007068634033, | |
| "learning_rate": 1.6277991021036644e-06, | |
| "loss": 0.5064, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6269035532994924, | |
| "grad_norm": 1.5756981372833252, | |
| "learning_rate": 1.6213796173484769e-06, | |
| "loss": 0.5694, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.6277495769881557, | |
| "grad_norm": 1.5684349536895752, | |
| "learning_rate": 1.6149667354445192e-06, | |
| "loss": 0.5578, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.628595600676819, | |
| "grad_norm": 1.3545986413955688, | |
| "learning_rate": 1.608560504584737e-06, | |
| "loss": 0.5363, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.6294416243654822, | |
| "grad_norm": 1.3728703260421753, | |
| "learning_rate": 1.6021609729120948e-06, | |
| "loss": 0.4479, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6302876480541455, | |
| "grad_norm": 1.4639620780944824, | |
| "learning_rate": 1.5957681885192111e-06, | |
| "loss": 0.5478, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.6311336717428088, | |
| "grad_norm": 1.3161569833755493, | |
| "learning_rate": 1.5893821994479996e-06, | |
| "loss": 0.4568, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.631979695431472, | |
| "grad_norm": 1.5565990209579468, | |
| "learning_rate": 1.5830030536893066e-06, | |
| "loss": 0.5132, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.6328257191201354, | |
| "grad_norm": 1.4323241710662842, | |
| "learning_rate": 1.5766307991825514e-06, | |
| "loss": 0.4374, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6336717428087987, | |
| "grad_norm": 1.372560739517212, | |
| "learning_rate": 1.5702654838153641e-06, | |
| "loss": 0.5318, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.6345177664974619, | |
| "grad_norm": 1.3776485919952393, | |
| "learning_rate": 1.5639071554232266e-06, | |
| "loss": 0.4903, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6353637901861252, | |
| "grad_norm": 1.421043872833252, | |
| "learning_rate": 1.5575558617891173e-06, | |
| "loss": 0.4828, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.6362098138747885, | |
| "grad_norm": 1.4715155363082886, | |
| "learning_rate": 1.551211650643144e-06, | |
| "loss": 0.5535, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6370558375634517, | |
| "grad_norm": 1.4590644836425781, | |
| "learning_rate": 1.5448745696621915e-06, | |
| "loss": 0.4879, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.637901861252115, | |
| "grad_norm": 1.3986414670944214, | |
| "learning_rate": 1.5385446664695603e-06, | |
| "loss": 0.4828, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.6387478849407784, | |
| "grad_norm": 1.2732402086257935, | |
| "learning_rate": 1.53222198863461e-06, | |
| "loss": 0.4103, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.6395939086294417, | |
| "grad_norm": 1.4938690662384033, | |
| "learning_rate": 1.5259065836724035e-06, | |
| "loss": 0.4481, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.6404399323181049, | |
| "grad_norm": 1.4133765697479248, | |
| "learning_rate": 1.5195984990433437e-06, | |
| "loss": 0.4322, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.6412859560067682, | |
| "grad_norm": 1.4759124517440796, | |
| "learning_rate": 1.5132977821528244e-06, | |
| "loss": 0.5065, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.6421319796954315, | |
| "grad_norm": 1.4921671152114868, | |
| "learning_rate": 1.5070044803508693e-06, | |
| "loss": 0.5302, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.6429780033840947, | |
| "grad_norm": 1.5433109998703003, | |
| "learning_rate": 1.500718640931779e-06, | |
| "loss": 0.5281, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.643824027072758, | |
| "grad_norm": 1.4201653003692627, | |
| "learning_rate": 1.494440311133772e-06, | |
| "loss": 0.4778, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.6446700507614214, | |
| "grad_norm": 1.4562307596206665, | |
| "learning_rate": 1.4881695381386324e-06, | |
| "loss": 0.5654, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.6455160744500846, | |
| "grad_norm": 1.3270257711410522, | |
| "learning_rate": 1.4819063690713565e-06, | |
| "loss": 0.4588, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.6463620981387479, | |
| "grad_norm": 1.3628114461898804, | |
| "learning_rate": 1.4756508509997946e-06, | |
| "loss": 0.5339, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6472081218274112, | |
| "grad_norm": 1.5659477710723877, | |
| "learning_rate": 1.4694030309343015e-06, | |
| "loss": 0.4217, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6480541455160744, | |
| "grad_norm": 1.6154391765594482, | |
| "learning_rate": 1.4631629558273803e-06, | |
| "loss": 0.5273, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6489001692047377, | |
| "grad_norm": 1.5286039113998413, | |
| "learning_rate": 1.4569306725733313e-06, | |
| "loss": 0.5359, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.649746192893401, | |
| "grad_norm": 1.4298804998397827, | |
| "learning_rate": 1.450706228007897e-06, | |
| "loss": 0.526, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6505922165820643, | |
| "grad_norm": 1.3179171085357666, | |
| "learning_rate": 1.4444896689079142e-06, | |
| "loss": 0.4854, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.6514382402707276, | |
| "grad_norm": 1.4320234060287476, | |
| "learning_rate": 1.4382810419909587e-06, | |
| "loss": 0.5674, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6522842639593909, | |
| "grad_norm": 1.5675216913223267, | |
| "learning_rate": 1.432080393914997e-06, | |
| "loss": 0.6243, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.6531302876480541, | |
| "grad_norm": 1.4815562963485718, | |
| "learning_rate": 1.4258877712780333e-06, | |
| "loss": 0.5564, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6539763113367174, | |
| "grad_norm": 1.3841257095336914, | |
| "learning_rate": 1.4197032206177618e-06, | |
| "loss": 0.481, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.6548223350253807, | |
| "grad_norm": 1.3601456880569458, | |
| "learning_rate": 1.4135267884112153e-06, | |
| "loss": 0.4912, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6556683587140439, | |
| "grad_norm": 1.3832186460494995, | |
| "learning_rate": 1.4073585210744136e-06, | |
| "loss": 0.5253, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6565143824027073, | |
| "grad_norm": 1.516714096069336, | |
| "learning_rate": 1.401198464962021e-06, | |
| "loss": 0.4864, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6573604060913706, | |
| "grad_norm": 1.6682307720184326, | |
| "learning_rate": 1.3950466663669915e-06, | |
| "loss": 0.5815, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.6582064297800339, | |
| "grad_norm": 1.3226845264434814, | |
| "learning_rate": 1.3889031715202272e-06, | |
| "loss": 0.4574, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6590524534686971, | |
| "grad_norm": 1.5030988454818726, | |
| "learning_rate": 1.3827680265902235e-06, | |
| "loss": 0.5515, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.6598984771573604, | |
| "grad_norm": 1.5196452140808105, | |
| "learning_rate": 1.3766412776827282e-06, | |
| "loss": 0.5655, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6607445008460237, | |
| "grad_norm": 1.392120361328125, | |
| "learning_rate": 1.3705229708403928e-06, | |
| "loss": 0.5012, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.6615905245346869, | |
| "grad_norm": 1.4476877450942993, | |
| "learning_rate": 1.3644131520424241e-06, | |
| "loss": 0.5739, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6624365482233503, | |
| "grad_norm": 1.4679392576217651, | |
| "learning_rate": 1.3583118672042441e-06, | |
| "loss": 0.5118, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.6632825719120136, | |
| "grad_norm": 1.5056501626968384, | |
| "learning_rate": 1.3522191621771402e-06, | |
| "loss": 0.5661, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6641285956006768, | |
| "grad_norm": 1.583238124847412, | |
| "learning_rate": 1.346135082747923e-06, | |
| "loss": 0.6109, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6649746192893401, | |
| "grad_norm": 1.6272413730621338, | |
| "learning_rate": 1.3400596746385817e-06, | |
| "loss": 0.5755, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6658206429780034, | |
| "grad_norm": 1.630968451499939, | |
| "learning_rate": 1.3339929835059393e-06, | |
| "loss": 0.5263, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.260642647743225, | |
| "learning_rate": 1.3279350549413117e-06, | |
| "loss": 0.4703, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.6675126903553299, | |
| "grad_norm": 1.5122746229171753, | |
| "learning_rate": 1.3218859344701634e-06, | |
| "loss": 0.5735, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.6683587140439933, | |
| "grad_norm": 1.5645116567611694, | |
| "learning_rate": 1.3158456675517657e-06, | |
| "loss": 0.5894, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6692047377326565, | |
| "grad_norm": 1.6442245244979858, | |
| "learning_rate": 1.3098142995788554e-06, | |
| "loss": 0.5377, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.6700507614213198, | |
| "grad_norm": 1.5986851453781128, | |
| "learning_rate": 1.3037918758772944e-06, | |
| "loss": 0.5731, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6708967851099831, | |
| "grad_norm": 1.582816243171692, | |
| "learning_rate": 1.2977784417057262e-06, | |
| "loss": 0.4785, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.6717428087986463, | |
| "grad_norm": 1.3253484964370728, | |
| "learning_rate": 1.29177404225524e-06, | |
| "loss": 0.4108, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6725888324873096, | |
| "grad_norm": 1.4294928312301636, | |
| "learning_rate": 1.2857787226490275e-06, | |
| "loss": 0.5675, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6734348561759729, | |
| "grad_norm": 1.5926408767700195, | |
| "learning_rate": 1.2797925279420454e-06, | |
| "loss": 0.5764, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6742808798646363, | |
| "grad_norm": 1.352016568183899, | |
| "learning_rate": 1.2738155031206772e-06, | |
| "loss": 0.5051, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.6751269035532995, | |
| "grad_norm": 1.4352556467056274, | |
| "learning_rate": 1.2678476931023947e-06, | |
| "loss": 0.4998, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6759729272419628, | |
| "grad_norm": 1.5205786228179932, | |
| "learning_rate": 1.2618891427354174e-06, | |
| "loss": 0.4852, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.676818950930626, | |
| "grad_norm": 1.4403825998306274, | |
| "learning_rate": 1.2559398967983821e-06, | |
| "loss": 0.5406, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6776649746192893, | |
| "grad_norm": 1.380646824836731, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "loss": 0.4836, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.6785109983079526, | |
| "grad_norm": 1.469948410987854, | |
| "learning_rate": 1.2440694969787262e-06, | |
| "loss": 0.5521, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6793570219966159, | |
| "grad_norm": 1.4081382751464844, | |
| "learning_rate": 1.2381484323024178e-06, | |
| "loss": 0.4733, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.6802030456852792, | |
| "grad_norm": 1.3682475090026855, | |
| "learning_rate": 1.232236850468004e-06, | |
| "loss": 0.5307, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6810490693739425, | |
| "grad_norm": 1.5731854438781738, | |
| "learning_rate": 1.2263347959011534e-06, | |
| "loss": 0.4799, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6818950930626058, | |
| "grad_norm": 1.4115339517593384, | |
| "learning_rate": 1.2204423129559306e-06, | |
| "loss": 0.4808, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.682741116751269, | |
| "grad_norm": 1.5260988473892212, | |
| "learning_rate": 1.2145594459144745e-06, | |
| "loss": 0.5247, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.6835871404399323, | |
| "grad_norm": 1.402204155921936, | |
| "learning_rate": 1.2086862389866577e-06, | |
| "loss": 0.5507, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6844331641285956, | |
| "grad_norm": 1.574589729309082, | |
| "learning_rate": 1.2028227363097583e-06, | |
| "loss": 0.4803, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.6852791878172588, | |
| "grad_norm": 1.4717187881469727, | |
| "learning_rate": 1.1969689819481257e-06, | |
| "loss": 0.5736, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6861252115059222, | |
| "grad_norm": 1.4452178478240967, | |
| "learning_rate": 1.1911250198928508e-06, | |
| "loss": 0.5348, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.6869712351945855, | |
| "grad_norm": 1.5130823850631714, | |
| "learning_rate": 1.1852908940614354e-06, | |
| "loss": 0.4494, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6878172588832487, | |
| "grad_norm": 1.5016757249832153, | |
| "learning_rate": 1.1794666482974617e-06, | |
| "loss": 0.4589, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.688663282571912, | |
| "grad_norm": 1.604846715927124, | |
| "learning_rate": 1.1736523263702637e-06, | |
| "loss": 0.5153, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6895093062605753, | |
| "grad_norm": 1.5163328647613525, | |
| "learning_rate": 1.167847971974595e-06, | |
| "loss": 0.5082, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6903553299492385, | |
| "grad_norm": 1.3733899593353271, | |
| "learning_rate": 1.1620536287303052e-06, | |
| "loss": 0.478, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.6912013536379019, | |
| "grad_norm": 1.4143409729003906, | |
| "learning_rate": 1.1562693401820094e-06, | |
| "loss": 0.497, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.6920473773265652, | |
| "grad_norm": 1.385507345199585, | |
| "learning_rate": 1.1504951497987626e-06, | |
| "loss": 0.5322, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6928934010152284, | |
| "grad_norm": 1.6582002639770508, | |
| "learning_rate": 1.14473110097373e-06, | |
| "loss": 0.5422, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.6937394247038917, | |
| "grad_norm": 1.633652925491333, | |
| "learning_rate": 1.1389772370238638e-06, | |
| "loss": 0.5311, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.694585448392555, | |
| "grad_norm": 1.4567234516143799, | |
| "learning_rate": 1.133233601189577e-06, | |
| "loss": 0.5541, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.6954314720812182, | |
| "grad_norm": 1.4335215091705322, | |
| "learning_rate": 1.1275002366344156e-06, | |
| "loss": 0.4871, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6962774957698815, | |
| "grad_norm": 1.401573896408081, | |
| "learning_rate": 1.1217771864447396e-06, | |
| "loss": 0.4619, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.6971235194585449, | |
| "grad_norm": 1.5211458206176758, | |
| "learning_rate": 1.1160644936293955e-06, | |
| "loss": 0.5443, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6979695431472082, | |
| "grad_norm": 1.3945550918579102, | |
| "learning_rate": 1.110362201119393e-06, | |
| "loss": 0.5781, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6988155668358714, | |
| "grad_norm": 1.577100396156311, | |
| "learning_rate": 1.1046703517675848e-06, | |
| "loss": 0.5209, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.6996615905245347, | |
| "grad_norm": 1.50028657913208, | |
| "learning_rate": 1.0989889883483415e-06, | |
| "loss": 0.4327, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.700507614213198, | |
| "grad_norm": 1.5429868698120117, | |
| "learning_rate": 1.093318153557233e-06, | |
| "loss": 0.4869, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.7013536379018612, | |
| "grad_norm": 1.4478799104690552, | |
| "learning_rate": 1.0876578900107053e-06, | |
| "loss": 0.5054, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.7021996615905245, | |
| "grad_norm": 1.5432162284851074, | |
| "learning_rate": 1.0820082402457617e-06, | |
| "loss": 0.5047, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7030456852791879, | |
| "grad_norm": 1.5668376684188843, | |
| "learning_rate": 1.0763692467196432e-06, | |
| "loss": 0.5276, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.7038917089678511, | |
| "grad_norm": 1.5857386589050293, | |
| "learning_rate": 1.070740951809508e-06, | |
| "loss": 0.5437, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.7047377326565144, | |
| "grad_norm": 1.518097996711731, | |
| "learning_rate": 1.0651233978121145e-06, | |
| "loss": 0.5266, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.7055837563451777, | |
| "grad_norm": 1.5712751150131226, | |
| "learning_rate": 1.0595166269435027e-06, | |
| "loss": 0.5185, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.7064297800338409, | |
| "grad_norm": 1.4731966257095337, | |
| "learning_rate": 1.0539206813386774e-06, | |
| "loss": 0.5471, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.7072758037225042, | |
| "grad_norm": 1.4393733739852905, | |
| "learning_rate": 1.048335603051291e-06, | |
| "loss": 0.4901, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.7081218274111675, | |
| "grad_norm": 1.6055103540420532, | |
| "learning_rate": 1.0427614340533293e-06, | |
| "loss": 0.5252, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.7089678510998308, | |
| "grad_norm": 1.5624628067016602, | |
| "learning_rate": 1.037198216234791e-06, | |
| "loss": 0.5123, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.7098138747884941, | |
| "grad_norm": 1.5251977443695068, | |
| "learning_rate": 1.0316459914033794e-06, | |
| "loss": 0.4864, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.7106598984771574, | |
| "grad_norm": 1.4280132055282593, | |
| "learning_rate": 1.0261048012841848e-06, | |
| "loss": 0.4176, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7115059221658206, | |
| "grad_norm": 1.5174310207366943, | |
| "learning_rate": 1.0205746875193712e-06, | |
| "loss": 0.468, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.7123519458544839, | |
| "grad_norm": 1.4498469829559326, | |
| "learning_rate": 1.0150556916678634e-06, | |
| "loss": 0.4887, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.7131979695431472, | |
| "grad_norm": 1.3770853281021118, | |
| "learning_rate": 1.0095478552050348e-06, | |
| "loss": 0.4825, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.7140439932318104, | |
| "grad_norm": 1.3348174095153809, | |
| "learning_rate": 1.0040512195223947e-06, | |
| "loss": 0.4465, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.7148900169204738, | |
| "grad_norm": 1.5871872901916504, | |
| "learning_rate": 9.985658259272826e-07, | |
| "loss": 0.504, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.7157360406091371, | |
| "grad_norm": 1.5275450944900513, | |
| "learning_rate": 9.930917156425477e-07, | |
| "loss": 0.5283, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.7165820642978004, | |
| "grad_norm": 1.7547075748443604, | |
| "learning_rate": 9.876289298062478e-07, | |
| "loss": 0.5201, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.7174280879864636, | |
| "grad_norm": 1.480360507965088, | |
| "learning_rate": 9.821775094713376e-07, | |
| "loss": 0.5058, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.7182741116751269, | |
| "grad_norm": 1.6711392402648926, | |
| "learning_rate": 9.767374956053584e-07, | |
| "loss": 0.4928, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.7191201353637902, | |
| "grad_norm": 1.2926833629608154, | |
| "learning_rate": 9.713089290901334e-07, | |
| "loss": 0.4889, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7199661590524534, | |
| "grad_norm": 1.6195472478866577, | |
| "learning_rate": 9.658918507214567e-07, | |
| "loss": 0.5089, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.7208121827411168, | |
| "grad_norm": 1.544764757156372, | |
| "learning_rate": 9.604863012087904e-07, | |
| "loss": 0.5558, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.7216582064297801, | |
| "grad_norm": 1.4564974308013916, | |
| "learning_rate": 9.550923211749557e-07, | |
| "loss": 0.5031, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.7225042301184433, | |
| "grad_norm": 1.4476693868637085, | |
| "learning_rate": 9.497099511558309e-07, | |
| "loss": 0.486, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.7233502538071066, | |
| "grad_norm": 1.3963452577590942, | |
| "learning_rate": 9.443392316000413e-07, | |
| "loss": 0.4551, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.7241962774957699, | |
| "grad_norm": 1.6682579517364502, | |
| "learning_rate": 9.389802028686617e-07, | |
| "loss": 0.5026, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.7250423011844331, | |
| "grad_norm": 1.4662981033325195, | |
| "learning_rate": 9.336329052349089e-07, | |
| "loss": 0.5005, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.7258883248730964, | |
| "grad_norm": 1.414525032043457, | |
| "learning_rate": 9.28297378883842e-07, | |
| "loss": 0.4489, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.7267343485617598, | |
| "grad_norm": 1.6276682615280151, | |
| "learning_rate": 9.229736639120562e-07, | |
| "loss": 0.6037, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.727580372250423, | |
| "grad_norm": 1.5041996240615845, | |
| "learning_rate": 9.176618003273848e-07, | |
| "loss": 0.5154, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7284263959390863, | |
| "grad_norm": 1.3910162448883057, | |
| "learning_rate": 9.123618280485993e-07, | |
| "loss": 0.3958, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.7292724196277496, | |
| "grad_norm": 1.3514389991760254, | |
| "learning_rate": 9.070737869051044e-07, | |
| "loss": 0.456, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7301184433164128, | |
| "grad_norm": 1.4312387704849243, | |
| "learning_rate": 9.017977166366445e-07, | |
| "loss": 0.5148, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.7309644670050761, | |
| "grad_norm": 1.4258145093917847, | |
| "learning_rate": 8.965336568930022e-07, | |
| "loss": 0.5091, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.7318104906937394, | |
| "grad_norm": 1.468034029006958, | |
| "learning_rate": 8.912816472337008e-07, | |
| "loss": 0.4846, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.7326565143824028, | |
| "grad_norm": 1.375032663345337, | |
| "learning_rate": 8.860417271277067e-07, | |
| "loss": 0.4945, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.733502538071066, | |
| "grad_norm": 1.2760509252548218, | |
| "learning_rate": 8.808139359531332e-07, | |
| "loss": 0.4549, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.7343485617597293, | |
| "grad_norm": 1.5727797746658325, | |
| "learning_rate": 8.75598312996944e-07, | |
| "loss": 0.4966, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.7351945854483926, | |
| "grad_norm": 1.5262360572814941, | |
| "learning_rate": 8.703948974546592e-07, | |
| "loss": 0.5525, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.7360406091370558, | |
| "grad_norm": 1.482978105545044, | |
| "learning_rate": 8.65203728430059e-07, | |
| "loss": 0.463, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7368866328257191, | |
| "grad_norm": 1.515729546546936, | |
| "learning_rate": 8.600248449348916e-07, | |
| "loss": 0.5125, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.7377326565143824, | |
| "grad_norm": 1.6180707216262817, | |
| "learning_rate": 8.548582858885787e-07, | |
| "loss": 0.5446, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.7385786802030457, | |
| "grad_norm": 1.7022454738616943, | |
| "learning_rate": 8.497040901179232e-07, | |
| "loss": 0.439, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.739424703891709, | |
| "grad_norm": 1.35090172290802, | |
| "learning_rate": 8.445622963568184e-07, | |
| "loss": 0.4509, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.7402707275803723, | |
| "grad_norm": 1.2935441732406616, | |
| "learning_rate": 8.394329432459561e-07, | |
| "loss": 0.4708, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.7411167512690355, | |
| "grad_norm": 1.4397931098937988, | |
| "learning_rate": 8.343160693325356e-07, | |
| "loss": 0.4763, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.7419627749576988, | |
| "grad_norm": 1.4349732398986816, | |
| "learning_rate": 8.292117130699767e-07, | |
| "loss": 0.4536, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.7428087986463621, | |
| "grad_norm": 1.4029067754745483, | |
| "learning_rate": 8.241199128176255e-07, | |
| "loss": 0.5145, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.7436548223350253, | |
| "grad_norm": 1.4133881330490112, | |
| "learning_rate": 8.190407068404721e-07, | |
| "loss": 0.5282, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.7445008460236887, | |
| "grad_norm": 1.4240069389343262, | |
| "learning_rate": 8.139741333088597e-07, | |
| "loss": 0.4763, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.745346869712352, | |
| "grad_norm": 1.5198265314102173, | |
| "learning_rate": 8.089202302981983e-07, | |
| "loss": 0.5663, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.7461928934010152, | |
| "grad_norm": 1.3956340551376343, | |
| "learning_rate": 8.038790357886783e-07, | |
| "loss": 0.4796, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.7470389170896785, | |
| "grad_norm": 1.4687814712524414, | |
| "learning_rate": 7.988505876649863e-07, | |
| "loss": 0.5539, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.7478849407783418, | |
| "grad_norm": 1.3739749193191528, | |
| "learning_rate": 7.938349237160184e-07, | |
| "loss": 0.4545, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.748730964467005, | |
| "grad_norm": 1.3965612649917603, | |
| "learning_rate": 7.888320816345984e-07, | |
| "loss": 0.5094, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7495769881556683, | |
| "grad_norm": 1.383991003036499, | |
| "learning_rate": 7.838420990171927e-07, | |
| "loss": 0.3785, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.7504230118443317, | |
| "grad_norm": 1.4848946332931519, | |
| "learning_rate": 7.788650133636291e-07, | |
| "loss": 0.4969, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.751269035532995, | |
| "grad_norm": 1.5263686180114746, | |
| "learning_rate": 7.739008620768143e-07, | |
| "loss": 0.4673, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.7521150592216582, | |
| "grad_norm": 1.5392656326293945, | |
| "learning_rate": 7.689496824624526e-07, | |
| "loss": 0.5333, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.7529610829103215, | |
| "grad_norm": 1.4336823225021362, | |
| "learning_rate": 7.640115117287661e-07, | |
| "loss": 0.5547, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7538071065989848, | |
| "grad_norm": 1.606310486793518, | |
| "learning_rate": 7.590863869862155e-07, | |
| "loss": 0.5233, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.754653130287648, | |
| "grad_norm": 1.4799706935882568, | |
| "learning_rate": 7.541743452472194e-07, | |
| "loss": 0.5235, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7554991539763113, | |
| "grad_norm": 1.4974623918533325, | |
| "learning_rate": 7.492754234258794e-07, | |
| "loss": 0.4494, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.7563451776649747, | |
| "grad_norm": 1.7052468061447144, | |
| "learning_rate": 7.443896583376972e-07, | |
| "loss": 0.5512, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7571912013536379, | |
| "grad_norm": 1.6682631969451904, | |
| "learning_rate": 7.395170866993043e-07, | |
| "loss": 0.5588, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7580372250423012, | |
| "grad_norm": 1.5600894689559937, | |
| "learning_rate": 7.346577451281822e-07, | |
| "loss": 0.5154, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7588832487309645, | |
| "grad_norm": 1.35463547706604, | |
| "learning_rate": 7.298116701423874e-07, | |
| "loss": 0.5002, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.7597292724196277, | |
| "grad_norm": 1.4675949811935425, | |
| "learning_rate": 7.249788981602801e-07, | |
| "loss": 0.5376, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.760575296108291, | |
| "grad_norm": 1.5439352989196777, | |
| "learning_rate": 7.201594655002458e-07, | |
| "loss": 0.4828, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.7614213197969543, | |
| "grad_norm": 1.5555047988891602, | |
| "learning_rate": 7.153534083804253e-07, | |
| "loss": 0.5266, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7622673434856176, | |
| "grad_norm": 1.6355152130126953, | |
| "learning_rate": 7.105607629184433e-07, | |
| "loss": 0.5003, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.7631133671742809, | |
| "grad_norm": 1.4930115938186646, | |
| "learning_rate": 7.057815651311323e-07, | |
| "loss": 0.5193, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7639593908629442, | |
| "grad_norm": 1.5737615823745728, | |
| "learning_rate": 7.010158509342682e-07, | |
| "loss": 0.5353, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.7648054145516074, | |
| "grad_norm": 1.5561256408691406, | |
| "learning_rate": 6.962636561422967e-07, | |
| "loss": 0.5157, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7656514382402707, | |
| "grad_norm": 1.5765726566314697, | |
| "learning_rate": 6.915250164680648e-07, | |
| "loss": 0.5358, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.766497461928934, | |
| "grad_norm": 1.4768812656402588, | |
| "learning_rate": 6.867999675225523e-07, | |
| "loss": 0.5021, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7673434856175972, | |
| "grad_norm": 1.4443702697753906, | |
| "learning_rate": 6.820885448146041e-07, | |
| "loss": 0.5442, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.7681895093062606, | |
| "grad_norm": 1.5117536783218384, | |
| "learning_rate": 6.773907837506646e-07, | |
| "loss": 0.6001, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.7690355329949239, | |
| "grad_norm": 1.4011093378067017, | |
| "learning_rate": 6.7270671963451e-07, | |
| "loss": 0.5374, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.7698815566835872, | |
| "grad_norm": 1.523114562034607, | |
| "learning_rate": 6.680363876669832e-07, | |
| "loss": 0.4964, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7707275803722504, | |
| "grad_norm": 1.3653088808059692, | |
| "learning_rate": 6.633798229457309e-07, | |
| "loss": 0.4675, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.7715736040609137, | |
| "grad_norm": 1.532370686531067, | |
| "learning_rate": 6.587370604649373e-07, | |
| "loss": 0.5292, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.772419627749577, | |
| "grad_norm": 1.5474774837493896, | |
| "learning_rate": 6.541081351150638e-07, | |
| "loss": 0.5468, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.7732656514382402, | |
| "grad_norm": 1.6492289304733276, | |
| "learning_rate": 6.494930816825842e-07, | |
| "loss": 0.5242, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7741116751269036, | |
| "grad_norm": 1.6536519527435303, | |
| "learning_rate": 6.448919348497254e-07, | |
| "loss": 0.5071, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7749576988155669, | |
| "grad_norm": 1.7425479888916016, | |
| "learning_rate": 6.403047291942057e-07, | |
| "loss": 0.5401, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.7758037225042301, | |
| "grad_norm": 1.5332328081130981, | |
| "learning_rate": 6.357314991889757e-07, | |
| "loss": 0.5349, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.7766497461928934, | |
| "grad_norm": 1.671473503112793, | |
| "learning_rate": 6.311722792019565e-07, | |
| "loss": 0.5401, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7774957698815567, | |
| "grad_norm": 1.3117287158966064, | |
| "learning_rate": 6.266271034957861e-07, | |
| "loss": 0.4603, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.7783417935702199, | |
| "grad_norm": 1.5059982538223267, | |
| "learning_rate": 6.220960062275583e-07, | |
| "loss": 0.5476, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7791878172588832, | |
| "grad_norm": 1.5411993265151978, | |
| "learning_rate": 6.175790214485674e-07, | |
| "loss": 0.5483, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.7800338409475466, | |
| "grad_norm": 1.4125125408172607, | |
| "learning_rate": 6.130761831040522e-07, | |
| "loss": 0.4734, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.7808798646362098, | |
| "grad_norm": 1.3989757299423218, | |
| "learning_rate": 6.085875250329401e-07, | |
| "loss": 0.4282, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.7817258883248731, | |
| "grad_norm": 1.408751368522644, | |
| "learning_rate": 6.041130809675944e-07, | |
| "loss": 0.4072, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7825719120135364, | |
| "grad_norm": 1.6468225717544556, | |
| "learning_rate": 5.996528845335587e-07, | |
| "loss": 0.5275, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7834179357021996, | |
| "grad_norm": 1.299722671508789, | |
| "learning_rate": 5.952069692493062e-07, | |
| "loss": 0.4825, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7842639593908629, | |
| "grad_norm": 1.3989574909210205, | |
| "learning_rate": 5.907753685259865e-07, | |
| "loss": 0.5353, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.7851099830795262, | |
| "grad_norm": 1.632617473602295, | |
| "learning_rate": 5.863581156671755e-07, | |
| "loss": 0.5604, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7859560067681896, | |
| "grad_norm": 1.3802127838134766, | |
| "learning_rate": 5.819552438686238e-07, | |
| "loss": 0.5555, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.7868020304568528, | |
| "grad_norm": 1.2966598272323608, | |
| "learning_rate": 5.775667862180087e-07, | |
| "loss": 0.5135, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7876480541455161, | |
| "grad_norm": 1.4955836534500122, | |
| "learning_rate": 5.731927756946848e-07, | |
| "loss": 0.5242, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.7884940778341794, | |
| "grad_norm": 1.486580729484558, | |
| "learning_rate": 5.688332451694356e-07, | |
| "loss": 0.5137, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7893401015228426, | |
| "grad_norm": 1.5617793798446655, | |
| "learning_rate": 5.644882274042285e-07, | |
| "loss": 0.5552, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.7901861252115059, | |
| "grad_norm": 1.4690701961517334, | |
| "learning_rate": 5.601577550519646e-07, | |
| "loss": 0.5497, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7910321489001692, | |
| "grad_norm": 1.4751214981079102, | |
| "learning_rate": 5.558418606562385e-07, | |
| "loss": 0.4816, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7918781725888325, | |
| "grad_norm": 1.4868813753128052, | |
| "learning_rate": 5.5154057665109e-07, | |
| "loss": 0.511, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7927241962774958, | |
| "grad_norm": 1.3979462385177612, | |
| "learning_rate": 5.472539353607612e-07, | |
| "loss": 0.4614, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.7935702199661591, | |
| "grad_norm": 1.40373694896698, | |
| "learning_rate": 5.429819689994556e-07, | |
| "loss": 0.5177, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7944162436548223, | |
| "grad_norm": 1.5623489618301392, | |
| "learning_rate": 5.387247096710921e-07, | |
| "loss": 0.5668, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.7952622673434856, | |
| "grad_norm": 1.258279800415039, | |
| "learning_rate": 5.344821893690679e-07, | |
| "loss": 0.4178, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7961082910321489, | |
| "grad_norm": 1.3184384107589722, | |
| "learning_rate": 5.30254439976014e-07, | |
| "loss": 0.4387, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.7969543147208121, | |
| "grad_norm": 1.380998969078064, | |
| "learning_rate": 5.260414932635588e-07, | |
| "loss": 0.5116, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7978003384094755, | |
| "grad_norm": 1.447558045387268, | |
| "learning_rate": 5.218433808920884e-07, | |
| "loss": 0.5289, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.7986463620981388, | |
| "grad_norm": 1.6539430618286133, | |
| "learning_rate": 5.176601344105084e-07, | |
| "loss": 0.5358, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.799492385786802, | |
| "grad_norm": 1.6026103496551514, | |
| "learning_rate": 5.134917852560067e-07, | |
| "loss": 0.5461, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.8003384094754653, | |
| "grad_norm": 1.8216614723205566, | |
| "learning_rate": 5.09338364753818e-07, | |
| "loss": 0.5195, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.8011844331641286, | |
| "grad_norm": 1.376305103302002, | |
| "learning_rate": 5.051999041169869e-07, | |
| "loss": 0.5051, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.8020304568527918, | |
| "grad_norm": 1.5061923265457153, | |
| "learning_rate": 5.010764344461352e-07, | |
| "loss": 0.4656, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.8028764805414551, | |
| "grad_norm": 1.6302450895309448, | |
| "learning_rate": 4.969679867292276e-07, | |
| "loss": 0.5381, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.8037225042301185, | |
| "grad_norm": 1.4138633012771606, | |
| "learning_rate": 4.928745918413352e-07, | |
| "loss": 0.5356, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8045685279187818, | |
| "grad_norm": 1.4501382112503052, | |
| "learning_rate": 4.887962805444122e-07, | |
| "loss": 0.506, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.805414551607445, | |
| "grad_norm": 1.3429620265960693, | |
| "learning_rate": 4.847330834870551e-07, | |
| "loss": 0.4975, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.8062605752961083, | |
| "grad_norm": 1.3932536840438843, | |
| "learning_rate": 4.806850312042782e-07, | |
| "loss": 0.4677, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.8071065989847716, | |
| "grad_norm": 1.4443645477294922, | |
| "learning_rate": 4.766521541172822e-07, | |
| "loss": 0.5573, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.8079526226734348, | |
| "grad_norm": 1.536169409751892, | |
| "learning_rate": 4.7263448253322574e-07, | |
| "loss": 0.5413, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.8087986463620981, | |
| "grad_norm": 1.3791049718856812, | |
| "learning_rate": 4.686320466449981e-07, | |
| "loss": 0.4739, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.8096446700507615, | |
| "grad_norm": 1.523890733718872, | |
| "learning_rate": 4.6464487653099216e-07, | |
| "loss": 0.5314, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.8104906937394247, | |
| "grad_norm": 1.4911043643951416, | |
| "learning_rate": 4.6067300215487663e-07, | |
| "loss": 0.5903, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.811336717428088, | |
| "grad_norm": 1.383557915687561, | |
| "learning_rate": 4.5671645336537425e-07, | |
| "loss": 0.5124, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.8121827411167513, | |
| "grad_norm": 1.5496838092803955, | |
| "learning_rate": 4.52775259896035e-07, | |
| "loss": 0.5472, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8130287648054145, | |
| "grad_norm": 1.4877400398254395, | |
| "learning_rate": 4.4884945136501325e-07, | |
| "loss": 0.5108, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.8138747884940778, | |
| "grad_norm": 1.4807846546173096, | |
| "learning_rate": 4.449390572748449e-07, | |
| "loss": 0.4671, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.8147208121827412, | |
| "grad_norm": 1.3834859132766724, | |
| "learning_rate": 4.4104410701222703e-07, | |
| "loss": 0.4498, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.8155668358714044, | |
| "grad_norm": 1.4340556859970093, | |
| "learning_rate": 4.371646298477947e-07, | |
| "loss": 0.4934, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.8164128595600677, | |
| "grad_norm": 1.6941485404968262, | |
| "learning_rate": 4.333006549359034e-07, | |
| "loss": 0.5905, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.817258883248731, | |
| "grad_norm": 1.3260046243667603, | |
| "learning_rate": 4.2945221131440783e-07, | |
| "loss": 0.4841, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.8181049069373942, | |
| "grad_norm": 1.5405261516571045, | |
| "learning_rate": 4.2561932790444597e-07, | |
| "loss": 0.5125, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.8189509306260575, | |
| "grad_norm": 1.5137594938278198, | |
| "learning_rate": 4.218020335102191e-07, | |
| "loss": 0.469, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.8197969543147208, | |
| "grad_norm": 1.3630168437957764, | |
| "learning_rate": 4.1800035681877765e-07, | |
| "loss": 0.5501, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.8206429780033841, | |
| "grad_norm": 1.3924651145935059, | |
| "learning_rate": 4.142143263998047e-07, | |
| "loss": 0.4994, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8214890016920474, | |
| "grad_norm": 1.426595687866211, | |
| "learning_rate": 4.104439707054003e-07, | |
| "loss": 0.4795, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.8223350253807107, | |
| "grad_norm": 1.356465220451355, | |
| "learning_rate": 4.0668931806987e-07, | |
| "loss": 0.4743, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.823181049069374, | |
| "grad_norm": 1.474966049194336, | |
| "learning_rate": 4.029503967095097e-07, | |
| "loss": 0.5118, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.8240270727580372, | |
| "grad_norm": 1.4417613744735718, | |
| "learning_rate": 3.9922723472239356e-07, | |
| "loss": 0.5565, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.8248730964467005, | |
| "grad_norm": 1.5358595848083496, | |
| "learning_rate": 3.9551986008816544e-07, | |
| "loss": 0.5128, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.8257191201353637, | |
| "grad_norm": 1.390822410583496, | |
| "learning_rate": 3.9182830066782614e-07, | |
| "loss": 0.5552, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.8265651438240271, | |
| "grad_norm": 1.3930827379226685, | |
| "learning_rate": 3.8815258420352385e-07, | |
| "loss": 0.4682, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.8274111675126904, | |
| "grad_norm": 1.3697726726531982, | |
| "learning_rate": 3.844927383183486e-07, | |
| "loss": 0.4713, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.8282571912013537, | |
| "grad_norm": 1.5058186054229736, | |
| "learning_rate": 3.808487905161215e-07, | |
| "loss": 0.5004, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.8291032148900169, | |
| "grad_norm": 1.4689873456954956, | |
| "learning_rate": 3.772207681811896e-07, | |
| "loss": 0.5002, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8299492385786802, | |
| "grad_norm": 1.4826774597167969, | |
| "learning_rate": 3.7360869857821805e-07, | |
| "loss": 0.4269, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.8307952622673435, | |
| "grad_norm": 1.3101059198379517, | |
| "learning_rate": 3.7001260885198925e-07, | |
| "loss": 0.53, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.8316412859560067, | |
| "grad_norm": 1.5160847902297974, | |
| "learning_rate": 3.664325260271953e-07, | |
| "loss": 0.5218, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.8324873096446701, | |
| "grad_norm": 1.3929725885391235, | |
| "learning_rate": 3.6286847700823634e-07, | |
| "loss": 0.55, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 1.4347172975540161, | |
| "learning_rate": 3.5932048857901773e-07, | |
| "loss": 0.455, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.8341793570219966, | |
| "grad_norm": 1.373490333557129, | |
| "learning_rate": 3.5578858740274976e-07, | |
| "loss": 0.4914, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.8350253807106599, | |
| "grad_norm": 1.4317591190338135, | |
| "learning_rate": 3.5227280002174626e-07, | |
| "loss": 0.5637, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.8358714043993232, | |
| "grad_norm": 1.6726568937301636, | |
| "learning_rate": 3.487731528572255e-07, | |
| "loss": 0.5096, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.8367174280879864, | |
| "grad_norm": 1.3493298292160034, | |
| "learning_rate": 3.4528967220911287e-07, | |
| "loss": 0.4357, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.8375634517766497, | |
| "grad_norm": 1.4160189628601074, | |
| "learning_rate": 3.418223842558385e-07, | |
| "loss": 0.4529, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8384094754653131, | |
| "grad_norm": 1.5254844427108765, | |
| "learning_rate": 3.38371315054149e-07, | |
| "loss": 0.4857, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.8392554991539763, | |
| "grad_norm": 1.4467828273773193, | |
| "learning_rate": 3.3493649053890325e-07, | |
| "loss": 0.5402, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.8401015228426396, | |
| "grad_norm": 1.5890865325927734, | |
| "learning_rate": 3.315179365228824e-07, | |
| "loss": 0.4991, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.8409475465313029, | |
| "grad_norm": 1.4599320888519287, | |
| "learning_rate": 3.281156786965933e-07, | |
| "loss": 0.399, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.8417935702199661, | |
| "grad_norm": 1.501905918121338, | |
| "learning_rate": 3.2472974262807794e-07, | |
| "loss": 0.4408, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.8426395939086294, | |
| "grad_norm": 1.4133471250534058, | |
| "learning_rate": 3.213601537627195e-07, | |
| "loss": 0.4854, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.8434856175972927, | |
| "grad_norm": 1.474295973777771, | |
| "learning_rate": 3.1800693742305074e-07, | |
| "loss": 0.4868, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.8443316412859561, | |
| "grad_norm": 1.340802788734436, | |
| "learning_rate": 3.146701188085649e-07, | |
| "loss": 0.5024, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.8451776649746193, | |
| "grad_norm": 1.5965441465377808, | |
| "learning_rate": 3.11349722995527e-07, | |
| "loss": 0.5567, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.8460236886632826, | |
| "grad_norm": 1.4308526515960693, | |
| "learning_rate": 3.080457749367832e-07, | |
| "loss": 0.4896, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8468697123519459, | |
| "grad_norm": 1.475598931312561, | |
| "learning_rate": 3.04758299461575e-07, | |
| "loss": 0.4918, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.8477157360406091, | |
| "grad_norm": 1.4925700426101685, | |
| "learning_rate": 3.014873212753516e-07, | |
| "loss": 0.4652, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.8485617597292724, | |
| "grad_norm": 1.5554040670394897, | |
| "learning_rate": 2.982328649595856e-07, | |
| "loss": 0.5302, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.8494077834179357, | |
| "grad_norm": 1.666174054145813, | |
| "learning_rate": 2.949949549715858e-07, | |
| "loss": 0.4715, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.850253807106599, | |
| "grad_norm": 1.4457473754882812, | |
| "learning_rate": 2.917736156443171e-07, | |
| "loss": 0.4546, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.8510998307952623, | |
| "grad_norm": 1.4290974140167236, | |
| "learning_rate": 2.885688711862136e-07, | |
| "loss": 0.5035, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.8519458544839256, | |
| "grad_norm": 1.4836621284484863, | |
| "learning_rate": 2.8538074568099954e-07, | |
| "loss": 0.5912, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.8527918781725888, | |
| "grad_norm": 1.309338092803955, | |
| "learning_rate": 2.8220926308750757e-07, | |
| "loss": 0.4693, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.8536379018612521, | |
| "grad_norm": 1.3714721202850342, | |
| "learning_rate": 2.7905444723949765e-07, | |
| "loss": 0.4841, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.8544839255499154, | |
| "grad_norm": 1.3649089336395264, | |
| "learning_rate": 2.7591632184547996e-07, | |
| "loss": 0.4965, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8553299492385786, | |
| "grad_norm": 1.4042978286743164, | |
| "learning_rate": 2.727949104885341e-07, | |
| "loss": 0.4913, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.856175972927242, | |
| "grad_norm": 1.6518502235412598, | |
| "learning_rate": 2.6969023662613473e-07, | |
| "loss": 0.5123, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.8570219966159053, | |
| "grad_norm": 1.5500681400299072, | |
| "learning_rate": 2.666023235899734e-07, | |
| "loss": 0.464, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.8578680203045685, | |
| "grad_norm": 1.5040642023086548, | |
| "learning_rate": 2.6353119458578297e-07, | |
| "loss": 0.4687, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.8587140439932318, | |
| "grad_norm": 1.548913836479187, | |
| "learning_rate": 2.604768726931645e-07, | |
| "loss": 0.565, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8595600676818951, | |
| "grad_norm": 1.342336893081665, | |
| "learning_rate": 2.5743938086541354e-07, | |
| "loss": 0.4772, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.8604060913705583, | |
| "grad_norm": 1.419202446937561, | |
| "learning_rate": 2.544187419293462e-07, | |
| "loss": 0.5304, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.8612521150592216, | |
| "grad_norm": 1.3937733173370361, | |
| "learning_rate": 2.514149785851311e-07, | |
| "loss": 0.5183, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.862098138747885, | |
| "grad_norm": 1.44895601272583, | |
| "learning_rate": 2.4842811340611423e-07, | |
| "loss": 0.497, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.8629441624365483, | |
| "grad_norm": 1.648419976234436, | |
| "learning_rate": 2.454581688386523e-07, | |
| "loss": 0.4864, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8637901861252115, | |
| "grad_norm": 1.4835489988327026, | |
| "learning_rate": 2.4250516720194267e-07, | |
| "loss": 0.5337, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.8646362098138748, | |
| "grad_norm": 1.247205376625061, | |
| "learning_rate": 2.3956913068785697e-07, | |
| "loss": 0.4464, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8654822335025381, | |
| "grad_norm": 1.5768427848815918, | |
| "learning_rate": 2.3665008136077332e-07, | |
| "loss": 0.5662, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.8663282571912013, | |
| "grad_norm": 1.430821180343628, | |
| "learning_rate": 2.3374804115741056e-07, | |
| "loss": 0.4908, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8671742808798646, | |
| "grad_norm": 1.5622667074203491, | |
| "learning_rate": 2.3086303188666393e-07, | |
| "loss": 0.5398, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.868020304568528, | |
| "grad_norm": 1.375335931777954, | |
| "learning_rate": 2.2799507522944048e-07, | |
| "loss": 0.4289, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8688663282571912, | |
| "grad_norm": 1.2710477113723755, | |
| "learning_rate": 2.2514419273849674e-07, | |
| "loss": 0.4489, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.8697123519458545, | |
| "grad_norm": 1.3400541543960571, | |
| "learning_rate": 2.223104058382766e-07, | |
| "loss": 0.4705, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.8705583756345178, | |
| "grad_norm": 1.3298795223236084, | |
| "learning_rate": 2.1949373582475065e-07, | |
| "loss": 0.5148, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.871404399323181, | |
| "grad_norm": 1.3770630359649658, | |
| "learning_rate": 2.166942038652531e-07, | |
| "loss": 0.4446, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8722504230118443, | |
| "grad_norm": 1.5070509910583496, | |
| "learning_rate": 2.1391183099832958e-07, | |
| "loss": 0.4768, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.8730964467005076, | |
| "grad_norm": 1.6530050039291382, | |
| "learning_rate": 2.111466381335714e-07, | |
| "loss": 0.5459, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.873942470389171, | |
| "grad_norm": 1.3462867736816406, | |
| "learning_rate": 2.083986460514631e-07, | |
| "loss": 0.4644, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.8747884940778342, | |
| "grad_norm": 1.366542100906372, | |
| "learning_rate": 2.056678754032246e-07, | |
| "loss": 0.4752, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8756345177664975, | |
| "grad_norm": 1.3380286693572998, | |
| "learning_rate": 2.0295434671065706e-07, | |
| "loss": 0.4484, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8764805414551607, | |
| "grad_norm": 1.5602799654006958, | |
| "learning_rate": 2.002580803659873e-07, | |
| "loss": 0.5123, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.877326565143824, | |
| "grad_norm": 1.4561575651168823, | |
| "learning_rate": 1.9757909663171508e-07, | |
| "loss": 0.5151, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.8781725888324873, | |
| "grad_norm": 1.4674752950668335, | |
| "learning_rate": 1.9491741564046125e-07, | |
| "loss": 0.4728, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8790186125211505, | |
| "grad_norm": 1.5501822233200073, | |
| "learning_rate": 1.9227305739481618e-07, | |
| "loss": 0.4961, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.8798646362098139, | |
| "grad_norm": 1.3679369688034058, | |
| "learning_rate": 1.896460417671897e-07, | |
| "loss": 0.5051, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8807106598984772, | |
| "grad_norm": 1.462009072303772, | |
| "learning_rate": 1.8703638849966094e-07, | |
| "loss": 0.4909, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.8815566835871405, | |
| "grad_norm": 1.4271647930145264, | |
| "learning_rate": 1.844441172038311e-07, | |
| "loss": 0.4598, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8824027072758037, | |
| "grad_norm": 1.3603568077087402, | |
| "learning_rate": 1.818692473606748e-07, | |
| "loss": 0.446, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.883248730964467, | |
| "grad_norm": 1.4679503440856934, | |
| "learning_rate": 1.7931179832039513e-07, | |
| "loss": 0.4422, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8840947546531303, | |
| "grad_norm": 1.4945645332336426, | |
| "learning_rate": 1.767717893022769e-07, | |
| "loss": 0.5402, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8849407783417935, | |
| "grad_norm": 1.5339360237121582, | |
| "learning_rate": 1.7424923939454274e-07, | |
| "loss": 0.5342, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8857868020304569, | |
| "grad_norm": 1.4887621402740479, | |
| "learning_rate": 1.717441675542106e-07, | |
| "loss": 0.5502, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.8866328257191202, | |
| "grad_norm": 1.5359052419662476, | |
| "learning_rate": 1.6925659260694894e-07, | |
| "loss": 0.5523, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8874788494077834, | |
| "grad_norm": 1.4636895656585693, | |
| "learning_rate": 1.667865332469379e-07, | |
| "loss": 0.4753, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.8883248730964467, | |
| "grad_norm": 1.4733718633651733, | |
| "learning_rate": 1.643340080367267e-07, | |
| "loss": 0.4974, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.88917089678511, | |
| "grad_norm": 1.4999078512191772, | |
| "learning_rate": 1.6189903540709595e-07, | |
| "loss": 0.4722, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.8900169204737732, | |
| "grad_norm": 1.534686803817749, | |
| "learning_rate": 1.5948163365691798e-07, | |
| "loss": 0.5379, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.8908629441624365, | |
| "grad_norm": 1.3469022512435913, | |
| "learning_rate": 1.5708182095301867e-07, | |
| "loss": 0.5219, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.8917089678510999, | |
| "grad_norm": 1.2992980480194092, | |
| "learning_rate": 1.5469961533004258e-07, | |
| "loss": 0.4464, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8925549915397631, | |
| "grad_norm": 1.1914132833480835, | |
| "learning_rate": 1.5233503469031686e-07, | |
| "loss": 0.3623, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8934010152284264, | |
| "grad_norm": 1.5193549394607544, | |
| "learning_rate": 1.499880968037165e-07, | |
| "loss": 0.4728, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8942470389170897, | |
| "grad_norm": 1.4608553647994995, | |
| "learning_rate": 1.4765881930752983e-07, | |
| "loss": 0.4644, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.8950930626057529, | |
| "grad_norm": 1.6205723285675049, | |
| "learning_rate": 1.4534721970632882e-07, | |
| "loss": 0.5703, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8959390862944162, | |
| "grad_norm": 1.5539836883544922, | |
| "learning_rate": 1.4305331537183387e-07, | |
| "loss": 0.6296, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.8967851099830795, | |
| "grad_norm": 1.4455832242965698, | |
| "learning_rate": 1.4077712354278683e-07, | |
| "loss": 0.5241, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8976311336717429, | |
| "grad_norm": 1.4322365522384644, | |
| "learning_rate": 1.385186613248171e-07, | |
| "loss": 0.4872, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.8984771573604061, | |
| "grad_norm": 1.5108438730239868, | |
| "learning_rate": 1.362779456903182e-07, | |
| "loss": 0.526, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8993231810490694, | |
| "grad_norm": 1.3449329137802124, | |
| "learning_rate": 1.340549934783164e-07, | |
| "loss": 0.4537, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.9001692047377327, | |
| "grad_norm": 1.4694099426269531, | |
| "learning_rate": 1.3184982139434587e-07, | |
| "loss": 0.5095, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.9010152284263959, | |
| "grad_norm": 1.5920803546905518, | |
| "learning_rate": 1.2966244601032267e-07, | |
| "loss": 0.5452, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.9018612521150592, | |
| "grad_norm": 1.3615165948867798, | |
| "learning_rate": 1.2749288376442044e-07, | |
| "loss": 0.4741, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.9027072758037225, | |
| "grad_norm": 1.6377193927764893, | |
| "learning_rate": 1.253411509609459e-07, | |
| "loss": 0.555, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.9035532994923858, | |
| "grad_norm": 1.604526400566101, | |
| "learning_rate": 1.2320726377021836e-07, | |
| "loss": 0.5634, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.9043993231810491, | |
| "grad_norm": 1.342687964439392, | |
| "learning_rate": 1.2109123822844653e-07, | |
| "loss": 0.5433, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.9052453468697124, | |
| "grad_norm": 1.5053247213363647, | |
| "learning_rate": 1.1899309023760686e-07, | |
| "loss": 0.5671, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9060913705583756, | |
| "grad_norm": 1.361431360244751, | |
| "learning_rate": 1.1691283556532846e-07, | |
| "loss": 0.5314, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.9069373942470389, | |
| "grad_norm": 1.2656420469284058, | |
| "learning_rate": 1.1485048984476998e-07, | |
| "loss": 0.4949, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.9077834179357022, | |
| "grad_norm": 1.4055432081222534, | |
| "learning_rate": 1.1280606857450387e-07, | |
| "loss": 0.4909, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.9086294416243654, | |
| "grad_norm": 1.4172112941741943, | |
| "learning_rate": 1.1077958711840032e-07, | |
| "loss": 0.4917, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.9094754653130288, | |
| "grad_norm": 1.3536546230316162, | |
| "learning_rate": 1.0877106070551175e-07, | |
| "loss": 0.4862, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.9103214890016921, | |
| "grad_norm": 1.4950735569000244, | |
| "learning_rate": 1.0678050442995802e-07, | |
| "loss": 0.479, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.9111675126903553, | |
| "grad_norm": 1.420637607574463, | |
| "learning_rate": 1.0480793325081174e-07, | |
| "loss": 0.5085, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.9120135363790186, | |
| "grad_norm": 1.3945380449295044, | |
| "learning_rate": 1.0285336199198858e-07, | |
| "loss": 0.4308, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.9128595600676819, | |
| "grad_norm": 1.4180177450180054, | |
| "learning_rate": 1.0091680534213389e-07, | |
| "loss": 0.4753, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.9137055837563451, | |
| "grad_norm": 1.3723344802856445, | |
| "learning_rate": 9.899827785451288e-08, | |
| "loss": 0.4353, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9145516074450084, | |
| "grad_norm": 1.4225527048110962, | |
| "learning_rate": 9.709779394690144e-08, | |
| "loss": 0.5055, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.9153976311336718, | |
| "grad_norm": 1.4633773565292358, | |
| "learning_rate": 9.521536790147722e-08, | |
| "loss": 0.4279, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.916243654822335, | |
| "grad_norm": 1.5522949695587158, | |
| "learning_rate": 9.335101386471285e-08, | |
| "loss": 0.5099, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.9170896785109983, | |
| "grad_norm": 1.4376224279403687, | |
| "learning_rate": 9.150474584726926e-08, | |
| "loss": 0.5157, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.9179357021996616, | |
| "grad_norm": 1.551571249961853, | |
| "learning_rate": 8.967657772389032e-08, | |
| "loss": 0.5229, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.9187817258883249, | |
| "grad_norm": 1.5067143440246582, | |
| "learning_rate": 8.78665232332998e-08, | |
| "loss": 0.5731, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.9196277495769881, | |
| "grad_norm": 1.6685928106307983, | |
| "learning_rate": 8.607459597809565e-08, | |
| "loss": 0.5185, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.9204737732656514, | |
| "grad_norm": 1.6518747806549072, | |
| "learning_rate": 8.430080942465085e-08, | |
| "loss": 0.4492, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.9213197969543148, | |
| "grad_norm": 1.4557331800460815, | |
| "learning_rate": 8.254517690300946e-08, | |
| "loss": 0.4527, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.922165820642978, | |
| "grad_norm": 1.3865046501159668, | |
| "learning_rate": 8.080771160678763e-08, | |
| "loss": 0.4718, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.9230118443316413, | |
| "grad_norm": 1.6267703771591187, | |
| "learning_rate": 7.908842659307525e-08, | |
| "loss": 0.5811, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.9238578680203046, | |
| "grad_norm": 1.4735057353973389, | |
| "learning_rate": 7.738733478233673e-08, | |
| "loss": 0.5173, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.9247038917089678, | |
| "grad_norm": 1.2759696245193481, | |
| "learning_rate": 7.57044489583128e-08, | |
| "loss": 0.447, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.9255499153976311, | |
| "grad_norm": 1.3496994972229004, | |
| "learning_rate": 7.40397817679278e-08, | |
| "loss": 0.5328, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.9263959390862944, | |
| "grad_norm": 1.4382102489471436, | |
| "learning_rate": 7.239334572119172e-08, | |
| "loss": 0.4431, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.9272419627749577, | |
| "grad_norm": 1.497261643409729, | |
| "learning_rate": 7.076515319110688e-08, | |
| "loss": 0.4202, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.928087986463621, | |
| "grad_norm": 1.4099481105804443, | |
| "learning_rate": 6.915521641357504e-08, | |
| "loss": 0.4269, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.9289340101522843, | |
| "grad_norm": 1.5935684442520142, | |
| "learning_rate": 6.756354748730709e-08, | |
| "loss": 0.4807, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.9297800338409475, | |
| "grad_norm": 1.3946175575256348, | |
| "learning_rate": 6.599015837372907e-08, | |
| "loss": 0.4243, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.9306260575296108, | |
| "grad_norm": 1.605188012123108, | |
| "learning_rate": 6.443506089689411e-08, | |
| "loss": 0.4828, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9314720812182741, | |
| "grad_norm": 1.4577580690383911, | |
| "learning_rate": 6.289826674339333e-08, | |
| "loss": 0.492, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.9323181049069373, | |
| "grad_norm": 1.3271147012710571, | |
| "learning_rate": 6.137978746226848e-08, | |
| "loss": 0.4126, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.9331641285956007, | |
| "grad_norm": 1.4398398399353027, | |
| "learning_rate": 5.987963446492384e-08, | |
| "loss": 0.5249, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.934010152284264, | |
| "grad_norm": 1.4152181148529053, | |
| "learning_rate": 5.839781902504227e-08, | |
| "loss": 0.4867, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.9348561759729273, | |
| "grad_norm": 1.6749119758605957, | |
| "learning_rate": 5.693435227849875e-08, | |
| "loss": 0.5718, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.9357021996615905, | |
| "grad_norm": 1.4649239778518677, | |
| "learning_rate": 5.548924522327748e-08, | |
| "loss": 0.4816, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.9365482233502538, | |
| "grad_norm": 1.4536762237548828, | |
| "learning_rate": 5.406250871938912e-08, | |
| "loss": 0.4235, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.937394247038917, | |
| "grad_norm": 1.7883013486862183, | |
| "learning_rate": 5.265415348879005e-08, | |
| "loss": 0.4966, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.9382402707275804, | |
| "grad_norm": 1.671594500541687, | |
| "learning_rate": 5.126419011529993e-08, | |
| "loss": 0.5989, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.9390862944162437, | |
| "grad_norm": 1.4351028203964233, | |
| "learning_rate": 4.989262904452369e-08, | |
| "loss": 0.4604, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.939932318104907, | |
| "grad_norm": 1.4522861242294312, | |
| "learning_rate": 4.853948058377245e-08, | |
| "loss": 0.5147, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.9407783417935702, | |
| "grad_norm": 1.4232293367385864, | |
| "learning_rate": 4.720475490198634e-08, | |
| "loss": 0.4657, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.9416243654822335, | |
| "grad_norm": 1.4143102169036865, | |
| "learning_rate": 4.5888462029658186e-08, | |
| "loss": 0.5611, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.9424703891708968, | |
| "grad_norm": 1.4739620685577393, | |
| "learning_rate": 4.4590611858756906e-08, | |
| "loss": 0.5188, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.94331641285956, | |
| "grad_norm": 1.3989241123199463, | |
| "learning_rate": 4.3311214142654766e-08, | |
| "loss": 0.5242, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.9441624365482234, | |
| "grad_norm": 1.3451173305511475, | |
| "learning_rate": 4.205027849605359e-08, | |
| "loss": 0.4451, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.9450084602368867, | |
| "grad_norm": 1.3867361545562744, | |
| "learning_rate": 4.0807814394911996e-08, | |
| "loss": 0.5151, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.9458544839255499, | |
| "grad_norm": 1.344876766204834, | |
| "learning_rate": 3.9583831176374654e-08, | |
| "loss": 0.4618, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.9467005076142132, | |
| "grad_norm": 1.4002844095230103, | |
| "learning_rate": 3.837833803870178e-08, | |
| "loss": 0.4737, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.9475465313028765, | |
| "grad_norm": 1.4744623899459839, | |
| "learning_rate": 3.7191344041200836e-08, | |
| "loss": 0.5001, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9483925549915397, | |
| "grad_norm": 1.4199622869491577, | |
| "learning_rate": 3.602285810415718e-08, | |
| "loss": 0.4648, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.949238578680203, | |
| "grad_norm": 1.4660803079605103, | |
| "learning_rate": 3.4872889008767954e-08, | |
| "loss": 0.5241, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.9500846023688664, | |
| "grad_norm": 1.4228583574295044, | |
| "learning_rate": 3.37414453970758e-08, | |
| "loss": 0.4474, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.9509306260575296, | |
| "grad_norm": 1.6068693399429321, | |
| "learning_rate": 3.262853577190445e-08, | |
| "loss": 0.5052, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.9517766497461929, | |
| "grad_norm": 1.5408366918563843, | |
| "learning_rate": 3.153416849679347e-08, | |
| "loss": 0.5581, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.9526226734348562, | |
| "grad_norm": 1.6030367612838745, | |
| "learning_rate": 3.04583517959367e-08, | |
| "loss": 0.5493, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.9534686971235194, | |
| "grad_norm": 1.3581690788269043, | |
| "learning_rate": 2.940109375411976e-08, | |
| "loss": 0.4723, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.9543147208121827, | |
| "grad_norm": 1.3531447649002075, | |
| "learning_rate": 2.8362402316660374e-08, | |
| "loss": 0.4892, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.955160744500846, | |
| "grad_norm": 1.4876835346221924, | |
| "learning_rate": 2.734228528934679e-08, | |
| "loss": 0.5043, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.9560067681895094, | |
| "grad_norm": 1.703354835510254, | |
| "learning_rate": 2.634075033838057e-08, | |
| "loss": 0.4791, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9568527918781726, | |
| "grad_norm": 1.5040783882141113, | |
| "learning_rate": 2.535780499031887e-08, | |
| "loss": 0.4949, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.9576988155668359, | |
| "grad_norm": 1.524038314819336, | |
| "learning_rate": 2.4393456632016977e-08, | |
| "loss": 0.5314, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.9585448392554992, | |
| "grad_norm": 1.4656578302383423, | |
| "learning_rate": 2.3447712510573928e-08, | |
| "loss": 0.5389, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.9593908629441624, | |
| "grad_norm": 1.492907166481018, | |
| "learning_rate": 2.2520579733277258e-08, | |
| "loss": 0.4928, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.9602368866328257, | |
| "grad_norm": 1.4177685976028442, | |
| "learning_rate": 2.161206526754972e-08, | |
| "loss": 0.4866, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.961082910321489, | |
| "grad_norm": 1.377503514289856, | |
| "learning_rate": 2.072217594089765e-08, | |
| "loss": 0.5325, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.9619289340101523, | |
| "grad_norm": 1.5548027753829956, | |
| "learning_rate": 1.985091844085796e-08, | |
| "loss": 0.5236, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.9627749576988156, | |
| "grad_norm": 1.4531538486480713, | |
| "learning_rate": 1.899829931495012e-08, | |
| "loss": 0.5555, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.9636209813874789, | |
| "grad_norm": 1.5614752769470215, | |
| "learning_rate": 1.8164324970625646e-08, | |
| "loss": 0.4734, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.9644670050761421, | |
| "grad_norm": 1.4508945941925049, | |
| "learning_rate": 1.7349001675219245e-08, | |
| "loss": 0.4839, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9653130287648054, | |
| "grad_norm": 1.348341464996338, | |
| "learning_rate": 1.65523355559033e-08, | |
| "loss": 0.4769, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.9661590524534687, | |
| "grad_norm": 1.5168187618255615, | |
| "learning_rate": 1.5774332599641228e-08, | |
| "loss": 0.4738, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.9670050761421319, | |
| "grad_norm": 1.6730620861053467, | |
| "learning_rate": 1.501499865314171e-08, | |
| "loss": 0.6032, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.9678510998307953, | |
| "grad_norm": 1.559067726135254, | |
| "learning_rate": 1.4274339422816197e-08, | |
| "loss": 0.4942, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.9686971235194586, | |
| "grad_norm": 1.3421351909637451, | |
| "learning_rate": 1.3552360474734794e-08, | |
| "loss": 0.4029, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9695431472081218, | |
| "grad_norm": 1.4648717641830444, | |
| "learning_rate": 1.2849067234584623e-08, | |
| "loss": 0.49, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9703891708967851, | |
| "grad_norm": 1.444698691368103, | |
| "learning_rate": 1.2164464987630131e-08, | |
| "loss": 0.4617, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.9712351945854484, | |
| "grad_norm": 1.3629822731018066, | |
| "learning_rate": 1.1498558878672017e-08, | |
| "loss": 0.5096, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.9720812182741116, | |
| "grad_norm": 1.6496318578720093, | |
| "learning_rate": 1.0851353912008644e-08, | |
| "loss": 0.5134, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.9729272419627749, | |
| "grad_norm": 1.529559850692749, | |
| "learning_rate": 1.0222854951399408e-08, | |
| "loss": 0.5383, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9737732656514383, | |
| "grad_norm": 1.5004898309707642, | |
| "learning_rate": 9.613066720028097e-09, | |
| "loss": 0.5168, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.9746192893401016, | |
| "grad_norm": 1.316214919090271, | |
| "learning_rate": 9.021993800466256e-09, | |
| "loss": 0.439, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9754653130287648, | |
| "grad_norm": 1.5756371021270752, | |
| "learning_rate": 8.449640634639878e-09, | |
| "loss": 0.5111, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.9763113367174281, | |
| "grad_norm": 1.5366209745407104, | |
| "learning_rate": 7.896011523794989e-09, | |
| "loss": 0.5319, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9771573604060914, | |
| "grad_norm": 1.3801406621932983, | |
| "learning_rate": 7.361110628466839e-09, | |
| "loss": 0.5529, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.9780033840947546, | |
| "grad_norm": 1.5490000247955322, | |
| "learning_rate": 6.84494196844715e-09, | |
| "loss": 0.5038, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.9788494077834179, | |
| "grad_norm": 1.5412358045578003, | |
| "learning_rate": 6.347509422754139e-09, | |
| "loss": 0.4155, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.9796954314720813, | |
| "grad_norm": 1.4816324710845947, | |
| "learning_rate": 5.868816729604765e-09, | |
| "loss": 0.4967, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9805414551607445, | |
| "grad_norm": 1.3229267597198486, | |
| "learning_rate": 5.408867486384473e-09, | |
| "loss": 0.4326, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.9813874788494078, | |
| "grad_norm": 1.5264832973480225, | |
| "learning_rate": 4.9676651496222136e-09, | |
| "loss": 0.4914, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9822335025380711, | |
| "grad_norm": 1.4101085662841797, | |
| "learning_rate": 4.5452130349629694e-09, | |
| "loss": 0.5168, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.9830795262267343, | |
| "grad_norm": 1.574628472328186, | |
| "learning_rate": 4.1415143171436026e-09, | |
| "loss": 0.504, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.9839255499153976, | |
| "grad_norm": 1.586148738861084, | |
| "learning_rate": 3.756572029968708e-09, | |
| "loss": 0.4851, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.9847715736040609, | |
| "grad_norm": 1.5436687469482422, | |
| "learning_rate": 3.390389066287858e-09, | |
| "loss": 0.4857, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9856175972927242, | |
| "grad_norm": 1.4142553806304932, | |
| "learning_rate": 3.0429681779739485e-09, | |
| "loss": 0.4534, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9864636209813875, | |
| "grad_norm": 1.3092448711395264, | |
| "learning_rate": 2.7143119759026614e-09, | |
| "loss": 0.4619, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9873096446700508, | |
| "grad_norm": 1.4233113527297974, | |
| "learning_rate": 2.404422929932204e-09, | |
| "loss": 0.5151, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.988155668358714, | |
| "grad_norm": 1.3810383081436157, | |
| "learning_rate": 2.1133033688858217e-09, | |
| "loss": 0.4998, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9890016920473773, | |
| "grad_norm": 1.4071393013000488, | |
| "learning_rate": 1.8409554805329243e-09, | |
| "loss": 0.4839, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.9898477157360406, | |
| "grad_norm": 1.4692143201828003, | |
| "learning_rate": 1.5873813115740989e-09, | |
| "loss": 0.4573, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9906937394247038, | |
| "grad_norm": 1.4754483699798584, | |
| "learning_rate": 1.3525827676247327e-09, | |
| "loss": 0.5774, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.9915397631133672, | |
| "grad_norm": 1.3921698331832886, | |
| "learning_rate": 1.1365616132008595e-09, | |
| "loss": 0.488, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.9923857868020305, | |
| "grad_norm": 1.3214542865753174, | |
| "learning_rate": 9.393194717061127e-10, | |
| "loss": 0.5449, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.9932318104906938, | |
| "grad_norm": 1.474719524383545, | |
| "learning_rate": 7.608578254195143e-10, | |
| "loss": 0.4718, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.994077834179357, | |
| "grad_norm": 1.3366878032684326, | |
| "learning_rate": 6.011780154843716e-10, | |
| "loss": 0.5207, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9949238578680203, | |
| "grad_norm": 1.5625263452529907, | |
| "learning_rate": 4.602812418974534e-10, | |
| "loss": 0.5025, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.9957698815566836, | |
| "grad_norm": 1.4968297481536865, | |
| "learning_rate": 3.3816856350177284e-10, | |
| "loss": 0.5409, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.9966159052453468, | |
| "grad_norm": 1.4987884759902954, | |
| "learning_rate": 2.348408979760408e-10, | |
| "loss": 0.4497, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9974619289340102, | |
| "grad_norm": 1.578500509262085, | |
| "learning_rate": 1.502990218302247e-10, | |
| "loss": 0.5731, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.9983079526226735, | |
| "grad_norm": 1.4984197616577148, | |
| "learning_rate": 8.454357039860972e-11, | |
| "loss": 0.4767, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9991539763113367, | |
| "grad_norm": 1.5081285238265991, | |
| "learning_rate": 3.7575037834247655e-11, | |
| "loss": 0.5204, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.4636836051940918, | |
| "learning_rate": 9.393777107291614e-12, | |
| "loss": 0.5414, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1182, | |
| "total_flos": 8.436898797652541e+17, | |
| "train_loss": 0.5417148913627147, | |
| "train_runtime": 7080.4391, | |
| "train_samples_per_second": 4.674, | |
| "train_steps_per_second": 0.167 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1182, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.436898797652541e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |