| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.09319377109279975, | |
| "eval_steps": 500, | |
| "global_step": 34000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00013704966337176435, | |
| "grad_norm": 1.0435270071029663, | |
| "learning_rate": 2e-05, | |
| "loss": 1.8539, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0002740993267435287, | |
| "grad_norm": 1.1732431650161743, | |
| "learning_rate": 4e-05, | |
| "loss": 1.2692, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00041114899011529305, | |
| "grad_norm": 1.176833987236023, | |
| "learning_rate": 6e-05, | |
| "loss": 1.1063, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0005481986534870574, | |
| "grad_norm": 1.2997100353240967, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0579, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0006852483168588217, | |
| "grad_norm": 1.2449016571044922, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0288, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0008222979802305861, | |
| "grad_norm": 1.1221928596496582, | |
| "learning_rate": 0.00012, | |
| "loss": 1.0089, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0009593476436023504, | |
| "grad_norm": 1.4317854642868042, | |
| "learning_rate": 0.00014, | |
| "loss": 1.0009, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0010963973069741148, | |
| "grad_norm": 0.8827124834060669, | |
| "learning_rate": 0.00016, | |
| "loss": 0.9776, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0012334469703458792, | |
| "grad_norm": 0.7773798704147339, | |
| "learning_rate": 0.00018, | |
| "loss": 0.9696, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0013704966337176434, | |
| "grad_norm": 0.7033634185791016, | |
| "learning_rate": 0.0002, | |
| "loss": 0.9546, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0015075462970894078, | |
| "grad_norm": 0.7856244444847107, | |
| "learning_rate": 0.0001999999907056826, | |
| "loss": 0.9599, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0016445959604611722, | |
| "grad_norm": 0.6872720122337341, | |
| "learning_rate": 0.00019999996282273207, | |
| "loss": 0.9439, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0017816456238329364, | |
| "grad_norm": 0.7475189566612244, | |
| "learning_rate": 0.00019999991635115367, | |
| "loss": 0.9472, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.0019186952872047008, | |
| "grad_norm": 0.6948615908622742, | |
| "learning_rate": 0.000199999851290956, | |
| "loss": 0.9288, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.002055744950576465, | |
| "grad_norm": 0.682184636592865, | |
| "learning_rate": 0.00019999976764215115, | |
| "loss": 0.9259, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.0021927946139482296, | |
| "grad_norm": 0.6290674209594727, | |
| "learning_rate": 0.00019999966540475462, | |
| "loss": 0.933, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.002329844277319994, | |
| "grad_norm": 0.6512512564659119, | |
| "learning_rate": 0.00019999954457878552, | |
| "loss": 0.9161, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.0024668939406917584, | |
| "grad_norm": 0.6670469641685486, | |
| "learning_rate": 0.00019999940516426623, | |
| "loss": 0.921, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0026039436040635223, | |
| "grad_norm": 0.6236344575881958, | |
| "learning_rate": 0.0001999992471612227, | |
| "loss": 0.9115, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.0027409932674352868, | |
| "grad_norm": 0.5856446027755737, | |
| "learning_rate": 0.00019999907056968429, | |
| "loss": 0.8974, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.002878042930807051, | |
| "grad_norm": 0.6129364967346191, | |
| "learning_rate": 0.00019999887538968381, | |
| "loss": 0.893, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0030150925941788156, | |
| "grad_norm": 0.6010938286781311, | |
| "learning_rate": 0.00019999866162125754, | |
| "loss": 0.9043, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.00315214225755058, | |
| "grad_norm": 0.6339828372001648, | |
| "learning_rate": 0.00019999842926444528, | |
| "loss": 0.9026, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.0032891919209223444, | |
| "grad_norm": 0.6208747029304504, | |
| "learning_rate": 0.00019999817831929017, | |
| "loss": 0.8944, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0034262415842941088, | |
| "grad_norm": 0.612678587436676, | |
| "learning_rate": 0.0001999979087858389, | |
| "loss": 0.8858, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.0035632912476658727, | |
| "grad_norm": 0.5846489667892456, | |
| "learning_rate": 0.00019999762066414144, | |
| "loss": 0.8873, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.003700340911037637, | |
| "grad_norm": 0.6027012467384338, | |
| "learning_rate": 0.00019999731395425153, | |
| "loss": 0.8951, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.0038373905744094015, | |
| "grad_norm": 0.5514549016952515, | |
| "learning_rate": 0.00019999698865622606, | |
| "loss": 0.8746, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.003974440237781166, | |
| "grad_norm": 0.6226556301116943, | |
| "learning_rate": 0.00019999664477012553, | |
| "loss": 0.8852, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.00411148990115293, | |
| "grad_norm": 0.5818433165550232, | |
| "learning_rate": 0.00019999628229601388, | |
| "loss": 0.8859, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.004248539564524694, | |
| "grad_norm": 0.588690996170044, | |
| "learning_rate": 0.0001999959012339585, | |
| "loss": 0.8813, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.004385589227896459, | |
| "grad_norm": 0.5324600338935852, | |
| "learning_rate": 0.00019999550158403018, | |
| "loss": 0.8643, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.004522638891268223, | |
| "grad_norm": 0.5539863109588623, | |
| "learning_rate": 0.00019999508334630323, | |
| "loss": 0.8732, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.004659688554639988, | |
| "grad_norm": 0.5579485893249512, | |
| "learning_rate": 0.0001999946465208554, | |
| "loss": 0.8718, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.004796738218011752, | |
| "grad_norm": 0.5488457083702087, | |
| "learning_rate": 0.0001999941911077679, | |
| "loss": 0.8656, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.004933787881383517, | |
| "grad_norm": 0.5370609164237976, | |
| "learning_rate": 0.00019999371710712537, | |
| "loss": 0.8729, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.005070837544755281, | |
| "grad_norm": 0.5298507809638977, | |
| "learning_rate": 0.0001999932245190159, | |
| "loss": 0.8703, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.005207887208127045, | |
| "grad_norm": 0.5601800084114075, | |
| "learning_rate": 0.0001999927133435311, | |
| "loss": 0.874, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.0053449368714988095, | |
| "grad_norm": 0.547234833240509, | |
| "learning_rate": 0.00019999218358076598, | |
| "loss": 0.8713, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.0054819865348705735, | |
| "grad_norm": 0.5623106360435486, | |
| "learning_rate": 0.00019999163523081896, | |
| "loss": 0.8742, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.005619036198242338, | |
| "grad_norm": 0.5385792255401611, | |
| "learning_rate": 0.00019999106829379207, | |
| "loss": 0.8679, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.005756085861614102, | |
| "grad_norm": 0.5167329907417297, | |
| "learning_rate": 0.0001999904827697906, | |
| "loss": 0.8634, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.005893135524985867, | |
| "grad_norm": 0.5551963448524475, | |
| "learning_rate": 0.00019998987865892345, | |
| "loss": 0.8684, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.006030185188357631, | |
| "grad_norm": 0.5749981999397278, | |
| "learning_rate": 0.00019998925596130288, | |
| "loss": 0.8593, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.006167234851729395, | |
| "grad_norm": 0.506532609462738, | |
| "learning_rate": 0.00019998861467704468, | |
| "loss": 0.8511, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.00630428451510116, | |
| "grad_norm": 0.5676366686820984, | |
| "learning_rate": 0.000199987954806268, | |
| "loss": 0.8704, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.006441334178472924, | |
| "grad_norm": 0.5860298871994019, | |
| "learning_rate": 0.00019998727634909557, | |
| "loss": 0.8624, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.006578383841844689, | |
| "grad_norm": 0.5202209949493408, | |
| "learning_rate": 0.00019998657930565348, | |
| "loss": 0.8681, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.006715433505216453, | |
| "grad_norm": 0.5211143493652344, | |
| "learning_rate": 0.00019998586367607127, | |
| "loss": 0.8578, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.0068524831685882175, | |
| "grad_norm": 0.5322254300117493, | |
| "learning_rate": 0.000199985129460482, | |
| "loss": 0.8595, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0069895328319599815, | |
| "grad_norm": 0.5367264151573181, | |
| "learning_rate": 0.00019998437665902214, | |
| "loss": 0.8549, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.0071265824953317455, | |
| "grad_norm": 0.5147452354431152, | |
| "learning_rate": 0.0001999836052718316, | |
| "loss": 0.8608, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.00726363215870351, | |
| "grad_norm": 0.5567029118537903, | |
| "learning_rate": 0.0001999828152990538, | |
| "loss": 0.8648, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.007400681822075274, | |
| "grad_norm": 0.5113374590873718, | |
| "learning_rate": 0.00019998200674083562, | |
| "loss": 0.8608, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.007537731485447039, | |
| "grad_norm": 0.5345346331596375, | |
| "learning_rate": 0.0001999811795973273, | |
| "loss": 0.8603, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.007674781148818803, | |
| "grad_norm": 0.5091614723205566, | |
| "learning_rate": 0.00019998033386868258, | |
| "loss": 0.863, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.007811830812190568, | |
| "grad_norm": 0.5146520137786865, | |
| "learning_rate": 0.00019997946955505874, | |
| "loss": 0.8555, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.007948880475562333, | |
| "grad_norm": 0.5425053834915161, | |
| "learning_rate": 0.0001999785866566164, | |
| "loss": 0.847, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.008085930138934097, | |
| "grad_norm": 0.5353215932846069, | |
| "learning_rate": 0.00019997768517351967, | |
| "loss": 0.8463, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.00822297980230586, | |
| "grad_norm": 0.5260730385780334, | |
| "learning_rate": 0.00019997676510593614, | |
| "loss": 0.8536, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.008360029465677625, | |
| "grad_norm": 0.5397545099258423, | |
| "learning_rate": 0.00019997582645403687, | |
| "loss": 0.8579, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.008497079129049389, | |
| "grad_norm": 0.5507705211639404, | |
| "learning_rate": 0.0001999748692179963, | |
| "loss": 0.8429, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.008634128792421154, | |
| "grad_norm": 0.5049775838851929, | |
| "learning_rate": 0.00019997389339799235, | |
| "loss": 0.8497, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.008771178455792918, | |
| "grad_norm": 0.49926578998565674, | |
| "learning_rate": 0.00019997289899420647, | |
| "loss": 0.8384, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.008908228119164682, | |
| "grad_norm": 0.5003844499588013, | |
| "learning_rate": 0.00019997188600682345, | |
| "loss": 0.8465, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.009045277782536446, | |
| "grad_norm": 0.5375053286552429, | |
| "learning_rate": 0.00019997085443603166, | |
| "loss": 0.8496, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.00918232744590821, | |
| "grad_norm": 0.5452185869216919, | |
| "learning_rate": 0.0001999698042820228, | |
| "loss": 0.8502, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.009319377109279976, | |
| "grad_norm": 0.53727126121521, | |
| "learning_rate": 0.00019996873554499204, | |
| "loss": 0.8498, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.00945642677265174, | |
| "grad_norm": 0.4958254098892212, | |
| "learning_rate": 0.00019996764822513812, | |
| "loss": 0.8532, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.009593476436023504, | |
| "grad_norm": 0.5111714601516724, | |
| "learning_rate": 0.00019996654232266314, | |
| "loss": 0.8457, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.009730526099395268, | |
| "grad_norm": 0.5191270112991333, | |
| "learning_rate": 0.00019996541783777268, | |
| "loss": 0.8555, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.009867575762767034, | |
| "grad_norm": 0.47990381717681885, | |
| "learning_rate": 0.0001999642747706757, | |
| "loss": 0.8382, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.010004625426138797, | |
| "grad_norm": 0.5000740885734558, | |
| "learning_rate": 0.0001999631131215848, | |
| "loss": 0.845, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.010141675089510561, | |
| "grad_norm": 0.5065198540687561, | |
| "learning_rate": 0.0001999619328907158, | |
| "loss": 0.8392, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.010278724752882325, | |
| "grad_norm": 0.5001168847084045, | |
| "learning_rate": 0.00019996073407828812, | |
| "loss": 0.8455, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.01041577441625409, | |
| "grad_norm": 0.501194417476654, | |
| "learning_rate": 0.00019995951668452466, | |
| "loss": 0.8474, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.010552824079625855, | |
| "grad_norm": 0.5274146795272827, | |
| "learning_rate": 0.00019995828070965165, | |
| "loss": 0.8494, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.010689873742997619, | |
| "grad_norm": 0.4850250780582428, | |
| "learning_rate": 0.00019995702615389885, | |
| "loss": 0.8508, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.010826923406369383, | |
| "grad_norm": 0.49130746722221375, | |
| "learning_rate": 0.00019995575301749954, | |
| "loss": 0.8422, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.010963973069741147, | |
| "grad_norm": 0.5032249093055725, | |
| "learning_rate": 0.00019995446130069026, | |
| "loss": 0.8419, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.011101022733112911, | |
| "grad_norm": 0.5428338646888733, | |
| "learning_rate": 0.0001999531510037112, | |
| "loss": 0.839, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.011238072396484677, | |
| "grad_norm": 0.5070387125015259, | |
| "learning_rate": 0.00019995182212680592, | |
| "loss": 0.8372, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.01137512205985644, | |
| "grad_norm": 0.506024956703186, | |
| "learning_rate": 0.0001999504746702214, | |
| "loss": 0.8525, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.011512171723228205, | |
| "grad_norm": 0.48989248275756836, | |
| "learning_rate": 0.00019994910863420815, | |
| "loss": 0.846, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.011649221386599969, | |
| "grad_norm": 0.5321078300476074, | |
| "learning_rate": 0.0001999477240190201, | |
| "loss": 0.8383, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.011786271049971734, | |
| "grad_norm": 0.4978984594345093, | |
| "learning_rate": 0.00019994632082491462, | |
| "loss": 0.8466, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.011923320713343498, | |
| "grad_norm": 0.5495821833610535, | |
| "learning_rate": 0.00019994489905215254, | |
| "loss": 0.8467, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.012060370376715262, | |
| "grad_norm": 0.49797794222831726, | |
| "learning_rate": 0.00019994345870099814, | |
| "loss": 0.8314, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.012197420040087026, | |
| "grad_norm": 0.5053208470344543, | |
| "learning_rate": 0.00019994199977171922, | |
| "loss": 0.835, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.01233446970345879, | |
| "grad_norm": 0.5199260115623474, | |
| "learning_rate": 0.00019994052226458687, | |
| "loss": 0.8353, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.012471519366830556, | |
| "grad_norm": 0.48166322708129883, | |
| "learning_rate": 0.00019993902617987584, | |
| "loss": 0.8383, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.01260856903020232, | |
| "grad_norm": 0.49765893816947937, | |
| "learning_rate": 0.00019993751151786414, | |
| "loss": 0.8426, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.012745618693574084, | |
| "grad_norm": 0.5026233792304993, | |
| "learning_rate": 0.00019993597827883345, | |
| "loss": 0.8303, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.012882668356945848, | |
| "grad_norm": 0.46544715762138367, | |
| "learning_rate": 0.0001999344264630686, | |
| "loss": 0.8401, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.013019718020317612, | |
| "grad_norm": 0.5394437909126282, | |
| "learning_rate": 0.00019993285607085827, | |
| "loss": 0.8342, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.013156767683689377, | |
| "grad_norm": 0.5067171454429626, | |
| "learning_rate": 0.00019993126710249416, | |
| "loss": 0.8325, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.013293817347061141, | |
| "grad_norm": 0.48171430826187134, | |
| "learning_rate": 0.0001999296595582718, | |
| "loss": 0.8336, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.013430867010432905, | |
| "grad_norm": 0.49666112661361694, | |
| "learning_rate": 0.00019992803343848992, | |
| "loss": 0.8305, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.01356791667380467, | |
| "grad_norm": 0.4750135838985443, | |
| "learning_rate": 0.00019992638874345082, | |
| "loss": 0.839, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.013704966337176435, | |
| "grad_norm": 0.49714985489845276, | |
| "learning_rate": 0.00019992472547346023, | |
| "loss": 0.8303, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.013842016000548199, | |
| "grad_norm": 0.48859328031539917, | |
| "learning_rate": 0.0001999230436288273, | |
| "loss": 0.8359, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.013979065663919963, | |
| "grad_norm": 0.5311440229415894, | |
| "learning_rate": 0.00019992134320986473, | |
| "loss": 0.8295, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.014116115327291727, | |
| "grad_norm": 0.5218192934989929, | |
| "learning_rate": 0.00019991962421688855, | |
| "loss": 0.833, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.014253164990663491, | |
| "grad_norm": 0.4954502284526825, | |
| "learning_rate": 0.00019991788665021828, | |
| "loss": 0.835, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.014390214654035257, | |
| "grad_norm": 0.502061665058136, | |
| "learning_rate": 0.00019991613051017698, | |
| "loss": 0.8305, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.01452726431740702, | |
| "grad_norm": 0.4653523564338684, | |
| "learning_rate": 0.00019991435579709102, | |
| "loss": 0.838, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.014664313980778785, | |
| "grad_norm": 0.4881741404533386, | |
| "learning_rate": 0.00019991256251129035, | |
| "loss": 0.827, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.014801363644150549, | |
| "grad_norm": 0.49700623750686646, | |
| "learning_rate": 0.0001999107506531083, | |
| "loss": 0.8418, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.014938413307522314, | |
| "grad_norm": 0.535879909992218, | |
| "learning_rate": 0.00019990892022288164, | |
| "loss": 0.8402, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.015075462970894078, | |
| "grad_norm": 0.4582114815711975, | |
| "learning_rate": 0.00019990707122095064, | |
| "loss": 0.8367, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.015212512634265842, | |
| "grad_norm": 0.5143499970436096, | |
| "learning_rate": 0.00019990520364765902, | |
| "loss": 0.8314, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.015349562297637606, | |
| "grad_norm": 0.45117756724357605, | |
| "learning_rate": 0.00019990331750335393, | |
| "loss": 0.8224, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.01548661196100937, | |
| "grad_norm": 0.5038974285125732, | |
| "learning_rate": 0.000199901412788386, | |
| "loss": 0.8379, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.015623661624381136, | |
| "grad_norm": 0.4858607351779938, | |
| "learning_rate": 0.0001998994895031092, | |
| "loss": 0.8329, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.015760711287752898, | |
| "grad_norm": 0.4778089225292206, | |
| "learning_rate": 0.0001998975476478812, | |
| "loss": 0.8261, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.015897760951124666, | |
| "grad_norm": 0.47703468799591064, | |
| "learning_rate": 0.00019989558722306277, | |
| "loss": 0.8276, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.01603481061449643, | |
| "grad_norm": 0.49877598881721497, | |
| "learning_rate": 0.0001998936082290185, | |
| "loss": 0.8374, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.016171860277868193, | |
| "grad_norm": 0.45777496695518494, | |
| "learning_rate": 0.00019989161066611617, | |
| "loss": 0.8345, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.016308909941239957, | |
| "grad_norm": 0.4969111382961273, | |
| "learning_rate": 0.00019988959453472708, | |
| "loss": 0.83, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.01644595960461172, | |
| "grad_norm": 0.4316222667694092, | |
| "learning_rate": 0.0001998875598352261, | |
| "loss": 0.8248, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.016583009267983485, | |
| "grad_norm": 0.4757803678512573, | |
| "learning_rate": 0.00019988550656799135, | |
| "loss": 0.8336, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.01672005893135525, | |
| "grad_norm": 0.5188441872596741, | |
| "learning_rate": 0.00019988343473340456, | |
| "loss": 0.8336, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.016857108594727013, | |
| "grad_norm": 0.5048016309738159, | |
| "learning_rate": 0.00019988134433185083, | |
| "loss": 0.8209, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.016994158258098777, | |
| "grad_norm": 0.5321030020713806, | |
| "learning_rate": 0.00019987923536371875, | |
| "loss": 0.8338, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.01713120792147054, | |
| "grad_norm": 0.5210222005844116, | |
| "learning_rate": 0.00019987710782940035, | |
| "loss": 0.8313, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.01726825758484231, | |
| "grad_norm": 0.47530996799468994, | |
| "learning_rate": 0.00019987496172929107, | |
| "loss": 0.8271, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.017405307248214073, | |
| "grad_norm": 0.47147926688194275, | |
| "learning_rate": 0.00019987279706378992, | |
| "loss": 0.8241, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.017542356911585837, | |
| "grad_norm": 0.5165305733680725, | |
| "learning_rate": 0.0001998706138332992, | |
| "loss": 0.8255, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.0176794065749576, | |
| "grad_norm": 0.5041708946228027, | |
| "learning_rate": 0.00019986841203822482, | |
| "loss": 0.8251, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.017816456238329365, | |
| "grad_norm": 0.4591565430164337, | |
| "learning_rate": 0.000199866191678976, | |
| "loss": 0.8285, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.01795350590170113, | |
| "grad_norm": 0.5095290541648865, | |
| "learning_rate": 0.00019986395275596553, | |
| "loss": 0.8325, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.018090555565072892, | |
| "grad_norm": 0.5019941926002502, | |
| "learning_rate": 0.00019986169526960953, | |
| "loss": 0.8266, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.018227605228444656, | |
| "grad_norm": 0.4563123285770416, | |
| "learning_rate": 0.00019985941922032768, | |
| "loss": 0.8215, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.01836465489181642, | |
| "grad_norm": 0.46666857600212097, | |
| "learning_rate": 0.00019985712460854308, | |
| "loss": 0.8223, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.018501704555188188, | |
| "grad_norm": 0.4678499102592468, | |
| "learning_rate": 0.00019985481143468224, | |
| "loss": 0.8319, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.018638754218559952, | |
| "grad_norm": 0.47524577379226685, | |
| "learning_rate": 0.00019985247969917511, | |
| "loss": 0.826, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.018775803881931716, | |
| "grad_norm": 0.49438291788101196, | |
| "learning_rate": 0.0001998501294024552, | |
| "loss": 0.8244, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.01891285354530348, | |
| "grad_norm": 0.49879634380340576, | |
| "learning_rate": 0.00019984776054495935, | |
| "loss": 0.8315, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.019049903208675244, | |
| "grad_norm": 0.44203802943229675, | |
| "learning_rate": 0.00019984537312712792, | |
| "loss": 0.8286, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.019186952872047008, | |
| "grad_norm": 0.503197193145752, | |
| "learning_rate": 0.00019984296714940469, | |
| "loss": 0.826, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.01932400253541877, | |
| "grad_norm": 0.47709017992019653, | |
| "learning_rate": 0.0001998405426122369, | |
| "loss": 0.8289, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.019461052198790536, | |
| "grad_norm": 0.47072190046310425, | |
| "learning_rate": 0.00019983809951607526, | |
| "loss": 0.8241, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.0195981018621623, | |
| "grad_norm": 0.5062088370323181, | |
| "learning_rate": 0.00019983563786137387, | |
| "loss": 0.8298, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.019735151525534067, | |
| "grad_norm": 0.47417354583740234, | |
| "learning_rate": 0.00019983315764859034, | |
| "loss": 0.824, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.01987220118890583, | |
| "grad_norm": 0.48068752884864807, | |
| "learning_rate": 0.0001998306588781857, | |
| "loss": 0.8237, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.020009250852277595, | |
| "grad_norm": 0.4789169430732727, | |
| "learning_rate": 0.00019982814155062445, | |
| "loss": 0.8289, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.02014630051564936, | |
| "grad_norm": 0.48607948422431946, | |
| "learning_rate": 0.00019982560566637455, | |
| "loss": 0.8241, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.020283350179021123, | |
| "grad_norm": 0.4875280261039734, | |
| "learning_rate": 0.0001998230512259073, | |
| "loss": 0.8236, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.020420399842392887, | |
| "grad_norm": 0.5202775001525879, | |
| "learning_rate": 0.00019982047822969762, | |
| "loss": 0.8231, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.02055744950576465, | |
| "grad_norm": 0.4869972765445709, | |
| "learning_rate": 0.00019981788667822374, | |
| "loss": 0.8248, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.020694499169136415, | |
| "grad_norm": 0.49330034852027893, | |
| "learning_rate": 0.00019981527657196745, | |
| "loss": 0.8358, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.02083154883250818, | |
| "grad_norm": 0.49196910858154297, | |
| "learning_rate": 0.00019981264791141387, | |
| "loss": 0.8218, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.020968598495879943, | |
| "grad_norm": 0.48113760352134705, | |
| "learning_rate": 0.00019981000069705168, | |
| "loss": 0.8233, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.02110564815925171, | |
| "grad_norm": 0.47389155626296997, | |
| "learning_rate": 0.00019980733492937293, | |
| "loss": 0.8177, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.021242697822623474, | |
| "grad_norm": 0.48917362093925476, | |
| "learning_rate": 0.00019980465060887319, | |
| "loss": 0.8261, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.021379747485995238, | |
| "grad_norm": 0.4534808397293091, | |
| "learning_rate": 0.00019980194773605141, | |
| "loss": 0.8249, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.021516797149367002, | |
| "grad_norm": 0.5190074443817139, | |
| "learning_rate": 0.00019979922631141, | |
| "loss": 0.834, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.021653846812738766, | |
| "grad_norm": 0.49182918667793274, | |
| "learning_rate": 0.00019979648633545487, | |
| "loss": 0.8229, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.02179089647611053, | |
| "grad_norm": 0.47965994477272034, | |
| "learning_rate": 0.00019979372780869534, | |
| "loss": 0.8204, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.021927946139482294, | |
| "grad_norm": 0.48521897196769714, | |
| "learning_rate": 0.00019979095073164416, | |
| "loss": 0.8215, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.022064995802854058, | |
| "grad_norm": 0.522633969783783, | |
| "learning_rate": 0.00019978815510481756, | |
| "loss": 0.826, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.022202045466225822, | |
| "grad_norm": 0.47689947485923767, | |
| "learning_rate": 0.00019978534092873523, | |
| "loss": 0.8232, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.02233909512959759, | |
| "grad_norm": 0.4920557737350464, | |
| "learning_rate": 0.00019978250820392024, | |
| "loss": 0.8271, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.022476144792969353, | |
| "grad_norm": 0.49732133746147156, | |
| "learning_rate": 0.00019977965693089922, | |
| "loss": 0.8255, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.022613194456341117, | |
| "grad_norm": 0.4798087775707245, | |
| "learning_rate": 0.00019977678711020214, | |
| "loss": 0.818, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.02275024411971288, | |
| "grad_norm": 0.48783496022224426, | |
| "learning_rate": 0.00019977389874236242, | |
| "loss": 0.8164, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.022887293783084645, | |
| "grad_norm": 0.48201945424079895, | |
| "learning_rate": 0.00019977099182791707, | |
| "loss": 0.8114, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.02302434344645641, | |
| "grad_norm": 0.4945693016052246, | |
| "learning_rate": 0.00019976806636740638, | |
| "loss": 0.8213, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.023161393109828173, | |
| "grad_norm": 0.4559716582298279, | |
| "learning_rate": 0.00019976512236137418, | |
| "loss": 0.8133, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.023298442773199937, | |
| "grad_norm": 0.5023700594902039, | |
| "learning_rate": 0.00019976215981036766, | |
| "loss": 0.8205, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.0234354924365717, | |
| "grad_norm": 0.49640583992004395, | |
| "learning_rate": 0.00019975917871493758, | |
| "loss": 0.8304, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.02357254209994347, | |
| "grad_norm": 0.4793291389942169, | |
| "learning_rate": 0.0001997561790756381, | |
| "loss": 0.8312, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.023709591763315233, | |
| "grad_norm": 0.5027498006820679, | |
| "learning_rate": 0.00019975316089302674, | |
| "loss": 0.8255, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.023846641426686997, | |
| "grad_norm": 0.48330357670783997, | |
| "learning_rate": 0.0001997501241676646, | |
| "loss": 0.817, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.02398369109005876, | |
| "grad_norm": 0.4686134159564972, | |
| "learning_rate": 0.00019974706890011615, | |
| "loss": 0.8145, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.024120740753430524, | |
| "grad_norm": 0.5041179656982422, | |
| "learning_rate": 0.00019974399509094929, | |
| "loss": 0.8226, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.02425779041680229, | |
| "grad_norm": 0.48190945386886597, | |
| "learning_rate": 0.00019974090274073544, | |
| "loss": 0.8174, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.024394840080174052, | |
| "grad_norm": 0.4537580907344818, | |
| "learning_rate": 0.00019973779185004941, | |
| "loss": 0.8236, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.024531889743545816, | |
| "grad_norm": 0.46414464712142944, | |
| "learning_rate": 0.00019973466241946948, | |
| "loss": 0.8154, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.02466893940691758, | |
| "grad_norm": 0.4720008671283722, | |
| "learning_rate": 0.00019973151444957737, | |
| "loss": 0.8171, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.024805989070289348, | |
| "grad_norm": 0.484370619058609, | |
| "learning_rate": 0.0001997283479409582, | |
| "loss": 0.8226, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.024943038733661112, | |
| "grad_norm": 0.44341036677360535, | |
| "learning_rate": 0.00019972516289420064, | |
| "loss": 0.8214, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.025080088397032876, | |
| "grad_norm": 0.4852900207042694, | |
| "learning_rate": 0.00019972195930989675, | |
| "loss": 0.8279, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.02521713806040464, | |
| "grad_norm": 0.4795898199081421, | |
| "learning_rate": 0.000199718737188642, | |
| "loss": 0.8256, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.025354187723776404, | |
| "grad_norm": 0.47716230154037476, | |
| "learning_rate": 0.00019971549653103533, | |
| "loss": 0.815, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.025491237387148168, | |
| "grad_norm": 0.4899328351020813, | |
| "learning_rate": 0.00019971223733767913, | |
| "loss": 0.8123, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.02562828705051993, | |
| "grad_norm": 0.48426783084869385, | |
| "learning_rate": 0.00019970895960917927, | |
| "loss": 0.8128, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.025765336713891696, | |
| "grad_norm": 0.48592498898506165, | |
| "learning_rate": 0.00019970566334614502, | |
| "loss": 0.8117, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.02590238637726346, | |
| "grad_norm": 0.4794251322746277, | |
| "learning_rate": 0.00019970234854918914, | |
| "loss": 0.8165, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.026039436040635223, | |
| "grad_norm": 0.4640505015850067, | |
| "learning_rate": 0.00019969901521892778, | |
| "loss": 0.8194, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.02617648570400699, | |
| "grad_norm": 0.4869844913482666, | |
| "learning_rate": 0.00019969566335598056, | |
| "loss": 0.8198, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.026313535367378755, | |
| "grad_norm": 0.4699898362159729, | |
| "learning_rate": 0.00019969229296097052, | |
| "loss": 0.8145, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.02645058503075052, | |
| "grad_norm": 0.43347305059432983, | |
| "learning_rate": 0.0001996889040345242, | |
| "loss": 0.826, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.026587634694122283, | |
| "grad_norm": 0.49129053950309753, | |
| "learning_rate": 0.00019968549657727155, | |
| "loss": 0.8168, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.026724684357494047, | |
| "grad_norm": 0.48174822330474854, | |
| "learning_rate": 0.00019968207058984597, | |
| "loss": 0.8116, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.02686173402086581, | |
| "grad_norm": 0.4568016827106476, | |
| "learning_rate": 0.00019967862607288435, | |
| "loss": 0.8142, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.026998783684237575, | |
| "grad_norm": 0.4910202622413635, | |
| "learning_rate": 0.00019967516302702692, | |
| "loss": 0.8181, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.02713583334760934, | |
| "grad_norm": 0.4772759974002838, | |
| "learning_rate": 0.0001996716814529174, | |
| "loss": 0.8116, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.027272883010981103, | |
| "grad_norm": 0.5179160833358765, | |
| "learning_rate": 0.000199668181351203, | |
| "loss": 0.8254, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.02740993267435287, | |
| "grad_norm": 0.506245493888855, | |
| "learning_rate": 0.00019966466272253435, | |
| "loss": 0.8182, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.027546982337724634, | |
| "grad_norm": 0.4551698565483093, | |
| "learning_rate": 0.00019966112556756547, | |
| "loss": 0.8201, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.027684032001096398, | |
| "grad_norm": 0.4976311922073364, | |
| "learning_rate": 0.00019965756988695393, | |
| "loss": 0.8195, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.027821081664468162, | |
| "grad_norm": 0.49078822135925293, | |
| "learning_rate": 0.00019965399568136062, | |
| "loss": 0.8181, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.027958131327839926, | |
| "grad_norm": 0.4534152150154114, | |
| "learning_rate": 0.00019965040295145, | |
| "loss": 0.8093, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.02809518099121169, | |
| "grad_norm": 0.4695248603820801, | |
| "learning_rate": 0.00019964679169788986, | |
| "loss": 0.8105, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.028232230654583454, | |
| "grad_norm": 0.49516522884368896, | |
| "learning_rate": 0.00019964316192135152, | |
| "loss": 0.8152, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.028369280317955218, | |
| "grad_norm": 0.4861946105957031, | |
| "learning_rate": 0.00019963951362250967, | |
| "loss": 0.8267, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.028506329981326982, | |
| "grad_norm": 0.46852409839630127, | |
| "learning_rate": 0.0001996358468020425, | |
| "loss": 0.8187, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.02864337964469875, | |
| "grad_norm": 0.4557548463344574, | |
| "learning_rate": 0.00019963216146063158, | |
| "loss": 0.808, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.028780429308070513, | |
| "grad_norm": 0.4835500121116638, | |
| "learning_rate": 0.00019962845759896207, | |
| "loss": 0.8142, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.028917478971442277, | |
| "grad_norm": 0.48590588569641113, | |
| "learning_rate": 0.00019962473521772234, | |
| "loss": 0.8251, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.02905452863481404, | |
| "grad_norm": 0.46996089816093445, | |
| "learning_rate": 0.00019962099431760442, | |
| "loss": 0.8209, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.029191578298185805, | |
| "grad_norm": 0.46507537364959717, | |
| "learning_rate": 0.00019961723489930365, | |
| "loss": 0.8124, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.02932862796155757, | |
| "grad_norm": 0.48764947056770325, | |
| "learning_rate": 0.00019961345696351888, | |
| "loss": 0.8221, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.029465677624929333, | |
| "grad_norm": 0.49930909276008606, | |
| "learning_rate": 0.00019960966051095234, | |
| "loss": 0.8181, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.029602727288301097, | |
| "grad_norm": 0.5145189166069031, | |
| "learning_rate": 0.00019960584554230978, | |
| "loss": 0.8195, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.02973977695167286, | |
| "grad_norm": 0.46932724118232727, | |
| "learning_rate": 0.00019960201205830033, | |
| "loss": 0.8208, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.02987682661504463, | |
| "grad_norm": 0.48645126819610596, | |
| "learning_rate": 0.00019959816005963657, | |
| "loss": 0.8142, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.030013876278416392, | |
| "grad_norm": 0.4872402250766754, | |
| "learning_rate": 0.00019959428954703453, | |
| "loss": 0.8058, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.030150925941788156, | |
| "grad_norm": 0.49168017506599426, | |
| "learning_rate": 0.00019959040052121375, | |
| "loss": 0.8133, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.03028797560515992, | |
| "grad_norm": 0.5038509964942932, | |
| "learning_rate": 0.00019958649298289707, | |
| "loss": 0.8218, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.030425025268531684, | |
| "grad_norm": 0.4773401618003845, | |
| "learning_rate": 0.00019958256693281088, | |
| "loss": 0.8083, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.03056207493190345, | |
| "grad_norm": 0.4657188653945923, | |
| "learning_rate": 0.00019957862237168498, | |
| "loss": 0.8097, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.030699124595275212, | |
| "grad_norm": 0.45735588669776917, | |
| "learning_rate": 0.0001995746593002526, | |
| "loss": 0.8123, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.030836174258646976, | |
| "grad_norm": 0.4805397689342499, | |
| "learning_rate": 0.00019957067771925044, | |
| "loss": 0.8178, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.03097322392201874, | |
| "grad_norm": 0.4764343202114105, | |
| "learning_rate": 0.00019956667762941862, | |
| "loss": 0.8133, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.031110273585390504, | |
| "grad_norm": 0.4501846432685852, | |
| "learning_rate": 0.00019956265903150067, | |
| "loss": 0.8114, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.03124732324876227, | |
| "grad_norm": 0.46945250034332275, | |
| "learning_rate": 0.00019955862192624362, | |
| "loss": 0.8214, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.031384372912134036, | |
| "grad_norm": 0.461480975151062, | |
| "learning_rate": 0.00019955456631439792, | |
| "loss": 0.8163, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.031521422575505796, | |
| "grad_norm": 0.4994974434375763, | |
| "learning_rate": 0.0001995504921967174, | |
| "loss": 0.8192, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.031658472238877564, | |
| "grad_norm": 0.4574553072452545, | |
| "learning_rate": 0.00019954639957395947, | |
| "loss": 0.8148, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.03179552190224933, | |
| "grad_norm": 0.45984333753585815, | |
| "learning_rate": 0.0001995422884468848, | |
| "loss": 0.8111, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.03193257156562109, | |
| "grad_norm": 0.5009839534759521, | |
| "learning_rate": 0.00019953815881625767, | |
| "loss": 0.8224, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.03206962122899286, | |
| "grad_norm": 0.4804627597332001, | |
| "learning_rate": 0.00019953401068284568, | |
| "loss": 0.81, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.03220667089236462, | |
| "grad_norm": 0.48593953251838684, | |
| "learning_rate": 0.00019952984404741995, | |
| "loss": 0.8061, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.03234372055573639, | |
| "grad_norm": 0.5112223625183105, | |
| "learning_rate": 0.00019952565891075494, | |
| "loss": 0.8112, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.03248077021910815, | |
| "grad_norm": 0.4945172071456909, | |
| "learning_rate": 0.00019952145527362864, | |
| "loss": 0.8113, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.032617819882479915, | |
| "grad_norm": 0.4510950744152069, | |
| "learning_rate": 0.00019951723313682248, | |
| "loss": 0.8124, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.032754869545851675, | |
| "grad_norm": 0.4693338871002197, | |
| "learning_rate": 0.00019951299250112122, | |
| "loss": 0.8095, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.03289191920922344, | |
| "grad_norm": 0.5016142725944519, | |
| "learning_rate": 0.0001995087333673132, | |
| "loss": 0.8156, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.0330289688725952, | |
| "grad_norm": 0.46924394369125366, | |
| "learning_rate": 0.0001995044557361901, | |
| "loss": 0.8122, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.03316601853596697, | |
| "grad_norm": 0.49559521675109863, | |
| "learning_rate": 0.00019950015960854716, | |
| "loss": 0.8173, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.03330306819933874, | |
| "grad_norm": 0.4793796241283417, | |
| "learning_rate": 0.00019949584498518284, | |
| "loss": 0.8241, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.0334401178627105, | |
| "grad_norm": 0.48246756196022034, | |
| "learning_rate": 0.00019949151186689928, | |
| "loss": 0.8145, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.033577167526082266, | |
| "grad_norm": 0.4593210220336914, | |
| "learning_rate": 0.00019948716025450187, | |
| "loss": 0.8133, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.03371421718945403, | |
| "grad_norm": 0.49932458996772766, | |
| "learning_rate": 0.00019948279014879957, | |
| "loss": 0.814, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.033851266852825794, | |
| "grad_norm": 0.48580220341682434, | |
| "learning_rate": 0.00019947840155060467, | |
| "loss": 0.8045, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.033988316516197555, | |
| "grad_norm": 0.47070175409317017, | |
| "learning_rate": 0.00019947399446073298, | |
| "loss": 0.809, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.03412536617956932, | |
| "grad_norm": 0.472607284784317, | |
| "learning_rate": 0.00019946956888000373, | |
| "loss": 0.8041, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.03426241584294108, | |
| "grad_norm": 0.5041372776031494, | |
| "learning_rate": 0.0001994651248092396, | |
| "loss": 0.8129, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.03439946550631285, | |
| "grad_norm": 0.4530751705169678, | |
| "learning_rate": 0.00019946066224926657, | |
| "loss": 0.8134, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.03453651516968462, | |
| "grad_norm": 0.512209951877594, | |
| "learning_rate": 0.00019945618120091428, | |
| "loss": 0.8112, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.03467356483305638, | |
| "grad_norm": 0.4971703588962555, | |
| "learning_rate": 0.00019945168166501568, | |
| "loss": 0.8138, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.034810614496428145, | |
| "grad_norm": 0.46569451689720154, | |
| "learning_rate": 0.0001994471636424071, | |
| "loss": 0.816, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.034947664159799906, | |
| "grad_norm": 0.46872833371162415, | |
| "learning_rate": 0.00019944262713392848, | |
| "loss": 0.813, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.03508471382317167, | |
| "grad_norm": 0.4578431248664856, | |
| "learning_rate": 0.00019943807214042303, | |
| "loss": 0.8109, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.035221763486543434, | |
| "grad_norm": 0.46130719780921936, | |
| "learning_rate": 0.00019943349866273746, | |
| "loss": 0.8109, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.0353588131499152, | |
| "grad_norm": 0.5125893354415894, | |
| "learning_rate": 0.00019942890670172195, | |
| "loss": 0.8066, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.03549586281328696, | |
| "grad_norm": 0.47778981924057007, | |
| "learning_rate": 0.00019942429625823005, | |
| "loss": 0.8159, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.03563291247665873, | |
| "grad_norm": 0.4936039447784424, | |
| "learning_rate": 0.0001994196673331188, | |
| "loss": 0.7937, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.035769962140030497, | |
| "grad_norm": 0.4861447513103485, | |
| "learning_rate": 0.00019941501992724867, | |
| "loss": 0.8097, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.03590701180340226, | |
| "grad_norm": 0.47155696153640747, | |
| "learning_rate": 0.0001994103540414835, | |
| "loss": 0.813, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.036044061466774024, | |
| "grad_norm": 0.4665374755859375, | |
| "learning_rate": 0.00019940566967669067, | |
| "loss": 0.8024, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.036181111130145785, | |
| "grad_norm": 0.5167489051818848, | |
| "learning_rate": 0.0001994009668337409, | |
| "loss": 0.8117, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.03631816079351755, | |
| "grad_norm": 0.4830285608768463, | |
| "learning_rate": 0.00019939624551350838, | |
| "loss": 0.8239, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.03645521045688931, | |
| "grad_norm": 0.46591734886169434, | |
| "learning_rate": 0.00019939150571687077, | |
| "loss": 0.8075, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.03659226012026108, | |
| "grad_norm": 0.46838170289993286, | |
| "learning_rate": 0.00019938674744470914, | |
| "loss": 0.803, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.03672930978363284, | |
| "grad_norm": 0.4891092777252197, | |
| "learning_rate": 0.00019938197069790793, | |
| "loss": 0.8098, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.03686635944700461, | |
| "grad_norm": 0.4904557466506958, | |
| "learning_rate": 0.00019937717547735513, | |
| "loss": 0.8216, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.037003409110376376, | |
| "grad_norm": 0.4622911810874939, | |
| "learning_rate": 0.00019937236178394207, | |
| "loss": 0.8051, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.037140458773748136, | |
| "grad_norm": 0.48750799894332886, | |
| "learning_rate": 0.00019936752961856357, | |
| "loss": 0.8044, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.037277508437119904, | |
| "grad_norm": 0.510019063949585, | |
| "learning_rate": 0.00019936267898211786, | |
| "loss": 0.8098, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.037414558100491664, | |
| "grad_norm": 0.49900510907173157, | |
| "learning_rate": 0.0001993578098755066, | |
| "loss": 0.806, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.03755160776386343, | |
| "grad_norm": 0.4632837474346161, | |
| "learning_rate": 0.0001993529222996349, | |
| "loss": 0.8013, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.03768865742723519, | |
| "grad_norm": 0.4744565188884735, | |
| "learning_rate": 0.00019934801625541129, | |
| "loss": 0.8082, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.03782570709060696, | |
| "grad_norm": 0.4971522092819214, | |
| "learning_rate": 0.00019934309174374774, | |
| "loss": 0.8056, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.03796275675397872, | |
| "grad_norm": 0.4654538333415985, | |
| "learning_rate": 0.00019933814876555963, | |
| "loss": 0.8119, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.03809980641735049, | |
| "grad_norm": 0.47612524032592773, | |
| "learning_rate": 0.00019933318732176582, | |
| "loss": 0.8196, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.038236856080722255, | |
| "grad_norm": 0.5004960298538208, | |
| "learning_rate": 0.00019932820741328856, | |
| "loss": 0.8082, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.038373905744094015, | |
| "grad_norm": 0.4597856104373932, | |
| "learning_rate": 0.00019932320904105354, | |
| "loss": 0.7997, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.03851095540746578, | |
| "grad_norm": 0.46188652515411377, | |
| "learning_rate": 0.00019931819220598993, | |
| "loss": 0.8051, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.03864800507083754, | |
| "grad_norm": 0.477914959192276, | |
| "learning_rate": 0.00019931315690903026, | |
| "loss": 0.8115, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.03878505473420931, | |
| "grad_norm": 0.511289119720459, | |
| "learning_rate": 0.0001993081031511105, | |
| "loss": 0.8127, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.03892210439758107, | |
| "grad_norm": 0.4742279052734375, | |
| "learning_rate": 0.0001993030309331701, | |
| "loss": 0.8113, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.03905915406095284, | |
| "grad_norm": 0.5221574306488037, | |
| "learning_rate": 0.00019929794025615188, | |
| "loss": 0.8122, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.0391962037243246, | |
| "grad_norm": 0.4802887737751007, | |
| "learning_rate": 0.00019929283112100218, | |
| "loss": 0.8001, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.03933325338769637, | |
| "grad_norm": 0.5080459117889404, | |
| "learning_rate": 0.00019928770352867074, | |
| "loss": 0.811, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.039470303051068134, | |
| "grad_norm": 0.4794371426105499, | |
| "learning_rate": 0.0001992825574801106, | |
| "loss": 0.801, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.039607352714439895, | |
| "grad_norm": 0.49599120020866394, | |
| "learning_rate": 0.00019927739297627848, | |
| "loss": 0.8099, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.03974440237781166, | |
| "grad_norm": 0.4951411187648773, | |
| "learning_rate": 0.0001992722100181343, | |
| "loss": 0.8149, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.03988145204118342, | |
| "grad_norm": 0.5010212659835815, | |
| "learning_rate": 0.00019926700860664148, | |
| "loss": 0.8043, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.04001850170455519, | |
| "grad_norm": 0.4742908477783203, | |
| "learning_rate": 0.00019926178874276698, | |
| "loss": 0.8116, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.04015555136792695, | |
| "grad_norm": 0.5191108584403992, | |
| "learning_rate": 0.00019925655042748102, | |
| "loss": 0.8024, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.04029260103129872, | |
| "grad_norm": 0.50883948802948, | |
| "learning_rate": 0.00019925129366175738, | |
| "loss": 0.7936, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.04042965069467048, | |
| "grad_norm": 0.47401902079582214, | |
| "learning_rate": 0.0001992460184465732, | |
| "loss": 0.8082, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.040566700358042246, | |
| "grad_norm": 0.4578768014907837, | |
| "learning_rate": 0.00019924072478290906, | |
| "loss": 0.8068, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.04070375002141401, | |
| "grad_norm": 0.4548453092575073, | |
| "learning_rate": 0.00019923541267174907, | |
| "loss": 0.8083, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.040840799684785774, | |
| "grad_norm": 0.45983466506004333, | |
| "learning_rate": 0.0001992300821140805, | |
| "loss": 0.8065, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.04097784934815754, | |
| "grad_norm": 0.48265522718429565, | |
| "learning_rate": 0.0001992247331108944, | |
| "loss": 0.8122, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.0411148990115293, | |
| "grad_norm": 0.4878624975681305, | |
| "learning_rate": 0.000199219365663185, | |
| "loss": 0.8101, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.04125194867490107, | |
| "grad_norm": 0.5351042151451111, | |
| "learning_rate": 0.00019921397977195002, | |
| "loss": 0.8092, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.04138899833827283, | |
| "grad_norm": 0.47971010208129883, | |
| "learning_rate": 0.00019920857543819068, | |
| "loss": 0.8111, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.0415260480016446, | |
| "grad_norm": 0.4985937774181366, | |
| "learning_rate": 0.00019920315266291154, | |
| "loss": 0.8131, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.04166309766501636, | |
| "grad_norm": 0.4487249255180359, | |
| "learning_rate": 0.0001991977114471206, | |
| "loss": 0.8171, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.041800147328388125, | |
| "grad_norm": 0.4501367211341858, | |
| "learning_rate": 0.00019919225179182933, | |
| "loss": 0.8041, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.041937196991759886, | |
| "grad_norm": 0.4651374816894531, | |
| "learning_rate": 0.0001991867736980526, | |
| "loss": 0.7995, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.04207424665513165, | |
| "grad_norm": 0.5046921372413635, | |
| "learning_rate": 0.00019918127716680873, | |
| "loss": 0.8076, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.04221129631850342, | |
| "grad_norm": 0.4766380488872528, | |
| "learning_rate": 0.00019917576219911942, | |
| "loss": 0.808, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.04234834598187518, | |
| "grad_norm": 0.5033254623413086, | |
| "learning_rate": 0.00019917022879600987, | |
| "loss": 0.8092, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.04248539564524695, | |
| "grad_norm": 0.5111039280891418, | |
| "learning_rate": 0.0001991646769585086, | |
| "loss": 0.8028, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.04262244530861871, | |
| "grad_norm": 0.5142612457275391, | |
| "learning_rate": 0.00019915910668764767, | |
| "loss": 0.8091, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.042759494971990476, | |
| "grad_norm": 0.5041571259498596, | |
| "learning_rate": 0.00019915351798446254, | |
| "loss": 0.8085, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.04289654463536224, | |
| "grad_norm": 0.5112966299057007, | |
| "learning_rate": 0.000199147910849992, | |
| "loss": 0.8045, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.043033594298734004, | |
| "grad_norm": 0.5248098373413086, | |
| "learning_rate": 0.0001991422852852784, | |
| "loss": 0.8012, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.043170643962105765, | |
| "grad_norm": 0.47679683566093445, | |
| "learning_rate": 0.00019913664129136743, | |
| "loss": 0.8116, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.04330769362547753, | |
| "grad_norm": 0.46716034412384033, | |
| "learning_rate": 0.00019913097886930823, | |
| "loss": 0.7983, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.0434447432888493, | |
| "grad_norm": 0.5045149326324463, | |
| "learning_rate": 0.00019912529802015337, | |
| "loss": 0.8066, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.04358179295222106, | |
| "grad_norm": 0.44902193546295166, | |
| "learning_rate": 0.00019911959874495885, | |
| "loss": 0.8005, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.04371884261559283, | |
| "grad_norm": 0.46237778663635254, | |
| "learning_rate": 0.0001991138810447841, | |
| "loss": 0.7956, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.04385589227896459, | |
| "grad_norm": 0.4876990020275116, | |
| "learning_rate": 0.0001991081449206919, | |
| "loss": 0.8045, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.043992941942336355, | |
| "grad_norm": 0.4410344362258911, | |
| "learning_rate": 0.0001991023903737486, | |
| "loss": 0.8022, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.044129991605708116, | |
| "grad_norm": 0.44994667172431946, | |
| "learning_rate": 0.00019909661740502387, | |
| "loss": 0.7971, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.04426704126907988, | |
| "grad_norm": 0.49519991874694824, | |
| "learning_rate": 0.00019909082601559077, | |
| "loss": 0.8005, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.044404090932451644, | |
| "grad_norm": 0.48117753863334656, | |
| "learning_rate": 0.0001990850162065259, | |
| "loss": 0.8029, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.04454114059582341, | |
| "grad_norm": 0.4871579706668854, | |
| "learning_rate": 0.0001990791879789092, | |
| "loss": 0.8059, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.04467819025919518, | |
| "grad_norm": 0.489726722240448, | |
| "learning_rate": 0.00019907334133382405, | |
| "loss": 0.799, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.04481523992256694, | |
| "grad_norm": 0.5114454030990601, | |
| "learning_rate": 0.00019906747627235728, | |
| "loss": 0.8076, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.04495228958593871, | |
| "grad_norm": 0.4839656949043274, | |
| "learning_rate": 0.00019906159279559912, | |
| "loss": 0.8107, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.04508933924931047, | |
| "grad_norm": 0.5057470202445984, | |
| "learning_rate": 0.00019905569090464324, | |
| "loss": 0.8061, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.045226388912682235, | |
| "grad_norm": 0.4629577100276947, | |
| "learning_rate": 0.0001990497706005867, | |
| "loss": 0.8076, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.045363438576053995, | |
| "grad_norm": 0.5014634132385254, | |
| "learning_rate": 0.00019904383188453002, | |
| "loss": 0.811, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.04550048823942576, | |
| "grad_norm": 0.47154685854911804, | |
| "learning_rate": 0.00019903787475757708, | |
| "loss": 0.7882, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.04563753790279752, | |
| "grad_norm": 0.4919736981391907, | |
| "learning_rate": 0.0001990318992208353, | |
| "loss": 0.8136, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.04577458756616929, | |
| "grad_norm": 0.4639228880405426, | |
| "learning_rate": 0.00019902590527541545, | |
| "loss": 0.8056, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.04591163722954106, | |
| "grad_norm": 0.42496591806411743, | |
| "learning_rate": 0.00019901989292243165, | |
| "loss": 0.8116, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.04604868689291282, | |
| "grad_norm": 0.4563073217868805, | |
| "learning_rate": 0.00019901386216300155, | |
| "loss": 0.7984, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.046185736556284586, | |
| "grad_norm": 0.4917526841163635, | |
| "learning_rate": 0.00019900781299824619, | |
| "loss": 0.8067, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.046322786219656346, | |
| "grad_norm": 0.47236648201942444, | |
| "learning_rate": 0.00019900174542929005, | |
| "loss": 0.7997, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.046459835883028114, | |
| "grad_norm": 0.4592929780483246, | |
| "learning_rate": 0.00019899565945726098, | |
| "loss": 0.8128, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.046596885546399874, | |
| "grad_norm": 0.4753207862377167, | |
| "learning_rate": 0.00019898955508329023, | |
| "loss": 0.8122, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.04673393520977164, | |
| "grad_norm": 0.47833189368247986, | |
| "learning_rate": 0.00019898343230851265, | |
| "loss": 0.799, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.0468709848731434, | |
| "grad_norm": 0.498756468296051, | |
| "learning_rate": 0.00019897729113406624, | |
| "loss": 0.8033, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.04700803453651517, | |
| "grad_norm": 0.5029916167259216, | |
| "learning_rate": 0.00019897113156109268, | |
| "loss": 0.8033, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.04714508419988694, | |
| "grad_norm": 0.45970985293388367, | |
| "learning_rate": 0.00019896495359073688, | |
| "loss": 0.7994, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.0472821338632587, | |
| "grad_norm": 0.47940370440483093, | |
| "learning_rate": 0.00019895875722414724, | |
| "loss": 0.8016, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.047419183526630465, | |
| "grad_norm": 0.46687984466552734, | |
| "learning_rate": 0.0001989525424624756, | |
| "loss": 0.8115, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.047556233190002226, | |
| "grad_norm": 0.5174044966697693, | |
| "learning_rate": 0.0001989463093068772, | |
| "loss": 0.8037, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.04769328285337399, | |
| "grad_norm": 0.5020077228546143, | |
| "learning_rate": 0.0001989400577585107, | |
| "loss": 0.7974, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.047830332516745754, | |
| "grad_norm": 0.504945695400238, | |
| "learning_rate": 0.00019893378781853818, | |
| "loss": 0.8093, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.04796738218011752, | |
| "grad_norm": 0.4759848415851593, | |
| "learning_rate": 0.00019892749948812507, | |
| "loss": 0.8098, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.04810443184348928, | |
| "grad_norm": 0.4772614538669586, | |
| "learning_rate": 0.0001989211927684404, | |
| "loss": 0.8054, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.04824148150686105, | |
| "grad_norm": 0.47073760628700256, | |
| "learning_rate": 0.00019891486766065644, | |
| "loss": 0.8077, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.048378531170232816, | |
| "grad_norm": 0.47336697578430176, | |
| "learning_rate": 0.00019890852416594893, | |
| "loss": 0.8043, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.04851558083360458, | |
| "grad_norm": 0.45777302980422974, | |
| "learning_rate": 0.00019890216228549704, | |
| "loss": 0.8008, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.048652630496976344, | |
| "grad_norm": 0.4561302661895752, | |
| "learning_rate": 0.0001988957820204834, | |
| "loss": 0.8044, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.048789680160348105, | |
| "grad_norm": 0.4956313669681549, | |
| "learning_rate": 0.000198889383372094, | |
| "loss": 0.8002, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.04892672982371987, | |
| "grad_norm": 0.485945463180542, | |
| "learning_rate": 0.00019888296634151822, | |
| "loss": 0.7988, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.04906377948709163, | |
| "grad_norm": 0.46995940804481506, | |
| "learning_rate": 0.00019887653092994894, | |
| "loss": 0.7973, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.0492008291504634, | |
| "grad_norm": 0.46722331643104553, | |
| "learning_rate": 0.00019887007713858239, | |
| "loss": 0.8108, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.04933787881383516, | |
| "grad_norm": 0.47116202116012573, | |
| "learning_rate": 0.00019886360496861825, | |
| "loss": 0.8062, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.04947492847720693, | |
| "grad_norm": 0.48090749979019165, | |
| "learning_rate": 0.00019885711442125961, | |
| "loss": 0.8006, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.049611978140578696, | |
| "grad_norm": 0.46765968203544617, | |
| "learning_rate": 0.00019885060549771299, | |
| "loss": 0.7977, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.049749027803950456, | |
| "grad_norm": 0.482744961977005, | |
| "learning_rate": 0.00019884407819918828, | |
| "loss": 0.8017, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.049886077467322223, | |
| "grad_norm": 0.5016223192214966, | |
| "learning_rate": 0.00019883753252689885, | |
| "loss": 0.8049, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.050023127130693984, | |
| "grad_norm": 0.4829504191875458, | |
| "learning_rate": 0.0001988309684820614, | |
| "loss": 0.8061, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.05016017679406575, | |
| "grad_norm": 0.500898003578186, | |
| "learning_rate": 0.00019882438606589616, | |
| "loss": 0.8018, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.05029722645743751, | |
| "grad_norm": 0.4587385058403015, | |
| "learning_rate": 0.0001988177852796267, | |
| "loss": 0.8035, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.05043427612080928, | |
| "grad_norm": 0.49064910411834717, | |
| "learning_rate": 0.00019881116612447994, | |
| "loss": 0.8069, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.05057132578418104, | |
| "grad_norm": 0.476414293050766, | |
| "learning_rate": 0.00019880452860168636, | |
| "loss": 0.8059, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.05070837544755281, | |
| "grad_norm": 0.49723196029663086, | |
| "learning_rate": 0.0001987978727124798, | |
| "loss": 0.7962, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.050845425110924575, | |
| "grad_norm": 0.47163042426109314, | |
| "learning_rate": 0.00019879119845809745, | |
| "loss": 0.7943, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.050982474774296335, | |
| "grad_norm": 0.4651722311973572, | |
| "learning_rate": 0.00019878450583978, | |
| "loss": 0.7956, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.0511195244376681, | |
| "grad_norm": 0.44886264204978943, | |
| "learning_rate": 0.0001987777948587715, | |
| "loss": 0.8028, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.05125657410103986, | |
| "grad_norm": 0.4783935248851776, | |
| "learning_rate": 0.00019877106551631938, | |
| "loss": 0.8072, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.05139362376441163, | |
| "grad_norm": 0.5192398428916931, | |
| "learning_rate": 0.0001987643178136746, | |
| "loss": 0.8029, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.05153067342778339, | |
| "grad_norm": 0.5124772787094116, | |
| "learning_rate": 0.00019875755175209148, | |
| "loss": 0.7977, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.05166772309115516, | |
| "grad_norm": 0.46866002678871155, | |
| "learning_rate": 0.0001987507673328277, | |
| "loss": 0.8019, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.05180477275452692, | |
| "grad_norm": 0.496614933013916, | |
| "learning_rate": 0.0001987439645571444, | |
| "loss": 0.8074, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.051941822417898686, | |
| "grad_norm": 0.46783682703971863, | |
| "learning_rate": 0.0001987371434263061, | |
| "loss": 0.7904, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.05207887208127045, | |
| "grad_norm": 0.5020947456359863, | |
| "learning_rate": 0.0001987303039415808, | |
| "loss": 0.7959, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.052215921744642214, | |
| "grad_norm": 0.463768869638443, | |
| "learning_rate": 0.0001987234461042398, | |
| "loss": 0.8069, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.05235297140801398, | |
| "grad_norm": 0.4553416967391968, | |
| "learning_rate": 0.00019871656991555793, | |
| "loss": 0.7983, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.05249002107138574, | |
| "grad_norm": 0.4670025706291199, | |
| "learning_rate": 0.0001987096753768134, | |
| "loss": 0.7906, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.05262707073475751, | |
| "grad_norm": 0.4533403813838959, | |
| "learning_rate": 0.00019870276248928776, | |
| "loss": 0.8072, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.05276412039812927, | |
| "grad_norm": 0.5267584323883057, | |
| "learning_rate": 0.00019869583125426606, | |
| "loss": 0.8001, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.05290117006150104, | |
| "grad_norm": 0.4778296947479248, | |
| "learning_rate": 0.0001986888816730367, | |
| "loss": 0.8038, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.0530382197248728, | |
| "grad_norm": 0.48750537633895874, | |
| "learning_rate": 0.00019868191374689152, | |
| "loss": 0.7952, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.053175269388244566, | |
| "grad_norm": 0.4721476435661316, | |
| "learning_rate": 0.00019867492747712573, | |
| "loss": 0.8035, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.053312319051616326, | |
| "grad_norm": 0.471542626619339, | |
| "learning_rate": 0.00019866792286503802, | |
| "loss": 0.7943, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.053449368714988094, | |
| "grad_norm": 0.4602237343788147, | |
| "learning_rate": 0.00019866089991193045, | |
| "loss": 0.7999, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.05358641837835986, | |
| "grad_norm": 0.4979691505432129, | |
| "learning_rate": 0.0001986538586191085, | |
| "loss": 0.8028, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.05372346804173162, | |
| "grad_norm": 0.4573438763618469, | |
| "learning_rate": 0.000198646798987881, | |
| "loss": 0.801, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.05386051770510339, | |
| "grad_norm": 0.4607889950275421, | |
| "learning_rate": 0.0001986397210195603, | |
| "loss": 0.7963, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.05399756736847515, | |
| "grad_norm": 0.489907443523407, | |
| "learning_rate": 0.00019863262471546205, | |
| "loss": 0.8003, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.05413461703184692, | |
| "grad_norm": 0.47025689482688904, | |
| "learning_rate": 0.0001986255100769054, | |
| "loss": 0.791, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.05427166669521868, | |
| "grad_norm": 0.4787987172603607, | |
| "learning_rate": 0.00019861837710521282, | |
| "loss": 0.7966, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.054408716358590445, | |
| "grad_norm": 0.4853837788105011, | |
| "learning_rate": 0.00019861122580171027, | |
| "loss": 0.8056, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.054545766021962205, | |
| "grad_norm": 0.5139968991279602, | |
| "learning_rate": 0.00019860405616772706, | |
| "loss": 0.7907, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.05468281568533397, | |
| "grad_norm": 0.5047359466552734, | |
| "learning_rate": 0.00019859686820459594, | |
| "loss": 0.7988, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.05481986534870574, | |
| "grad_norm": 0.48581087589263916, | |
| "learning_rate": 0.000198589661913653, | |
| "loss": 0.8095, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.0549569150120775, | |
| "grad_norm": 0.46513304114341736, | |
| "learning_rate": 0.0001985824372962379, | |
| "loss": 0.8026, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.05509396467544927, | |
| "grad_norm": 0.46750718355178833, | |
| "learning_rate": 0.0001985751943536935, | |
| "loss": 0.8037, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.05523101433882103, | |
| "grad_norm": 0.49538227915763855, | |
| "learning_rate": 0.00019856793308736616, | |
| "loss": 0.7956, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.055368064002192796, | |
| "grad_norm": 0.46910130977630615, | |
| "learning_rate": 0.00019856065349860576, | |
| "loss": 0.7893, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.05550511366556456, | |
| "grad_norm": 0.4639648199081421, | |
| "learning_rate": 0.00019855335558876535, | |
| "loss": 0.8017, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.055642163328936324, | |
| "grad_norm": 0.43415069580078125, | |
| "learning_rate": 0.00019854603935920157, | |
| "loss": 0.7954, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.055779212992308085, | |
| "grad_norm": 0.44691941142082214, | |
| "learning_rate": 0.00019853870481127442, | |
| "loss": 0.8008, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.05591626265567985, | |
| "grad_norm": 0.4980349838733673, | |
| "learning_rate": 0.00019853135194634726, | |
| "loss": 0.8039, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.05605331231905162, | |
| "grad_norm": 0.4900018870830536, | |
| "learning_rate": 0.0001985239807657869, | |
| "loss": 0.797, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.05619036198242338, | |
| "grad_norm": 0.48318371176719666, | |
| "learning_rate": 0.00019851659127096357, | |
| "loss": 0.8072, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.05632741164579515, | |
| "grad_norm": 0.47676709294319153, | |
| "learning_rate": 0.00019850918346325084, | |
| "loss": 0.7995, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.05646446130916691, | |
| "grad_norm": 0.4761766493320465, | |
| "learning_rate": 0.00019850175734402572, | |
| "loss": 0.8024, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.056601510972538675, | |
| "grad_norm": 0.4747091233730316, | |
| "learning_rate": 0.00019849431291466864, | |
| "loss": 0.8041, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.056738560635910436, | |
| "grad_norm": 0.49748796224594116, | |
| "learning_rate": 0.00019848685017656342, | |
| "loss": 0.8072, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.0568756102992822, | |
| "grad_norm": 0.4664687216281891, | |
| "learning_rate": 0.00019847936913109727, | |
| "loss": 0.8037, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.057012659962653964, | |
| "grad_norm": 0.4881364703178406, | |
| "learning_rate": 0.00019847186977966082, | |
| "loss": 0.7898, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.05714970962602573, | |
| "grad_norm": 0.46295851469039917, | |
| "learning_rate": 0.00019846435212364806, | |
| "loss": 0.8069, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.0572867592893975, | |
| "grad_norm": 0.48034432530403137, | |
| "learning_rate": 0.00019845681616445648, | |
| "loss": 0.7978, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.05742380895276926, | |
| "grad_norm": 0.4847252666950226, | |
| "learning_rate": 0.00019844926190348692, | |
| "loss": 0.7984, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.05756085861614103, | |
| "grad_norm": 0.4705177843570709, | |
| "learning_rate": 0.00019844168934214353, | |
| "loss": 0.8, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.05769790827951279, | |
| "grad_norm": 0.4897174835205078, | |
| "learning_rate": 0.00019843409848183403, | |
| "loss": 0.7983, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.057834957942884554, | |
| "grad_norm": 0.46888941526412964, | |
| "learning_rate": 0.0001984264893239694, | |
| "loss": 0.8026, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.057972007606256315, | |
| "grad_norm": 0.4739408493041992, | |
| "learning_rate": 0.0001984188618699641, | |
| "loss": 0.7953, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.05810905726962808, | |
| "grad_norm": 0.45465242862701416, | |
| "learning_rate": 0.000198411216121236, | |
| "loss": 0.7972, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.05824610693299984, | |
| "grad_norm": 0.4990096688270569, | |
| "learning_rate": 0.0001984035520792063, | |
| "loss": 0.8089, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.05838315659637161, | |
| "grad_norm": 0.48028653860092163, | |
| "learning_rate": 0.00019839586974529963, | |
| "loss": 0.798, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.05852020625974338, | |
| "grad_norm": 0.48342519998550415, | |
| "learning_rate": 0.00019838816912094405, | |
| "loss": 0.8078, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.05865725592311514, | |
| "grad_norm": 0.5085064768791199, | |
| "learning_rate": 0.00019838045020757105, | |
| "loss": 0.8079, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.058794305586486906, | |
| "grad_norm": 0.47484463453292847, | |
| "learning_rate": 0.00019837271300661538, | |
| "loss": 0.7995, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.058931355249858666, | |
| "grad_norm": 0.4731627404689789, | |
| "learning_rate": 0.00019836495751951536, | |
| "loss": 0.8001, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.059068404913230434, | |
| "grad_norm": 0.4548656940460205, | |
| "learning_rate": 0.00019835718374771257, | |
| "loss": 0.8092, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.059205454576602194, | |
| "grad_norm": 0.5077337026596069, | |
| "learning_rate": 0.00019834939169265206, | |
| "loss": 0.8032, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.05934250423997396, | |
| "grad_norm": 0.46409091353416443, | |
| "learning_rate": 0.00019834158135578233, | |
| "loss": 0.8005, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.05947955390334572, | |
| "grad_norm": 0.4751618206501007, | |
| "learning_rate": 0.00019833375273855514, | |
| "loss": 0.7954, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.05961660356671749, | |
| "grad_norm": 0.4541051983833313, | |
| "learning_rate": 0.00019832590584242574, | |
| "loss": 0.8003, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.05975365323008926, | |
| "grad_norm": 0.5068663954734802, | |
| "learning_rate": 0.0001983180406688528, | |
| "loss": 0.7951, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.05989070289346102, | |
| "grad_norm": 0.4474228322505951, | |
| "learning_rate": 0.00019831015721929825, | |
| "loss": 0.7793, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.060027752556832785, | |
| "grad_norm": 0.4839327037334442, | |
| "learning_rate": 0.00019830225549522762, | |
| "loss": 0.8067, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.060164802220204545, | |
| "grad_norm": 0.47900480031967163, | |
| "learning_rate": 0.0001982943354981097, | |
| "loss": 0.7976, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.06030185188357631, | |
| "grad_norm": 0.49303489923477173, | |
| "learning_rate": 0.0001982863972294167, | |
| "loss": 0.8012, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.06043890154694807, | |
| "grad_norm": 0.4726907014846802, | |
| "learning_rate": 0.0001982784406906242, | |
| "loss": 0.799, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.06057595121031984, | |
| "grad_norm": 0.4976119101047516, | |
| "learning_rate": 0.00019827046588321133, | |
| "loss": 0.7957, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.0607130008736916, | |
| "grad_norm": 0.4890015423297882, | |
| "learning_rate": 0.00019826247280866038, | |
| "loss": 0.7927, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.06085005053706337, | |
| "grad_norm": 0.4718722701072693, | |
| "learning_rate": 0.00019825446146845717, | |
| "loss": 0.7993, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.06098710020043513, | |
| "grad_norm": 0.4812386631965637, | |
| "learning_rate": 0.00019824643186409094, | |
| "loss": 0.7984, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.0611241498638069, | |
| "grad_norm": 0.5093216896057129, | |
| "learning_rate": 0.0001982383839970543, | |
| "loss": 0.794, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.061261199527178664, | |
| "grad_norm": 0.45676031708717346, | |
| "learning_rate": 0.00019823031786884315, | |
| "loss": 0.8003, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.061398249190550425, | |
| "grad_norm": 0.47176119685173035, | |
| "learning_rate": 0.00019822223348095697, | |
| "loss": 0.7922, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.06153529885392219, | |
| "grad_norm": 0.485170841217041, | |
| "learning_rate": 0.00019821413083489847, | |
| "loss": 0.8026, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.06167234851729395, | |
| "grad_norm": 0.5021151900291443, | |
| "learning_rate": 0.00019820600993217385, | |
| "loss": 0.7926, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.06180939818066572, | |
| "grad_norm": 0.4861471951007843, | |
| "learning_rate": 0.00019819787077429268, | |
| "loss": 0.8004, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.06194644784403748, | |
| "grad_norm": 0.48229849338531494, | |
| "learning_rate": 0.00019818971336276787, | |
| "loss": 0.7963, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.06208349750740925, | |
| "grad_norm": 0.45228251814842224, | |
| "learning_rate": 0.00019818153769911586, | |
| "loss": 0.8017, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.06222054717078101, | |
| "grad_norm": 0.48165270686149597, | |
| "learning_rate": 0.00019817334378485635, | |
| "loss": 0.7904, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.062357596834152776, | |
| "grad_norm": 0.46487587690353394, | |
| "learning_rate": 0.00019816513162151242, | |
| "loss": 0.8009, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.06249464649752454, | |
| "grad_norm": 0.4883232116699219, | |
| "learning_rate": 0.00019815690121061067, | |
| "loss": 0.7981, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.06263169616089631, | |
| "grad_norm": 0.4995619058609009, | |
| "learning_rate": 0.00019814865255368105, | |
| "loss": 0.7978, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.06276874582426807, | |
| "grad_norm": 0.4523701071739197, | |
| "learning_rate": 0.0001981403856522568, | |
| "loss": 0.795, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.06290579548763983, | |
| "grad_norm": 0.5012710094451904, | |
| "learning_rate": 0.0001981321005078746, | |
| "loss": 0.7942, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 0.06304284515101159, | |
| "grad_norm": 0.47123944759368896, | |
| "learning_rate": 0.00019812379712207462, | |
| "loss": 0.8057, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.06317989481438337, | |
| "grad_norm": 0.4805154800415039, | |
| "learning_rate": 0.00019811547549640035, | |
| "loss": 0.8059, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 0.06331694447775513, | |
| "grad_norm": 0.4887898564338684, | |
| "learning_rate": 0.0001981071356323986, | |
| "loss": 0.7988, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.06345399414112689, | |
| "grad_norm": 0.4898166358470917, | |
| "learning_rate": 0.0001980987775316197, | |
| "loss": 0.7993, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 0.06359104380449866, | |
| "grad_norm": 0.4890003800392151, | |
| "learning_rate": 0.00019809040119561728, | |
| "loss": 0.7926, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.06372809346787042, | |
| "grad_norm": 0.47351571917533875, | |
| "learning_rate": 0.00019808200662594838, | |
| "loss": 0.7928, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.06386514313124218, | |
| "grad_norm": 0.4690215587615967, | |
| "learning_rate": 0.00019807359382417343, | |
| "loss": 0.7894, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.06400219279461394, | |
| "grad_norm": 0.4408493638038635, | |
| "learning_rate": 0.00019806516279185628, | |
| "loss": 0.8, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 0.06413924245798572, | |
| "grad_norm": 0.4749327003955841, | |
| "learning_rate": 0.00019805671353056412, | |
| "loss": 0.7966, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.06427629212135748, | |
| "grad_norm": 0.4740825593471527, | |
| "learning_rate": 0.00019804824604186758, | |
| "loss": 0.7984, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 0.06441334178472924, | |
| "grad_norm": 0.45539775490760803, | |
| "learning_rate": 0.00019803976032734064, | |
| "loss": 0.8011, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.064550391448101, | |
| "grad_norm": 0.47875672578811646, | |
| "learning_rate": 0.00019803125638856063, | |
| "loss": 0.794, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 0.06468744111147277, | |
| "grad_norm": 0.4581809639930725, | |
| "learning_rate": 0.00019802273422710843, | |
| "loss": 0.7968, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.06482449077484453, | |
| "grad_norm": 0.502196729183197, | |
| "learning_rate": 0.00019801419384456805, | |
| "loss": 0.7994, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 0.0649615404382163, | |
| "grad_norm": 0.4565235674381256, | |
| "learning_rate": 0.00019800563524252716, | |
| "loss": 0.8012, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.06509859010158807, | |
| "grad_norm": 0.49176493287086487, | |
| "learning_rate": 0.00019799705842257659, | |
| "loss": 0.7974, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.06523563976495983, | |
| "grad_norm": 0.47653689980506897, | |
| "learning_rate": 0.00019798846338631073, | |
| "loss": 0.7912, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.06537268942833159, | |
| "grad_norm": 0.48957517743110657, | |
| "learning_rate": 0.00019797985013532722, | |
| "loss": 0.8003, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 0.06550973909170335, | |
| "grad_norm": 0.4694381058216095, | |
| "learning_rate": 0.00019797121867122717, | |
| "loss": 0.7942, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.06564678875507513, | |
| "grad_norm": 0.435002863407135, | |
| "learning_rate": 0.00019796256899561504, | |
| "loss": 0.8081, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 0.06578383841844689, | |
| "grad_norm": 0.48766547441482544, | |
| "learning_rate": 0.0001979539011100987, | |
| "loss": 0.7908, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.06592088808181865, | |
| "grad_norm": 0.456478476524353, | |
| "learning_rate": 0.0001979452150162894, | |
| "loss": 0.8038, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 0.0660579377451904, | |
| "grad_norm": 0.4724636375904083, | |
| "learning_rate": 0.00019793651071580177, | |
| "loss": 0.7906, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.06619498740856218, | |
| "grad_norm": 0.48072385787963867, | |
| "learning_rate": 0.0001979277882102538, | |
| "loss": 0.7936, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 0.06633203707193394, | |
| "grad_norm": 0.4881296455860138, | |
| "learning_rate": 0.00019791904750126688, | |
| "loss": 0.7992, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.0664690867353057, | |
| "grad_norm": 0.4786093831062317, | |
| "learning_rate": 0.0001979102885904658, | |
| "loss": 0.8022, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.06660613639867748, | |
| "grad_norm": 0.4414425194263458, | |
| "learning_rate": 0.0001979015114794787, | |
| "loss": 0.7908, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.06674318606204924, | |
| "grad_norm": 0.4660377502441406, | |
| "learning_rate": 0.00019789271616993718, | |
| "loss": 0.8056, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 0.066880235725421, | |
| "grad_norm": 0.4839298725128174, | |
| "learning_rate": 0.00019788390266347613, | |
| "loss": 0.8041, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.06701728538879276, | |
| "grad_norm": 0.4496074914932251, | |
| "learning_rate": 0.00019787507096173386, | |
| "loss": 0.7869, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 0.06715433505216453, | |
| "grad_norm": 0.43039125204086304, | |
| "learning_rate": 0.00019786622106635207, | |
| "loss": 0.7886, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.06729138471553629, | |
| "grad_norm": 0.4781733751296997, | |
| "learning_rate": 0.00019785735297897584, | |
| "loss": 0.8016, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 0.06742843437890805, | |
| "grad_norm": 0.4739139974117279, | |
| "learning_rate": 0.00019784846670125358, | |
| "loss": 0.7987, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.06756548404227983, | |
| "grad_norm": 0.47873392701148987, | |
| "learning_rate": 0.0001978395622348372, | |
| "loss": 0.7957, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 0.06770253370565159, | |
| "grad_norm": 0.4496181905269623, | |
| "learning_rate": 0.0001978306395813819, | |
| "loss": 0.7966, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.06783958336902335, | |
| "grad_norm": 0.48717474937438965, | |
| "learning_rate": 0.00019782169874254623, | |
| "loss": 0.7961, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 0.06797663303239511, | |
| "grad_norm": 0.4960111379623413, | |
| "learning_rate": 0.00019781273971999222, | |
| "loss": 0.8033, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.06811368269576688, | |
| "grad_norm": 0.45468300580978394, | |
| "learning_rate": 0.00019780376251538523, | |
| "loss": 0.7954, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 0.06825073235913864, | |
| "grad_norm": 0.4620811939239502, | |
| "learning_rate": 0.00019779476713039395, | |
| "loss": 0.7884, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.0683877820225104, | |
| "grad_norm": 0.5242409110069275, | |
| "learning_rate": 0.00019778575356669057, | |
| "loss": 0.8074, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 0.06852483168588216, | |
| "grad_norm": 0.4887278378009796, | |
| "learning_rate": 0.00019777672182595053, | |
| "loss": 0.7938, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.06866188134925394, | |
| "grad_norm": 0.48667004704475403, | |
| "learning_rate": 0.0001977676719098527, | |
| "loss": 0.8002, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 0.0687989310126257, | |
| "grad_norm": 0.46475672721862793, | |
| "learning_rate": 0.0001977586038200794, | |
| "loss": 0.8023, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.06893598067599746, | |
| "grad_norm": 0.4685113728046417, | |
| "learning_rate": 0.00019774951755831627, | |
| "loss": 0.7976, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 0.06907303033936923, | |
| "grad_norm": 0.47015681862831116, | |
| "learning_rate": 0.00019774041312625222, | |
| "loss": 0.7936, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.069210080002741, | |
| "grad_norm": 0.4932703375816345, | |
| "learning_rate": 0.00019773129052557973, | |
| "loss": 0.7985, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 0.06934712966611276, | |
| "grad_norm": 0.4694651663303375, | |
| "learning_rate": 0.00019772214975799453, | |
| "loss": 0.7955, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.06948417932948452, | |
| "grad_norm": 0.44623926281929016, | |
| "learning_rate": 0.00019771299082519574, | |
| "loss": 0.7993, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 0.06962122899285629, | |
| "grad_norm": 0.4828040599822998, | |
| "learning_rate": 0.00019770381372888595, | |
| "loss": 0.7941, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.06975827865622805, | |
| "grad_norm": 0.4696119427680969, | |
| "learning_rate": 0.000197694618470771, | |
| "loss": 0.798, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 0.06989532831959981, | |
| "grad_norm": 0.49441754817962646, | |
| "learning_rate": 0.00019768540505256022, | |
| "loss": 0.8041, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.07003237798297159, | |
| "grad_norm": 0.5214009881019592, | |
| "learning_rate": 0.00019767617347596619, | |
| "loss": 0.7859, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 0.07016942764634335, | |
| "grad_norm": 0.46143990755081177, | |
| "learning_rate": 0.00019766692374270496, | |
| "loss": 0.7938, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.0703064773097151, | |
| "grad_norm": 0.5268940925598145, | |
| "learning_rate": 0.00019765765585449594, | |
| "loss": 0.8088, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 0.07044352697308687, | |
| "grad_norm": 0.4896582365036011, | |
| "learning_rate": 0.00019764836981306193, | |
| "loss": 0.7983, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.07058057663645864, | |
| "grad_norm": 0.45617246627807617, | |
| "learning_rate": 0.000197639065620129, | |
| "loss": 0.7983, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 0.0707176262998304, | |
| "grad_norm": 0.43470898270606995, | |
| "learning_rate": 0.00019762974327742675, | |
| "loss": 0.7981, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.07085467596320216, | |
| "grad_norm": 0.4943329393863678, | |
| "learning_rate": 0.000197620402786688, | |
| "loss": 0.7973, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 0.07099172562657392, | |
| "grad_norm": 0.4778243899345398, | |
| "learning_rate": 0.00019761104414964912, | |
| "loss": 0.796, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.0711287752899457, | |
| "grad_norm": 0.4903486669063568, | |
| "learning_rate": 0.00019760166736804968, | |
| "loss": 0.7967, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 0.07126582495331746, | |
| "grad_norm": 0.4814266562461853, | |
| "learning_rate": 0.0001975922724436327, | |
| "loss": 0.7989, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.07140287461668922, | |
| "grad_norm": 0.4753884971141815, | |
| "learning_rate": 0.00019758285937814457, | |
| "loss": 0.7977, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 0.07153992428006099, | |
| "grad_norm": 0.4597572684288025, | |
| "learning_rate": 0.0001975734281733351, | |
| "loss": 0.7962, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.07167697394343275, | |
| "grad_norm": 0.4517659544944763, | |
| "learning_rate": 0.00019756397883095736, | |
| "loss": 0.7938, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 0.07181402360680451, | |
| "grad_norm": 0.48652228713035583, | |
| "learning_rate": 0.00019755451135276787, | |
| "loss": 0.7902, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.07195107327017627, | |
| "grad_norm": 0.4702857732772827, | |
| "learning_rate": 0.00019754502574052655, | |
| "loss": 0.7999, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 0.07208812293354805, | |
| "grad_norm": 0.47835201025009155, | |
| "learning_rate": 0.0001975355219959966, | |
| "loss": 0.7982, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.07222517259691981, | |
| "grad_norm": 0.4716067314147949, | |
| "learning_rate": 0.0001975260001209446, | |
| "loss": 0.7959, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 0.07236222226029157, | |
| "grad_norm": 0.4469781517982483, | |
| "learning_rate": 0.0001975164601171406, | |
| "loss": 0.7946, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.07249927192366334, | |
| "grad_norm": 0.4652368724346161, | |
| "learning_rate": 0.00019750690198635796, | |
| "loss": 0.794, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 0.0726363215870351, | |
| "grad_norm": 0.47072771191596985, | |
| "learning_rate": 0.00019749732573037338, | |
| "loss": 0.8008, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.07277337125040687, | |
| "grad_norm": 0.48966971039772034, | |
| "learning_rate": 0.00019748773135096694, | |
| "loss": 0.8039, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 0.07291042091377863, | |
| "grad_norm": 0.4906329810619354, | |
| "learning_rate": 0.00019747811884992213, | |
| "loss": 0.7942, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.0730474705771504, | |
| "grad_norm": 0.4698156714439392, | |
| "learning_rate": 0.0001974684882290258, | |
| "loss": 0.7966, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 0.07318452024052216, | |
| "grad_norm": 0.4471251964569092, | |
| "learning_rate": 0.00019745883949006808, | |
| "loss": 0.7853, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.07332156990389392, | |
| "grad_norm": 0.4238675832748413, | |
| "learning_rate": 0.00019744917263484263, | |
| "loss": 0.7954, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 0.07345861956726568, | |
| "grad_norm": 0.48359739780426025, | |
| "learning_rate": 0.00019743948766514632, | |
| "loss": 0.788, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.07359566923063746, | |
| "grad_norm": 0.4774411916732788, | |
| "learning_rate": 0.0001974297845827795, | |
| "loss": 0.7928, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 0.07373271889400922, | |
| "grad_norm": 0.46250951290130615, | |
| "learning_rate": 0.0001974200633895458, | |
| "loss": 0.7981, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.07386976855738098, | |
| "grad_norm": 0.4583037495613098, | |
| "learning_rate": 0.00019741032408725226, | |
| "loss": 0.7907, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 0.07400681822075275, | |
| "grad_norm": 0.46894222497940063, | |
| "learning_rate": 0.00019740056667770932, | |
| "loss": 0.7959, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.07414386788412451, | |
| "grad_norm": 0.48790639638900757, | |
| "learning_rate": 0.0001973907911627307, | |
| "loss": 0.791, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 0.07428091754749627, | |
| "grad_norm": 0.4991588592529297, | |
| "learning_rate": 0.0001973809975441336, | |
| "loss": 0.8112, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.07441796721086803, | |
| "grad_norm": 0.4748496413230896, | |
| "learning_rate": 0.00019737118582373845, | |
| "loss": 0.7932, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 0.07455501687423981, | |
| "grad_norm": 0.47536593675613403, | |
| "learning_rate": 0.00019736135600336916, | |
| "loss": 0.8025, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.07469206653761157, | |
| "grad_norm": 0.48133620619773865, | |
| "learning_rate": 0.00019735150808485293, | |
| "loss": 0.7986, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 0.07482911620098333, | |
| "grad_norm": 0.4730217754840851, | |
| "learning_rate": 0.00019734164207002038, | |
| "loss": 0.7917, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.07496616586435509, | |
| "grad_norm": 0.4878595471382141, | |
| "learning_rate": 0.00019733175796070546, | |
| "loss": 0.7946, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 0.07510321552772686, | |
| "grad_norm": 0.4788990616798401, | |
| "learning_rate": 0.00019732185575874547, | |
| "loss": 0.796, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.07524026519109862, | |
| "grad_norm": 0.4414103627204895, | |
| "learning_rate": 0.00019731193546598114, | |
| "loss": 0.7901, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 0.07537731485447038, | |
| "grad_norm": 0.47307801246643066, | |
| "learning_rate": 0.00019730199708425646, | |
| "loss": 0.8015, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.07551436451784216, | |
| "grad_norm": 0.490078866481781, | |
| "learning_rate": 0.00019729204061541889, | |
| "loss": 0.8079, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 0.07565141418121392, | |
| "grad_norm": 0.4815292954444885, | |
| "learning_rate": 0.00019728206606131916, | |
| "loss": 0.8044, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.07578846384458568, | |
| "grad_norm": 0.47046026587486267, | |
| "learning_rate": 0.00019727207342381143, | |
| "loss": 0.7982, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 0.07592551350795744, | |
| "grad_norm": 0.488438218832016, | |
| "learning_rate": 0.0001972620627047532, | |
| "loss": 0.7939, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.07606256317132921, | |
| "grad_norm": 0.5354650020599365, | |
| "learning_rate": 0.0001972520339060053, | |
| "loss": 0.7929, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 0.07619961283470097, | |
| "grad_norm": 0.461015909910202, | |
| "learning_rate": 0.00019724198702943197, | |
| "loss": 0.7995, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.07633666249807274, | |
| "grad_norm": 0.5413848161697388, | |
| "learning_rate": 0.00019723192207690078, | |
| "loss": 0.7895, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 0.07647371216144451, | |
| "grad_norm": 0.46962863206863403, | |
| "learning_rate": 0.00019722183905028265, | |
| "loss": 0.7885, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.07661076182481627, | |
| "grad_norm": 0.4755912125110626, | |
| "learning_rate": 0.00019721173795145188, | |
| "loss": 0.7883, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 0.07674781148818803, | |
| "grad_norm": 0.4535827040672302, | |
| "learning_rate": 0.0001972016187822862, | |
| "loss": 0.7905, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.07688486115155979, | |
| "grad_norm": 0.4831449091434479, | |
| "learning_rate": 0.0001971914815446665, | |
| "loss": 0.7918, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 0.07702191081493157, | |
| "grad_norm": 0.49459129571914673, | |
| "learning_rate": 0.00019718132624047725, | |
| "loss": 0.7905, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.07715896047830333, | |
| "grad_norm": 0.46750736236572266, | |
| "learning_rate": 0.00019717115287160613, | |
| "loss": 0.7954, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 0.07729601014167509, | |
| "grad_norm": 0.5005916953086853, | |
| "learning_rate": 0.00019716096143994428, | |
| "loss": 0.7912, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.07743305980504685, | |
| "grad_norm": 0.5159142017364502, | |
| "learning_rate": 0.00019715075194738608, | |
| "loss": 0.7944, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 0.07757010946841862, | |
| "grad_norm": 0.4624069929122925, | |
| "learning_rate": 0.00019714052439582939, | |
| "loss": 0.7898, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.07770715913179038, | |
| "grad_norm": 0.4599010646343231, | |
| "learning_rate": 0.00019713027878717537, | |
| "loss": 0.7904, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 0.07784420879516214, | |
| "grad_norm": 0.5177801847457886, | |
| "learning_rate": 0.00019712001512332852, | |
| "loss": 0.7961, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.07798125845853392, | |
| "grad_norm": 0.494656503200531, | |
| "learning_rate": 0.00019710973340619675, | |
| "loss": 0.798, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 0.07811830812190568, | |
| "grad_norm": 0.46997737884521484, | |
| "learning_rate": 0.0001970994336376912, | |
| "loss": 0.7912, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.07825535778527744, | |
| "grad_norm": 0.47283247113227844, | |
| "learning_rate": 0.00019708911581972657, | |
| "loss": 0.7986, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 0.0783924074486492, | |
| "grad_norm": 0.47213488817214966, | |
| "learning_rate": 0.0001970787799542207, | |
| "loss": 0.7926, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.07852945711202097, | |
| "grad_norm": 0.4525822401046753, | |
| "learning_rate": 0.00019706842604309496, | |
| "loss": 0.7946, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 0.07866650677539273, | |
| "grad_norm": 0.4752897024154663, | |
| "learning_rate": 0.000197058054088274, | |
| "loss": 0.7888, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.0788035564387645, | |
| "grad_norm": 0.47640207409858704, | |
| "learning_rate": 0.00019704766409168575, | |
| "loss": 0.8016, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 0.07894060610213627, | |
| "grad_norm": 0.49076226353645325, | |
| "learning_rate": 0.00019703725605526166, | |
| "loss": 0.7891, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.07907765576550803, | |
| "grad_norm": 0.5122234225273132, | |
| "learning_rate": 0.00019702682998093638, | |
| "loss": 0.7914, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 0.07921470542887979, | |
| "grad_norm": 0.4687149226665497, | |
| "learning_rate": 0.00019701638587064801, | |
| "loss": 0.7876, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.07935175509225155, | |
| "grad_norm": 0.48135191202163696, | |
| "learning_rate": 0.00019700592372633792, | |
| "loss": 0.797, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 0.07948880475562332, | |
| "grad_norm": 0.457065612077713, | |
| "learning_rate": 0.00019699544354995097, | |
| "loss": 0.7903, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.07962585441899508, | |
| "grad_norm": 0.49080437421798706, | |
| "learning_rate": 0.0001969849453434352, | |
| "loss": 0.7941, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 0.07976290408236685, | |
| "grad_norm": 0.47728291153907776, | |
| "learning_rate": 0.0001969744291087421, | |
| "loss": 0.7973, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.0798999537457386, | |
| "grad_norm": 0.4719684422016144, | |
| "learning_rate": 0.00019696389484782649, | |
| "loss": 0.7931, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 0.08003700340911038, | |
| "grad_norm": 0.4797501862049103, | |
| "learning_rate": 0.00019695334256264658, | |
| "loss": 0.7852, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.08017405307248214, | |
| "grad_norm": 0.4938788115978241, | |
| "learning_rate": 0.00019694277225516387, | |
| "loss": 0.7949, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 0.0803111027358539, | |
| "grad_norm": 0.5137888193130493, | |
| "learning_rate": 0.00019693218392734325, | |
| "loss": 0.7933, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.08044815239922568, | |
| "grad_norm": 0.4900031089782715, | |
| "learning_rate": 0.0001969215775811529, | |
| "loss": 0.8004, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 0.08058520206259744, | |
| "grad_norm": 0.4935697317123413, | |
| "learning_rate": 0.00019691095321856445, | |
| "loss": 0.7966, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.0807222517259692, | |
| "grad_norm": 0.48324236273765564, | |
| "learning_rate": 0.00019690031084155282, | |
| "loss": 0.802, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 0.08085930138934096, | |
| "grad_norm": 0.44104790687561035, | |
| "learning_rate": 0.00019688965045209624, | |
| "loss": 0.7906, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.08099635105271273, | |
| "grad_norm": 0.4910528361797333, | |
| "learning_rate": 0.00019687897205217638, | |
| "loss": 0.7939, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 0.08113340071608449, | |
| "grad_norm": 0.4602694809436798, | |
| "learning_rate": 0.0001968682756437782, | |
| "loss": 0.7927, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.08127045037945625, | |
| "grad_norm": 0.47206035256385803, | |
| "learning_rate": 0.00019685756122888996, | |
| "loss": 0.7845, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 0.08140750004282803, | |
| "grad_norm": 0.471284419298172, | |
| "learning_rate": 0.00019684682880950337, | |
| "loss": 0.7878, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.08154454970619979, | |
| "grad_norm": 0.4756605625152588, | |
| "learning_rate": 0.0001968360783876135, | |
| "loss": 0.7915, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 0.08168159936957155, | |
| "grad_norm": 0.5032472014427185, | |
| "learning_rate": 0.00019682530996521859, | |
| "loss": 0.7811, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.08181864903294331, | |
| "grad_norm": 0.49361568689346313, | |
| "learning_rate": 0.0001968145235443204, | |
| "loss": 0.8027, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 0.08195569869631508, | |
| "grad_norm": 0.4601605236530304, | |
| "learning_rate": 0.00019680371912692395, | |
| "loss": 0.7915, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.08209274835968684, | |
| "grad_norm": 0.46172550320625305, | |
| "learning_rate": 0.0001967928967150377, | |
| "loss": 0.8005, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 0.0822297980230586, | |
| "grad_norm": 0.4655384123325348, | |
| "learning_rate": 0.00019678205631067333, | |
| "loss": 0.7884, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.08236684768643036, | |
| "grad_norm": 0.49141067266464233, | |
| "learning_rate": 0.00019677119791584592, | |
| "loss": 0.7914, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 0.08250389734980214, | |
| "grad_norm": 0.5075825452804565, | |
| "learning_rate": 0.00019676032153257396, | |
| "loss": 0.7973, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.0826409470131739, | |
| "grad_norm": 0.4856976866722107, | |
| "learning_rate": 0.00019674942716287915, | |
| "loss": 0.7892, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 0.08277799667654566, | |
| "grad_norm": 0.4587472975254059, | |
| "learning_rate": 0.00019673851480878665, | |
| "loss": 0.79, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.08291504633991743, | |
| "grad_norm": 0.4883078932762146, | |
| "learning_rate": 0.00019672758447232487, | |
| "loss": 0.7917, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 0.0830520960032892, | |
| "grad_norm": 0.46209436655044556, | |
| "learning_rate": 0.00019671663615552566, | |
| "loss": 0.7957, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.08318914566666095, | |
| "grad_norm": 0.48940733075141907, | |
| "learning_rate": 0.00019670566986042416, | |
| "loss": 0.7883, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 0.08332619533003272, | |
| "grad_norm": 0.44892752170562744, | |
| "learning_rate": 0.0001966946855890588, | |
| "loss": 0.7926, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.08346324499340449, | |
| "grad_norm": 0.4406869113445282, | |
| "learning_rate": 0.0001966836833434715, | |
| "loss": 0.7937, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 0.08360029465677625, | |
| "grad_norm": 0.47302696108818054, | |
| "learning_rate": 0.00019667266312570732, | |
| "loss": 0.7966, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.08373734432014801, | |
| "grad_norm": 0.4698280990123749, | |
| "learning_rate": 0.00019666162493781484, | |
| "loss": 0.7959, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 0.08387439398351977, | |
| "grad_norm": 0.46622198820114136, | |
| "learning_rate": 0.00019665056878184588, | |
| "loss": 0.7848, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.08401144364689155, | |
| "grad_norm": 0.44060927629470825, | |
| "learning_rate": 0.00019663949465985564, | |
| "loss": 0.7901, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 0.0841484933102633, | |
| "grad_norm": 0.4829963743686676, | |
| "learning_rate": 0.00019662840257390264, | |
| "loss": 0.7857, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.08428554297363507, | |
| "grad_norm": 0.4828779399394989, | |
| "learning_rate": 0.00019661729252604877, | |
| "loss": 0.7991, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 0.08442259263700684, | |
| "grad_norm": 0.4656374156475067, | |
| "learning_rate": 0.0001966061645183592, | |
| "loss": 0.8017, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.0845596423003786, | |
| "grad_norm": 0.43232300877571106, | |
| "learning_rate": 0.0001965950185529025, | |
| "loss": 0.7916, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 0.08469669196375036, | |
| "grad_norm": 0.483804315328598, | |
| "learning_rate": 0.00019658385463175053, | |
| "loss": 0.7831, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.08483374162712212, | |
| "grad_norm": 0.4652833938598633, | |
| "learning_rate": 0.00019657267275697854, | |
| "loss": 0.7963, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 0.0849707912904939, | |
| "grad_norm": 0.4923244118690491, | |
| "learning_rate": 0.00019656147293066508, | |
| "loss": 0.7988, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.08510784095386566, | |
| "grad_norm": 0.4675879180431366, | |
| "learning_rate": 0.00019655025515489201, | |
| "loss": 0.7933, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 0.08524489061723742, | |
| "grad_norm": 0.46415793895721436, | |
| "learning_rate": 0.00019653901943174462, | |
| "loss": 0.7928, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.08538194028060919, | |
| "grad_norm": 0.4991393983364105, | |
| "learning_rate": 0.00019652776576331146, | |
| "loss": 0.7948, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 0.08551898994398095, | |
| "grad_norm": 0.46876922249794006, | |
| "learning_rate": 0.00019651649415168437, | |
| "loss": 0.7875, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.08565603960735271, | |
| "grad_norm": 0.4724014699459076, | |
| "learning_rate": 0.00019650520459895868, | |
| "loss": 0.7958, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 0.08579308927072447, | |
| "grad_norm": 0.5036382079124451, | |
| "learning_rate": 0.0001964938971072329, | |
| "loss": 0.8053, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.08593013893409625, | |
| "grad_norm": 0.4702759087085724, | |
| "learning_rate": 0.00019648257167860899, | |
| "loss": 0.7869, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 0.08606718859746801, | |
| "grad_norm": 0.48790547251701355, | |
| "learning_rate": 0.00019647122831519215, | |
| "loss": 0.7887, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.08620423826083977, | |
| "grad_norm": 0.4691486954689026, | |
| "learning_rate": 0.00019645986701909097, | |
| "loss": 0.7939, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 0.08634128792421153, | |
| "grad_norm": 0.48565474152565, | |
| "learning_rate": 0.00019644848779241735, | |
| "loss": 0.7927, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.0864783375875833, | |
| "grad_norm": 0.4843965172767639, | |
| "learning_rate": 0.00019643709063728654, | |
| "loss": 0.7876, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 0.08661538725095506, | |
| "grad_norm": 0.49629899859428406, | |
| "learning_rate": 0.00019642567555581714, | |
| "loss": 0.7887, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.08675243691432682, | |
| "grad_norm": 0.49302300810813904, | |
| "learning_rate": 0.00019641424255013106, | |
| "loss": 0.798, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 0.0868894865776986, | |
| "grad_norm": 0.510237455368042, | |
| "learning_rate": 0.00019640279162235348, | |
| "loss": 0.7981, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.08702653624107036, | |
| "grad_norm": 0.4617491662502289, | |
| "learning_rate": 0.000196391322774613, | |
| "loss": 0.8049, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 0.08716358590444212, | |
| "grad_norm": 0.4670426547527313, | |
| "learning_rate": 0.00019637983600904156, | |
| "loss": 0.7886, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.08730063556781388, | |
| "grad_norm": 0.5087895393371582, | |
| "learning_rate": 0.00019636833132777434, | |
| "loss": 0.7899, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 0.08743768523118566, | |
| "grad_norm": 0.5074539184570312, | |
| "learning_rate": 0.00019635680873294993, | |
| "loss": 0.7868, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.08757473489455742, | |
| "grad_norm": 0.4760209023952484, | |
| "learning_rate": 0.00019634526822671022, | |
| "loss": 0.7927, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 0.08771178455792918, | |
| "grad_norm": 0.4537639319896698, | |
| "learning_rate": 0.00019633370981120044, | |
| "loss": 0.7858, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.08784883422130095, | |
| "grad_norm": 0.48078107833862305, | |
| "learning_rate": 0.00019632213348856912, | |
| "loss": 0.7918, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 0.08798588388467271, | |
| "grad_norm": 0.47621411085128784, | |
| "learning_rate": 0.00019631053926096815, | |
| "loss": 0.7978, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.08812293354804447, | |
| "grad_norm": 0.4982161819934845, | |
| "learning_rate": 0.00019629892713055275, | |
| "loss": 0.7901, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 0.08825998321141623, | |
| "grad_norm": 0.4935854971408844, | |
| "learning_rate": 0.00019628729709948145, | |
| "loss": 0.7851, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.088397032874788, | |
| "grad_norm": 0.49464720487594604, | |
| "learning_rate": 0.00019627564916991613, | |
| "loss": 0.7949, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 0.08853408253815977, | |
| "grad_norm": 0.4959528148174286, | |
| "learning_rate": 0.00019626398334402195, | |
| "loss": 0.7926, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.08867113220153153, | |
| "grad_norm": 0.45773616433143616, | |
| "learning_rate": 0.00019625229962396743, | |
| "loss": 0.7868, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 0.08880818186490329, | |
| "grad_norm": 0.48498624563217163, | |
| "learning_rate": 0.00019624059801192444, | |
| "loss": 0.7811, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.08894523152827506, | |
| "grad_norm": 0.4527600109577179, | |
| "learning_rate": 0.0001962288785100681, | |
| "loss": 0.789, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 0.08908228119164682, | |
| "grad_norm": 0.46126919984817505, | |
| "learning_rate": 0.000196217141120577, | |
| "loss": 0.7894, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.08921933085501858, | |
| "grad_norm": 0.4673360586166382, | |
| "learning_rate": 0.00019620538584563284, | |
| "loss": 0.7866, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 0.08935638051839036, | |
| "grad_norm": 0.4808477461338043, | |
| "learning_rate": 0.00019619361268742087, | |
| "loss": 0.7941, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.08949343018176212, | |
| "grad_norm": 0.4685232639312744, | |
| "learning_rate": 0.0001961818216481295, | |
| "loss": 0.7915, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 0.08963047984513388, | |
| "grad_norm": 0.45769527554512024, | |
| "learning_rate": 0.00019617001272995054, | |
| "loss": 0.7996, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.08976752950850564, | |
| "grad_norm": 0.46215105056762695, | |
| "learning_rate": 0.00019615818593507914, | |
| "loss": 0.7873, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 0.08990457917187741, | |
| "grad_norm": 0.4922528564929962, | |
| "learning_rate": 0.00019614634126571365, | |
| "loss": 0.7899, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.09004162883524917, | |
| "grad_norm": 0.486177921295166, | |
| "learning_rate": 0.0001961344787240559, | |
| "loss": 0.7896, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 0.09017867849862093, | |
| "grad_norm": 0.47860997915267944, | |
| "learning_rate": 0.00019612259831231098, | |
| "loss": 0.793, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.09031572816199271, | |
| "grad_norm": 0.47204136848449707, | |
| "learning_rate": 0.0001961107000326873, | |
| "loss": 0.785, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 0.09045277782536447, | |
| "grad_norm": 0.4928169250488281, | |
| "learning_rate": 0.00019609878388739653, | |
| "loss": 0.7896, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.09058982748873623, | |
| "grad_norm": 0.5053184032440186, | |
| "learning_rate": 0.00019608684987865375, | |
| "loss": 0.7901, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 0.09072687715210799, | |
| "grad_norm": 0.4610320031642914, | |
| "learning_rate": 0.00019607489800867737, | |
| "loss": 0.7888, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.09086392681547976, | |
| "grad_norm": 0.48464083671569824, | |
| "learning_rate": 0.000196062928279689, | |
| "loss": 0.7977, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 0.09100097647885153, | |
| "grad_norm": 0.4782997965812683, | |
| "learning_rate": 0.0001960509406939137, | |
| "loss": 0.7971, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.09113802614222329, | |
| "grad_norm": 0.5001197457313538, | |
| "learning_rate": 0.00019603893525357984, | |
| "loss": 0.7911, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 0.09127507580559505, | |
| "grad_norm": 0.4851212203502655, | |
| "learning_rate": 0.000196026911960919, | |
| "loss": 0.7976, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.09141212546896682, | |
| "grad_norm": 0.5041796565055847, | |
| "learning_rate": 0.00019601487081816616, | |
| "loss": 0.7862, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 0.09154917513233858, | |
| "grad_norm": 0.458372563123703, | |
| "learning_rate": 0.0001960028118275596, | |
| "loss": 0.7859, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.09168622479571034, | |
| "grad_norm": 0.4743143618106842, | |
| "learning_rate": 0.00019599073499134093, | |
| "loss": 0.7898, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 0.09182327445908212, | |
| "grad_norm": 0.5056654214859009, | |
| "learning_rate": 0.0001959786403117551, | |
| "loss": 0.7998, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.09196032412245388, | |
| "grad_norm": 0.49195945262908936, | |
| "learning_rate": 0.00019596652779105026, | |
| "loss": 0.7912, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 0.09209737378582564, | |
| "grad_norm": 0.4621276557445526, | |
| "learning_rate": 0.00019595439743147806, | |
| "loss": 0.8009, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.0922344234491974, | |
| "grad_norm": 0.5244962573051453, | |
| "learning_rate": 0.00019594224923529332, | |
| "loss": 0.7926, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 0.09237147311256917, | |
| "grad_norm": 0.48035624623298645, | |
| "learning_rate": 0.0001959300832047542, | |
| "loss": 0.7893, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.09250852277594093, | |
| "grad_norm": 0.48420900106430054, | |
| "learning_rate": 0.00019591789934212225, | |
| "loss": 0.7889, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 0.09264557243931269, | |
| "grad_norm": 0.46187272667884827, | |
| "learning_rate": 0.00019590569764966226, | |
| "loss": 0.7873, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.09278262210268447, | |
| "grad_norm": 0.48740679025650024, | |
| "learning_rate": 0.0001958934781296424, | |
| "loss": 0.786, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 0.09291967176605623, | |
| "grad_norm": 0.49153274297714233, | |
| "learning_rate": 0.00019588124078433403, | |
| "loss": 0.7906, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.09305672142942799, | |
| "grad_norm": 0.4947867691516876, | |
| "learning_rate": 0.00019586898561601196, | |
| "loss": 0.7856, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 0.09319377109279975, | |
| "grad_norm": 0.48114946484565735, | |
| "learning_rate": 0.00019585671262695425, | |
| "loss": 0.7914, | |
| "step": 34000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 364831, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.376731557311283e+19, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |