{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6671954545535203, "eval_steps": 2000, "global_step": 94000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.097823984611918e-06, "grad_norm": 3.421875, "learning_rate": 2e-06, "loss": 1.9673, "step": 1 }, { "epoch": 7.097823984611917e-05, "grad_norm": 2.5625, "learning_rate": 2e-05, "loss": 2.8247, "step": 10 }, { "epoch": 0.00014195647969223834, "grad_norm": 2.25, "learning_rate": 4e-05, "loss": 2.7814, "step": 20 }, { "epoch": 0.00021293471953835752, "grad_norm": 2.40625, "learning_rate": 6e-05, "loss": 2.631, "step": 30 }, { "epoch": 0.0002839129593844767, "grad_norm": 2.015625, "learning_rate": 8e-05, "loss": 2.5256, "step": 40 }, { "epoch": 0.00035489119923059586, "grad_norm": 2.8125, "learning_rate": 0.0001, "loss": 2.3918, "step": 50 }, { "epoch": 0.00042586943907671503, "grad_norm": 1.2109375, "learning_rate": 0.00012, "loss": 2.3012, "step": 60 }, { "epoch": 0.0004968476789228342, "grad_norm": 1.015625, "learning_rate": 0.00014000000000000001, "loss": 2.2406, "step": 70 }, { "epoch": 0.0005678259187689534, "grad_norm": 0.98046875, "learning_rate": 0.00016, "loss": 2.1576, "step": 80 }, { "epoch": 0.0006388041586150726, "grad_norm": 0.8359375, "learning_rate": 0.00017999999999999998, "loss": 2.0241, "step": 90 }, { "epoch": 0.0007097823984611917, "grad_norm": 0.8671875, "learning_rate": 0.0002, "loss": 2.0779, "step": 100 }, { "epoch": 0.0007807606383073109, "grad_norm": 0.73828125, "learning_rate": 0.00022, "loss": 1.9851, "step": 110 }, { "epoch": 0.0008517388781534301, "grad_norm": 0.76171875, "learning_rate": 0.00024, "loss": 1.9232, "step": 120 }, { "epoch": 0.0009227171179995492, "grad_norm": 0.72265625, "learning_rate": 0.00026000000000000003, "loss": 1.9514, "step": 130 }, { "epoch": 0.0009936953578456684, "grad_norm": 0.8203125, "learning_rate": 0.00028000000000000003, "loss": 1.8969, "step": 140 }, { "epoch": 0.0010646735976917877, "grad_norm": 1.203125, "learning_rate": 0.0003, "loss": 1.8305, "step": 150 }, { "epoch": 0.0011356518375379068, "grad_norm": 1.015625, "learning_rate": 0.00032, "loss": 1.7253, "step": 160 }, { "epoch": 0.001206630077384026, "grad_norm": 1.046875, "learning_rate": 0.00034, "loss": 1.4287, "step": 170 }, { "epoch": 0.001277608317230145, "grad_norm": 1.0078125, "learning_rate": 0.00035999999999999997, "loss": 1.1894, "step": 180 }, { "epoch": 0.0013485865570762644, "grad_norm": 0.84765625, "learning_rate": 0.00038, "loss": 1.0411, "step": 190 }, { "epoch": 0.0014195647969223834, "grad_norm": 0.91015625, "learning_rate": 0.0004, "loss": 0.9419, "step": 200 }, { "epoch": 0.0014905430367685027, "grad_norm": 0.6484375, "learning_rate": 0.00042, "loss": 0.8918, "step": 210 }, { "epoch": 0.0015615212766146218, "grad_norm": 0.7890625, "learning_rate": 0.00044, "loss": 0.8232, "step": 220 }, { "epoch": 0.001632499516460741, "grad_norm": 0.765625, "learning_rate": 0.00046, "loss": 0.8122, "step": 230 }, { "epoch": 0.0017034777563068601, "grad_norm": 0.6640625, "learning_rate": 0.00048, "loss": 0.7864, "step": 240 }, { "epoch": 0.0017744559961529794, "grad_norm": 0.69921875, "learning_rate": 0.0005, "loss": 0.7349, "step": 250 }, { "epoch": 0.0018454342359990985, "grad_norm": 0.6796875, "learning_rate": 0.0005200000000000001, "loss": 0.7337, "step": 260 }, { "epoch": 0.0019164124758452178, "grad_norm": 0.71875, "learning_rate": 0.00054, "loss": 0.6994, "step": 270 }, { "epoch": 0.001987390715691337, "grad_norm": 0.77734375, "learning_rate": 0.0005600000000000001, "loss": 0.713, "step": 280 }, { "epoch": 0.002058368955537456, "grad_norm": 0.60546875, "learning_rate": 0.00058, "loss": 0.6934, "step": 290 }, { "epoch": 0.0021293471953835754, "grad_norm": 0.5078125, "learning_rate": 0.0006, "loss": 0.6397, "step": 300 }, { "epoch": 0.0022003254352296947, "grad_norm": 0.62109375, "learning_rate": 0.00062, "loss": 0.6531, "step": 310 }, { "epoch": 0.0022713036750758135, "grad_norm": 0.54296875, "learning_rate": 0.00064, "loss": 0.6486, "step": 320 }, { "epoch": 0.002342281914921933, "grad_norm": 0.515625, "learning_rate": 0.00066, "loss": 0.6271, "step": 330 }, { "epoch": 0.002413260154768052, "grad_norm": 0.65234375, "learning_rate": 0.00068, "loss": 0.6154, "step": 340 }, { "epoch": 0.0024842383946141714, "grad_norm": 0.5546875, "learning_rate": 0.0007, "loss": 0.5797, "step": 350 }, { "epoch": 0.00255521663446029, "grad_norm": 1.1484375, "learning_rate": 0.0007199999999999999, "loss": 0.5683, "step": 360 }, { "epoch": 0.0026261948743064095, "grad_norm": 0.6875, "learning_rate": 0.00074, "loss": 0.565, "step": 370 }, { "epoch": 0.0026971731141525288, "grad_norm": 0.625, "learning_rate": 0.00076, "loss": 0.5394, "step": 380 }, { "epoch": 0.002768151353998648, "grad_norm": 0.6328125, "learning_rate": 0.0007800000000000001, "loss": 0.517, "step": 390 }, { "epoch": 0.002839129593844767, "grad_norm": 0.59375, "learning_rate": 0.0008, "loss": 0.5183, "step": 400 }, { "epoch": 0.002910107833690886, "grad_norm": 0.486328125, "learning_rate": 0.00082, "loss": 0.508, "step": 410 }, { "epoch": 0.0029810860735370055, "grad_norm": 0.486328125, "learning_rate": 0.00084, "loss": 0.4891, "step": 420 }, { "epoch": 0.0030520643133831247, "grad_norm": 0.58203125, "learning_rate": 0.00086, "loss": 0.4898, "step": 430 }, { "epoch": 0.0031230425532292436, "grad_norm": 0.5, "learning_rate": 0.00088, "loss": 0.4683, "step": 440 }, { "epoch": 0.003194020793075363, "grad_norm": 0.46875, "learning_rate": 0.0009000000000000001, "loss": 0.4596, "step": 450 }, { "epoch": 0.003264999032921482, "grad_norm": 0.412109375, "learning_rate": 0.00092, "loss": 0.495, "step": 460 }, { "epoch": 0.0033359772727676014, "grad_norm": 0.412109375, "learning_rate": 0.00094, "loss": 0.468, "step": 470 }, { "epoch": 0.0034069555126137203, "grad_norm": 0.39453125, "learning_rate": 0.00096, "loss": 0.4736, "step": 480 }, { "epoch": 0.0034779337524598396, "grad_norm": 1.1484375, "learning_rate": 0.00098, "loss": 0.4622, "step": 490 }, { "epoch": 0.003548911992305959, "grad_norm": 0.83984375, "learning_rate": 0.001, "loss": 0.5132, "step": 500 }, { "epoch": 0.003619890232152078, "grad_norm": 0.466796875, "learning_rate": 0.00102, "loss": 0.462, "step": 510 }, { "epoch": 0.003690868471998197, "grad_norm": 0.5234375, "learning_rate": 0.0010400000000000001, "loss": 0.4796, "step": 520 }, { "epoch": 0.0037618467118443162, "grad_norm": 0.41796875, "learning_rate": 0.0010600000000000002, "loss": 0.4899, "step": 530 }, { "epoch": 0.0038328249516904355, "grad_norm": 0.4921875, "learning_rate": 0.00108, "loss": 0.4605, "step": 540 }, { "epoch": 0.003903803191536555, "grad_norm": 0.40234375, "learning_rate": 0.0011, "loss": 0.4321, "step": 550 }, { "epoch": 0.003974781431382674, "grad_norm": 0.3828125, "learning_rate": 0.0011200000000000001, "loss": 0.4655, "step": 560 }, { "epoch": 0.004045759671228793, "grad_norm": 0.4296875, "learning_rate": 0.00114, "loss": 0.4622, "step": 570 }, { "epoch": 0.004116737911074912, "grad_norm": 0.40625, "learning_rate": 0.00116, "loss": 0.4311, "step": 580 }, { "epoch": 0.004187716150921031, "grad_norm": 0.392578125, "learning_rate": 0.00118, "loss": 0.4381, "step": 590 }, { "epoch": 0.004258694390767151, "grad_norm": 0.41015625, "learning_rate": 0.0012, "loss": 0.4417, "step": 600 }, { "epoch": 0.00432967263061327, "grad_norm": 0.369140625, "learning_rate": 0.00122, "loss": 0.4195, "step": 610 }, { "epoch": 0.004400650870459389, "grad_norm": 0.4453125, "learning_rate": 0.00124, "loss": 0.4411, "step": 620 }, { "epoch": 0.004471629110305508, "grad_norm": 0.3125, "learning_rate": 0.00126, "loss": 0.4142, "step": 630 }, { "epoch": 0.004542607350151627, "grad_norm": 0.31640625, "learning_rate": 0.00128, "loss": 0.4043, "step": 640 }, { "epoch": 0.004613585589997747, "grad_norm": 0.365234375, "learning_rate": 0.0013000000000000002, "loss": 0.4046, "step": 650 }, { "epoch": 0.004684563829843866, "grad_norm": 0.267578125, "learning_rate": 0.00132, "loss": 0.423, "step": 660 }, { "epoch": 0.0047555420696899844, "grad_norm": 0.447265625, "learning_rate": 0.00134, "loss": 0.409, "step": 670 }, { "epoch": 0.004826520309536104, "grad_norm": 0.302734375, "learning_rate": 0.00136, "loss": 0.3837, "step": 680 }, { "epoch": 0.004897498549382223, "grad_norm": 0.337890625, "learning_rate": 0.00138, "loss": 0.4171, "step": 690 }, { "epoch": 0.004968476789228343, "grad_norm": 0.3359375, "learning_rate": 0.0014, "loss": 0.4104, "step": 700 }, { "epoch": 0.0050394550290744616, "grad_norm": 0.302734375, "learning_rate": 0.00142, "loss": 0.4044, "step": 710 }, { "epoch": 0.00511043326892058, "grad_norm": 0.384765625, "learning_rate": 0.0014399999999999999, "loss": 0.4001, "step": 720 }, { "epoch": 0.0051814115087667, "grad_norm": 0.263671875, "learning_rate": 0.00146, "loss": 0.4025, "step": 730 }, { "epoch": 0.005252389748612819, "grad_norm": 0.3359375, "learning_rate": 0.00148, "loss": 0.402, "step": 740 }, { "epoch": 0.005323367988458938, "grad_norm": 0.369140625, "learning_rate": 0.0015, "loss": 0.3981, "step": 750 }, { "epoch": 0.0053943462283050575, "grad_norm": 0.3046875, "learning_rate": 0.00152, "loss": 0.3818, "step": 760 }, { "epoch": 0.005465324468151176, "grad_norm": 0.251953125, "learning_rate": 0.0015400000000000001, "loss": 0.3857, "step": 770 }, { "epoch": 0.005536302707997296, "grad_norm": 0.283203125, "learning_rate": 0.0015600000000000002, "loss": 0.3803, "step": 780 }, { "epoch": 0.005607280947843415, "grad_norm": 0.341796875, "learning_rate": 0.00158, "loss": 0.3891, "step": 790 }, { "epoch": 0.005678259187689534, "grad_norm": 0.2734375, "learning_rate": 0.0016, "loss": 0.3918, "step": 800 }, { "epoch": 0.0057492374275356535, "grad_norm": 0.27734375, "learning_rate": 0.0016200000000000001, "loss": 0.3871, "step": 810 }, { "epoch": 0.005820215667381772, "grad_norm": 0.25, "learning_rate": 0.00164, "loss": 0.404, "step": 820 }, { "epoch": 0.005891193907227891, "grad_norm": 0.255859375, "learning_rate": 0.00166, "loss": 0.3794, "step": 830 }, { "epoch": 0.005962172147074011, "grad_norm": 0.345703125, "learning_rate": 0.00168, "loss": 0.4127, "step": 840 }, { "epoch": 0.00603315038692013, "grad_norm": 0.23828125, "learning_rate": 0.0017, "loss": 0.4, "step": 850 }, { "epoch": 0.0061041286267662495, "grad_norm": 0.2109375, "learning_rate": 0.00172, "loss": 0.3514, "step": 860 }, { "epoch": 0.006175106866612368, "grad_norm": 0.23828125, "learning_rate": 0.00174, "loss": 0.3929, "step": 870 }, { "epoch": 0.006246085106458487, "grad_norm": 0.240234375, "learning_rate": 0.00176, "loss": 0.3832, "step": 880 }, { "epoch": 0.006317063346304607, "grad_norm": 0.32421875, "learning_rate": 0.0017800000000000001, "loss": 0.3802, "step": 890 }, { "epoch": 0.006388041586150726, "grad_norm": 0.26171875, "learning_rate": 0.0018000000000000002, "loss": 0.3838, "step": 900 }, { "epoch": 0.0064590198259968454, "grad_norm": 0.22265625, "learning_rate": 0.00182, "loss": 0.3709, "step": 910 }, { "epoch": 0.006529998065842964, "grad_norm": 0.4140625, "learning_rate": 0.00184, "loss": 0.432, "step": 920 }, { "epoch": 0.006600976305689083, "grad_norm": 0.2255859375, "learning_rate": 0.00186, "loss": 0.3726, "step": 930 }, { "epoch": 0.006671954545535203, "grad_norm": 0.294921875, "learning_rate": 0.00188, "loss": 0.4191, "step": 940 }, { "epoch": 0.006742932785381322, "grad_norm": 0.25390625, "learning_rate": 0.0019, "loss": 0.3847, "step": 950 }, { "epoch": 0.0068139110252274405, "grad_norm": 0.271484375, "learning_rate": 0.00192, "loss": 0.3907, "step": 960 }, { "epoch": 0.00688488926507356, "grad_norm": 0.326171875, "learning_rate": 0.0019399999999999999, "loss": 0.3924, "step": 970 }, { "epoch": 0.006955867504919679, "grad_norm": 0.439453125, "learning_rate": 0.00196, "loss": 0.4228, "step": 980 }, { "epoch": 0.007026845744765799, "grad_norm": 0.28125, "learning_rate": 0.00198, "loss": 0.4137, "step": 990 }, { "epoch": 0.007097823984611918, "grad_norm": 0.30859375, "learning_rate": 0.002, "loss": 0.3955, "step": 1000 }, { "epoch": 0.0071688022244580365, "grad_norm": 0.2451171875, "learning_rate": 0.0019999999995549784, "loss": 0.3857, "step": 1010 }, { "epoch": 0.007239780464304156, "grad_norm": 0.259765625, "learning_rate": 0.0019999999982199127, "loss": 0.3826, "step": 1020 }, { "epoch": 0.007310758704150275, "grad_norm": 0.29296875, "learning_rate": 0.0019999999959948037, "loss": 0.4023, "step": 1030 }, { "epoch": 0.007381736943996394, "grad_norm": 0.208984375, "learning_rate": 0.0019999999928796515, "loss": 0.379, "step": 1040 }, { "epoch": 0.007452715183842514, "grad_norm": 0.1962890625, "learning_rate": 0.001999999988874455, "loss": 0.3699, "step": 1050 }, { "epoch": 0.0075236934236886325, "grad_norm": 0.248046875, "learning_rate": 0.0019999999839792155, "loss": 0.369, "step": 1060 }, { "epoch": 0.007594671663534752, "grad_norm": 0.25390625, "learning_rate": 0.001999999978193932, "loss": 0.3717, "step": 1070 }, { "epoch": 0.007665649903380871, "grad_norm": 0.2041015625, "learning_rate": 0.0019999999715186053, "loss": 0.3582, "step": 1080 }, { "epoch": 0.00773662814322699, "grad_norm": 0.294921875, "learning_rate": 0.001999999963953235, "loss": 0.3733, "step": 1090 }, { "epoch": 0.00780760638307311, "grad_norm": 0.267578125, "learning_rate": 0.0019999999554978208, "loss": 0.3834, "step": 1100 }, { "epoch": 0.00787858462291923, "grad_norm": 0.283203125, "learning_rate": 0.001999999946152363, "loss": 0.378, "step": 1110 }, { "epoch": 0.007949562862765347, "grad_norm": 0.2294921875, "learning_rate": 0.001999999935916862, "loss": 0.3682, "step": 1120 }, { "epoch": 0.008020541102611467, "grad_norm": 0.263671875, "learning_rate": 0.0019999999247913174, "loss": 0.3664, "step": 1130 }, { "epoch": 0.008091519342457587, "grad_norm": 0.283203125, "learning_rate": 0.0019999999127757293, "loss": 0.3685, "step": 1140 }, { "epoch": 0.008162497582303705, "grad_norm": 0.158203125, "learning_rate": 0.001999999899870098, "loss": 0.3483, "step": 1150 }, { "epoch": 0.008233475822149824, "grad_norm": 0.310546875, "learning_rate": 0.0019999998860744223, "loss": 0.3524, "step": 1160 }, { "epoch": 0.008304454061995944, "grad_norm": 0.29296875, "learning_rate": 0.001999999871388704, "loss": 0.373, "step": 1170 }, { "epoch": 0.008375432301842062, "grad_norm": 0.279296875, "learning_rate": 0.001999999855812942, "loss": 0.3986, "step": 1180 }, { "epoch": 0.008446410541688182, "grad_norm": 0.16796875, "learning_rate": 0.001999999839347136, "loss": 0.3786, "step": 1190 }, { "epoch": 0.008517388781534302, "grad_norm": 0.1943359375, "learning_rate": 0.0019999998219912872, "loss": 0.3885, "step": 1200 }, { "epoch": 0.00858836702138042, "grad_norm": 0.1953125, "learning_rate": 0.0019999998037453946, "loss": 0.341, "step": 1210 }, { "epoch": 0.00865934526122654, "grad_norm": 0.2734375, "learning_rate": 0.001999999784609459, "loss": 0.3646, "step": 1220 }, { "epoch": 0.008730323501072659, "grad_norm": 0.1943359375, "learning_rate": 0.0019999997645834796, "loss": 0.3466, "step": 1230 }, { "epoch": 0.008801301740918779, "grad_norm": 0.1611328125, "learning_rate": 0.001999999743667457, "loss": 0.3362, "step": 1240 }, { "epoch": 0.008872279980764897, "grad_norm": 0.158203125, "learning_rate": 0.0019999997218613915, "loss": 0.363, "step": 1250 }, { "epoch": 0.008943258220611016, "grad_norm": 0.2451171875, "learning_rate": 0.0019999996991652822, "loss": 0.3515, "step": 1260 }, { "epoch": 0.009014236460457136, "grad_norm": 0.291015625, "learning_rate": 0.0019999996755791297, "loss": 0.35, "step": 1270 }, { "epoch": 0.009085214700303254, "grad_norm": 0.2216796875, "learning_rate": 0.001999999651102934, "loss": 0.3506, "step": 1280 }, { "epoch": 0.009156192940149374, "grad_norm": 0.197265625, "learning_rate": 0.001999999625736695, "loss": 0.3282, "step": 1290 }, { "epoch": 0.009227171179995493, "grad_norm": 0.248046875, "learning_rate": 0.001999999599480413, "loss": 0.3575, "step": 1300 }, { "epoch": 0.009298149419841611, "grad_norm": 0.193359375, "learning_rate": 0.0019999995723340874, "loss": 0.3495, "step": 1310 }, { "epoch": 0.009369127659687731, "grad_norm": 0.259765625, "learning_rate": 0.001999999544297719, "loss": 0.3503, "step": 1320 }, { "epoch": 0.009440105899533851, "grad_norm": 0.1591796875, "learning_rate": 0.001999999515371307, "loss": 0.3546, "step": 1330 }, { "epoch": 0.009511084139379969, "grad_norm": 0.1640625, "learning_rate": 0.001999999485554852, "loss": 0.3388, "step": 1340 }, { "epoch": 0.009582062379226089, "grad_norm": 0.1728515625, "learning_rate": 0.0019999994548483544, "loss": 0.3385, "step": 1350 }, { "epoch": 0.009653040619072208, "grad_norm": 0.1982421875, "learning_rate": 0.001999999423251813, "loss": 0.3675, "step": 1360 }, { "epoch": 0.009724018858918326, "grad_norm": 0.1904296875, "learning_rate": 0.0019999993907652293, "loss": 0.3469, "step": 1370 }, { "epoch": 0.009794997098764446, "grad_norm": 0.2216796875, "learning_rate": 0.0019999993573886024, "loss": 0.3561, "step": 1380 }, { "epoch": 0.009865975338610566, "grad_norm": 0.19921875, "learning_rate": 0.001999999323121932, "loss": 0.3464, "step": 1390 }, { "epoch": 0.009936953578456685, "grad_norm": 0.25390625, "learning_rate": 0.0019999992879652195, "loss": 0.3502, "step": 1400 }, { "epoch": 0.010007931818302803, "grad_norm": 0.3203125, "learning_rate": 0.0019999992519184637, "loss": 0.344, "step": 1410 }, { "epoch": 0.010078910058148923, "grad_norm": 0.1728515625, "learning_rate": 0.001999999214981665, "loss": 0.3417, "step": 1420 }, { "epoch": 0.010149888297995043, "grad_norm": 0.21484375, "learning_rate": 0.0019999991771548235, "loss": 0.3503, "step": 1430 }, { "epoch": 0.01022086653784116, "grad_norm": 0.2216796875, "learning_rate": 0.001999999138437939, "loss": 0.3545, "step": 1440 }, { "epoch": 0.01029184477768728, "grad_norm": 0.17578125, "learning_rate": 0.001999999098831012, "loss": 0.3685, "step": 1450 }, { "epoch": 0.0103628230175334, "grad_norm": 0.126953125, "learning_rate": 0.0019999990583340426, "loss": 0.3474, "step": 1460 }, { "epoch": 0.010433801257379518, "grad_norm": 0.251953125, "learning_rate": 0.0019999990169470303, "loss": 0.3653, "step": 1470 }, { "epoch": 0.010504779497225638, "grad_norm": 0.1748046875, "learning_rate": 0.001999998974669975, "loss": 0.3609, "step": 1480 }, { "epoch": 0.010575757737071758, "grad_norm": 0.1611328125, "learning_rate": 0.0019999989315028777, "loss": 0.3428, "step": 1490 }, { "epoch": 0.010646735976917876, "grad_norm": 0.1630859375, "learning_rate": 0.0019999988874457378, "loss": 0.3339, "step": 1500 }, { "epoch": 0.010717714216763995, "grad_norm": 0.201171875, "learning_rate": 0.001999998842498555, "loss": 0.3439, "step": 1510 }, { "epoch": 0.010788692456610115, "grad_norm": 0.23828125, "learning_rate": 0.00199999879666133, "loss": 0.3508, "step": 1520 }, { "epoch": 0.010859670696456235, "grad_norm": 0.21875, "learning_rate": 0.0019999987499340626, "loss": 0.3579, "step": 1530 }, { "epoch": 0.010930648936302353, "grad_norm": 0.1328125, "learning_rate": 0.001999998702316753, "loss": 0.3524, "step": 1540 }, { "epoch": 0.011001627176148472, "grad_norm": 0.2197265625, "learning_rate": 0.001999998653809401, "loss": 0.3368, "step": 1550 }, { "epoch": 0.011072605415994592, "grad_norm": 0.2294921875, "learning_rate": 0.0019999986044120063, "loss": 0.3398, "step": 1560 }, { "epoch": 0.01114358365584071, "grad_norm": 0.189453125, "learning_rate": 0.00199999855412457, "loss": 0.3217, "step": 1570 }, { "epoch": 0.01121456189568683, "grad_norm": 0.267578125, "learning_rate": 0.0019999985029470913, "loss": 0.3367, "step": 1580 }, { "epoch": 0.01128554013553295, "grad_norm": 0.130859375, "learning_rate": 0.0019999984508795705, "loss": 0.3573, "step": 1590 }, { "epoch": 0.011356518375379068, "grad_norm": 0.1533203125, "learning_rate": 0.001999998397922007, "loss": 0.3283, "step": 1600 }, { "epoch": 0.011427496615225187, "grad_norm": 0.201171875, "learning_rate": 0.0019999983440744024, "loss": 0.3362, "step": 1610 }, { "epoch": 0.011498474855071307, "grad_norm": 0.2236328125, "learning_rate": 0.0019999982893367557, "loss": 0.3487, "step": 1620 }, { "epoch": 0.011569453094917425, "grad_norm": 0.265625, "learning_rate": 0.001999998233709067, "loss": 0.3657, "step": 1630 }, { "epoch": 0.011640431334763545, "grad_norm": 0.166015625, "learning_rate": 0.0019999981771913365, "loss": 0.3256, "step": 1640 }, { "epoch": 0.011711409574609664, "grad_norm": 0.1572265625, "learning_rate": 0.001999998119783564, "loss": 0.3253, "step": 1650 }, { "epoch": 0.011782387814455782, "grad_norm": 0.1474609375, "learning_rate": 0.00199999806148575, "loss": 0.3466, "step": 1660 }, { "epoch": 0.011853366054301902, "grad_norm": 0.212890625, "learning_rate": 0.001999998002297894, "loss": 0.3659, "step": 1670 }, { "epoch": 0.011924344294148022, "grad_norm": 0.1611328125, "learning_rate": 0.0019999979422199964, "loss": 0.3226, "step": 1680 }, { "epoch": 0.011995322533994142, "grad_norm": 0.2099609375, "learning_rate": 0.0019999978812520578, "loss": 0.3486, "step": 1690 }, { "epoch": 0.01206630077384026, "grad_norm": 0.15625, "learning_rate": 0.001999997819394077, "loss": 0.329, "step": 1700 }, { "epoch": 0.01213727901368638, "grad_norm": 0.365234375, "learning_rate": 0.001999997756646055, "loss": 0.3471, "step": 1710 }, { "epoch": 0.012208257253532499, "grad_norm": 0.353515625, "learning_rate": 0.0019999976930079916, "loss": 0.314, "step": 1720 }, { "epoch": 0.012279235493378617, "grad_norm": 0.1767578125, "learning_rate": 0.0019999976284798874, "loss": 0.3071, "step": 1730 }, { "epoch": 0.012350213733224737, "grad_norm": 0.142578125, "learning_rate": 0.0019999975630617416, "loss": 0.3328, "step": 1740 }, { "epoch": 0.012421191973070856, "grad_norm": 0.2890625, "learning_rate": 0.0019999974967535544, "loss": 0.3221, "step": 1750 }, { "epoch": 0.012492170212916974, "grad_norm": 0.138671875, "learning_rate": 0.001999997429555326, "loss": 0.3539, "step": 1760 }, { "epoch": 0.012563148452763094, "grad_norm": 0.1171875, "learning_rate": 0.001999997361467057, "loss": 0.3575, "step": 1770 }, { "epoch": 0.012634126692609214, "grad_norm": 0.16796875, "learning_rate": 0.0019999972924887467, "loss": 0.3214, "step": 1780 }, { "epoch": 0.012705104932455332, "grad_norm": 0.1435546875, "learning_rate": 0.0019999972226203955, "loss": 0.3408, "step": 1790 }, { "epoch": 0.012776083172301451, "grad_norm": 0.2041015625, "learning_rate": 0.0019999971518620038, "loss": 0.3131, "step": 1800 }, { "epoch": 0.012847061412147571, "grad_norm": 0.1513671875, "learning_rate": 0.001999997080213571, "loss": 0.3517, "step": 1810 }, { "epoch": 0.012918039651993691, "grad_norm": 0.177734375, "learning_rate": 0.001999997007675098, "loss": 0.3669, "step": 1820 }, { "epoch": 0.012989017891839809, "grad_norm": 0.265625, "learning_rate": 0.0019999969342465837, "loss": 0.3284, "step": 1830 }, { "epoch": 0.013059996131685929, "grad_norm": 0.1796875, "learning_rate": 0.001999996859928029, "loss": 0.3692, "step": 1840 }, { "epoch": 0.013130974371532048, "grad_norm": 0.2890625, "learning_rate": 0.001999996784719434, "loss": 0.3267, "step": 1850 }, { "epoch": 0.013201952611378166, "grad_norm": 0.1806640625, "learning_rate": 0.0019999967086207983, "loss": 0.3475, "step": 1860 }, { "epoch": 0.013272930851224286, "grad_norm": 0.2177734375, "learning_rate": 0.0019999966316321225, "loss": 0.3494, "step": 1870 }, { "epoch": 0.013343909091070406, "grad_norm": 0.2255859375, "learning_rate": 0.0019999965537534064, "loss": 0.3384, "step": 1880 }, { "epoch": 0.013414887330916524, "grad_norm": 0.158203125, "learning_rate": 0.00199999647498465, "loss": 0.3472, "step": 1890 }, { "epoch": 0.013485865570762643, "grad_norm": 0.1806640625, "learning_rate": 0.0019999963953258536, "loss": 0.3319, "step": 1900 }, { "epoch": 0.013556843810608763, "grad_norm": 0.1533203125, "learning_rate": 0.0019999963147770174, "loss": 0.3478, "step": 1910 }, { "epoch": 0.013627822050454881, "grad_norm": 0.140625, "learning_rate": 0.001999996233338141, "loss": 0.3444, "step": 1920 }, { "epoch": 0.013698800290301, "grad_norm": 0.1572265625, "learning_rate": 0.0019999961510092246, "loss": 0.3524, "step": 1930 }, { "epoch": 0.01376977853014712, "grad_norm": 0.189453125, "learning_rate": 0.0019999960677902686, "loss": 0.32, "step": 1940 }, { "epoch": 0.013840756769993239, "grad_norm": 0.2041015625, "learning_rate": 0.001999995983681273, "loss": 0.3458, "step": 1950 }, { "epoch": 0.013911735009839358, "grad_norm": 0.1474609375, "learning_rate": 0.0019999958986822376, "loss": 0.3413, "step": 1960 }, { "epoch": 0.013982713249685478, "grad_norm": 0.232421875, "learning_rate": 0.0019999958127931623, "loss": 0.348, "step": 1970 }, { "epoch": 0.014053691489531598, "grad_norm": 0.275390625, "learning_rate": 0.001999995726014048, "loss": 0.3324, "step": 1980 }, { "epoch": 0.014124669729377716, "grad_norm": 0.4375, "learning_rate": 0.0019999956383448944, "loss": 0.3398, "step": 1990 }, { "epoch": 0.014195647969223835, "grad_norm": 0.1953125, "learning_rate": 0.0019999955497857014, "loss": 0.3706, "step": 2000 }, { "epoch": 0.014195647969223835, "eval_covost2-zh-en_loss": 4.445901870727539, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 19.9087, "eval_covost2-zh-en_samples_per_second": 3.215, "eval_covost2-zh-en_steps_per_second": 0.201, "step": 2000 }, { "epoch": 0.014195647969223835, "eval_covost2-en-zh_loss": 3.159895896911621, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.1811, "eval_covost2-en-zh_samples_per_second": 3.337, "eval_covost2-en-zh_steps_per_second": 0.209, "step": 2000 }, { "epoch": 0.014266626209069955, "grad_norm": 0.236328125, "learning_rate": 0.001999995460336469, "loss": 0.3248, "step": 2010 }, { "epoch": 0.014337604448916073, "grad_norm": 0.1064453125, "learning_rate": 0.001999995369997198, "loss": 0.3337, "step": 2020 }, { "epoch": 0.014408582688762193, "grad_norm": 0.2392578125, "learning_rate": 0.0019999952787678878, "loss": 0.3484, "step": 2030 }, { "epoch": 0.014479560928608312, "grad_norm": 0.1845703125, "learning_rate": 0.0019999951866485383, "loss": 0.3294, "step": 2040 }, { "epoch": 0.01455053916845443, "grad_norm": 0.1806640625, "learning_rate": 0.00199999509363915, "loss": 0.3299, "step": 2050 }, { "epoch": 0.01462151740830055, "grad_norm": 0.1630859375, "learning_rate": 0.0019999949997397235, "loss": 0.3292, "step": 2060 }, { "epoch": 0.01469249564814667, "grad_norm": 0.19140625, "learning_rate": 0.001999994904950258, "loss": 0.3263, "step": 2070 }, { "epoch": 0.014763473887992788, "grad_norm": 0.40234375, "learning_rate": 0.001999994809270754, "loss": 0.3329, "step": 2080 }, { "epoch": 0.014834452127838908, "grad_norm": 0.2890625, "learning_rate": 0.001999994712701211, "loss": 0.3535, "step": 2090 }, { "epoch": 0.014905430367685027, "grad_norm": 0.1708984375, "learning_rate": 0.0019999946152416304, "loss": 0.3257, "step": 2100 }, { "epoch": 0.014976408607531147, "grad_norm": 0.1328125, "learning_rate": 0.001999994516892011, "loss": 0.3165, "step": 2110 }, { "epoch": 0.015047386847377265, "grad_norm": 0.2001953125, "learning_rate": 0.0019999944176523543, "loss": 0.3281, "step": 2120 }, { "epoch": 0.015118365087223385, "grad_norm": 0.18359375, "learning_rate": 0.0019999943175226585, "loss": 0.3243, "step": 2130 }, { "epoch": 0.015189343327069504, "grad_norm": 0.1533203125, "learning_rate": 0.0019999942165029256, "loss": 0.3398, "step": 2140 }, { "epoch": 0.015260321566915622, "grad_norm": 0.193359375, "learning_rate": 0.001999994114593154, "loss": 0.3338, "step": 2150 }, { "epoch": 0.015331299806761742, "grad_norm": 0.14453125, "learning_rate": 0.001999994011793345, "loss": 0.33, "step": 2160 }, { "epoch": 0.015402278046607862, "grad_norm": 0.181640625, "learning_rate": 0.0019999939081034985, "loss": 0.3218, "step": 2170 }, { "epoch": 0.01547325628645398, "grad_norm": 0.2490234375, "learning_rate": 0.0019999938035236147, "loss": 0.3088, "step": 2180 }, { "epoch": 0.0155442345263001, "grad_norm": 0.193359375, "learning_rate": 0.0019999936980536924, "loss": 0.3168, "step": 2190 }, { "epoch": 0.01561521276614622, "grad_norm": 0.1640625, "learning_rate": 0.0019999935916937333, "loss": 0.3236, "step": 2200 }, { "epoch": 0.015686191005992337, "grad_norm": 0.1943359375, "learning_rate": 0.001999993484443737, "loss": 0.3191, "step": 2210 }, { "epoch": 0.01575716924583846, "grad_norm": 0.134765625, "learning_rate": 0.0019999933763037037, "loss": 0.3502, "step": 2220 }, { "epoch": 0.015828147485684577, "grad_norm": 0.162109375, "learning_rate": 0.001999993267273633, "loss": 0.3431, "step": 2230 }, { "epoch": 0.015899125725530695, "grad_norm": 0.1748046875, "learning_rate": 0.001999993157353526, "loss": 0.3309, "step": 2240 }, { "epoch": 0.015970103965376816, "grad_norm": 0.28125, "learning_rate": 0.0019999930465433814, "loss": 0.3044, "step": 2250 }, { "epoch": 0.016041082205222934, "grad_norm": 0.2490234375, "learning_rate": 0.001999992934843201, "loss": 0.3392, "step": 2260 }, { "epoch": 0.016112060445069052, "grad_norm": 0.12890625, "learning_rate": 0.001999992822252983, "loss": 0.3519, "step": 2270 }, { "epoch": 0.016183038684915173, "grad_norm": 0.11865234375, "learning_rate": 0.001999992708772729, "loss": 0.3404, "step": 2280 }, { "epoch": 0.01625401692476129, "grad_norm": 0.1474609375, "learning_rate": 0.001999992594402438, "loss": 0.3372, "step": 2290 }, { "epoch": 0.01632499516460741, "grad_norm": 0.12890625, "learning_rate": 0.001999992479142112, "loss": 0.3042, "step": 2300 }, { "epoch": 0.01639597340445353, "grad_norm": 0.25390625, "learning_rate": 0.001999992362991749, "loss": 0.3227, "step": 2310 }, { "epoch": 0.01646695164429965, "grad_norm": 0.134765625, "learning_rate": 0.00199999224595135, "loss": 0.3283, "step": 2320 }, { "epoch": 0.016537929884145767, "grad_norm": 0.1513671875, "learning_rate": 0.0019999921280209154, "loss": 0.3223, "step": 2330 }, { "epoch": 0.01660890812399189, "grad_norm": 0.130859375, "learning_rate": 0.0019999920092004446, "loss": 0.3052, "step": 2340 }, { "epoch": 0.016679886363838006, "grad_norm": 0.259765625, "learning_rate": 0.001999991889489938, "loss": 0.3229, "step": 2350 }, { "epoch": 0.016750864603684124, "grad_norm": 0.29296875, "learning_rate": 0.001999991768889396, "loss": 0.345, "step": 2360 }, { "epoch": 0.016821842843530246, "grad_norm": 0.35546875, "learning_rate": 0.001999991647398819, "loss": 0.361, "step": 2370 }, { "epoch": 0.016892821083376364, "grad_norm": 0.1455078125, "learning_rate": 0.001999991525018206, "loss": 0.3273, "step": 2380 }, { "epoch": 0.01696379932322248, "grad_norm": 0.1474609375, "learning_rate": 0.0019999914017475584, "loss": 0.3517, "step": 2390 }, { "epoch": 0.017034777563068603, "grad_norm": 0.130859375, "learning_rate": 0.0019999912775868754, "loss": 0.324, "step": 2400 }, { "epoch": 0.01710575580291472, "grad_norm": 0.177734375, "learning_rate": 0.0019999911525361573, "loss": 0.3208, "step": 2410 }, { "epoch": 0.01717673404276084, "grad_norm": 0.12353515625, "learning_rate": 0.0019999910265954043, "loss": 0.3268, "step": 2420 }, { "epoch": 0.01724771228260696, "grad_norm": 0.158203125, "learning_rate": 0.001999990899764617, "loss": 0.307, "step": 2430 }, { "epoch": 0.01731869052245308, "grad_norm": 0.1669921875, "learning_rate": 0.001999990772043795, "loss": 0.3507, "step": 2440 }, { "epoch": 0.017389668762299196, "grad_norm": 0.166015625, "learning_rate": 0.001999990643432938, "loss": 0.3063, "step": 2450 }, { "epoch": 0.017460647002145318, "grad_norm": 0.185546875, "learning_rate": 0.001999990513932047, "loss": 0.3154, "step": 2460 }, { "epoch": 0.017531625241991436, "grad_norm": 0.1513671875, "learning_rate": 0.001999990383541122, "loss": 0.3211, "step": 2470 }, { "epoch": 0.017602603481837557, "grad_norm": 0.24609375, "learning_rate": 0.001999990252260163, "loss": 0.3479, "step": 2480 }, { "epoch": 0.017673581721683675, "grad_norm": 0.421875, "learning_rate": 0.0019999901200891696, "loss": 0.3326, "step": 2490 }, { "epoch": 0.017744559961529793, "grad_norm": 0.1865234375, "learning_rate": 0.001999989987028143, "loss": 0.3358, "step": 2500 }, { "epoch": 0.017815538201375915, "grad_norm": 0.341796875, "learning_rate": 0.0019999898530770823, "loss": 0.3422, "step": 2510 }, { "epoch": 0.017886516441222033, "grad_norm": 0.1484375, "learning_rate": 0.001999989718235988, "loss": 0.3449, "step": 2520 }, { "epoch": 0.01795749468106815, "grad_norm": 0.12890625, "learning_rate": 0.00199998958250486, "loss": 0.3305, "step": 2530 }, { "epoch": 0.018028472920914272, "grad_norm": 0.103515625, "learning_rate": 0.001999989445883699, "loss": 0.3298, "step": 2540 }, { "epoch": 0.01809945116076039, "grad_norm": 0.1376953125, "learning_rate": 0.0019999893083725052, "loss": 0.32, "step": 2550 }, { "epoch": 0.018170429400606508, "grad_norm": 0.1748046875, "learning_rate": 0.0019999891699712777, "loss": 0.3256, "step": 2560 }, { "epoch": 0.01824140764045263, "grad_norm": 0.138671875, "learning_rate": 0.0019999890306800177, "loss": 0.3187, "step": 2570 }, { "epoch": 0.018312385880298748, "grad_norm": 0.1591796875, "learning_rate": 0.001999988890498725, "loss": 0.3118, "step": 2580 }, { "epoch": 0.018383364120144866, "grad_norm": 0.146484375, "learning_rate": 0.0019999887494273998, "loss": 0.339, "step": 2590 }, { "epoch": 0.018454342359990987, "grad_norm": 0.1455078125, "learning_rate": 0.0019999886074660417, "loss": 0.3234, "step": 2600 }, { "epoch": 0.018525320599837105, "grad_norm": 0.134765625, "learning_rate": 0.0019999884646146517, "loss": 0.2932, "step": 2610 }, { "epoch": 0.018596298839683223, "grad_norm": 0.169921875, "learning_rate": 0.0019999883208732293, "loss": 0.3359, "step": 2620 }, { "epoch": 0.018667277079529344, "grad_norm": 0.1748046875, "learning_rate": 0.0019999881762417745, "loss": 0.3033, "step": 2630 }, { "epoch": 0.018738255319375462, "grad_norm": 0.18359375, "learning_rate": 0.0019999880307202886, "loss": 0.3263, "step": 2640 }, { "epoch": 0.01880923355922158, "grad_norm": 0.365234375, "learning_rate": 0.0019999878843087707, "loss": 0.348, "step": 2650 }, { "epoch": 0.018880211799067702, "grad_norm": 0.1552734375, "learning_rate": 0.001999987737007221, "loss": 0.3199, "step": 2660 }, { "epoch": 0.01895119003891382, "grad_norm": 0.162109375, "learning_rate": 0.00199998758881564, "loss": 0.3207, "step": 2670 }, { "epoch": 0.019022168278759938, "grad_norm": 0.1259765625, "learning_rate": 0.0019999874397340273, "loss": 0.329, "step": 2680 }, { "epoch": 0.01909314651860606, "grad_norm": 0.1884765625, "learning_rate": 0.0019999872897623837, "loss": 0.3259, "step": 2690 }, { "epoch": 0.019164124758452177, "grad_norm": 0.10546875, "learning_rate": 0.001999987138900709, "loss": 0.2922, "step": 2700 }, { "epoch": 0.019235102998298295, "grad_norm": 0.150390625, "learning_rate": 0.001999986987149003, "loss": 0.3122, "step": 2710 }, { "epoch": 0.019306081238144417, "grad_norm": 0.18359375, "learning_rate": 0.001999986834507267, "loss": 0.2997, "step": 2720 }, { "epoch": 0.019377059477990535, "grad_norm": 0.20703125, "learning_rate": 0.0019999866809755004, "loss": 0.2984, "step": 2730 }, { "epoch": 0.019448037717836653, "grad_norm": 0.146484375, "learning_rate": 0.001999986526553703, "loss": 0.3066, "step": 2740 }, { "epoch": 0.019519015957682774, "grad_norm": 0.138671875, "learning_rate": 0.0019999863712418754, "loss": 0.3299, "step": 2750 }, { "epoch": 0.019589994197528892, "grad_norm": 0.1552734375, "learning_rate": 0.001999986215040018, "loss": 0.3029, "step": 2760 }, { "epoch": 0.019660972437375013, "grad_norm": 0.16015625, "learning_rate": 0.0019999860579481303, "loss": 0.3246, "step": 2770 }, { "epoch": 0.01973195067722113, "grad_norm": 0.18359375, "learning_rate": 0.001999985899966213, "loss": 0.3026, "step": 2780 }, { "epoch": 0.01980292891706725, "grad_norm": 0.21875, "learning_rate": 0.001999985741094266, "loss": 0.3297, "step": 2790 }, { "epoch": 0.01987390715691337, "grad_norm": 0.10107421875, "learning_rate": 0.0019999855813322893, "loss": 0.314, "step": 2800 }, { "epoch": 0.01994488539675949, "grad_norm": 0.1416015625, "learning_rate": 0.0019999854206802833, "loss": 0.2994, "step": 2810 }, { "epoch": 0.020015863636605607, "grad_norm": 0.162109375, "learning_rate": 0.0019999852591382483, "loss": 0.3152, "step": 2820 }, { "epoch": 0.02008684187645173, "grad_norm": 0.2138671875, "learning_rate": 0.001999985096706184, "loss": 0.2897, "step": 2830 }, { "epoch": 0.020157820116297846, "grad_norm": 0.1865234375, "learning_rate": 0.0019999849333840916, "loss": 0.3353, "step": 2840 }, { "epoch": 0.020228798356143964, "grad_norm": 0.15625, "learning_rate": 0.00199998476917197, "loss": 0.3061, "step": 2850 }, { "epoch": 0.020299776595990086, "grad_norm": 0.1279296875, "learning_rate": 0.0019999846040698196, "loss": 0.3059, "step": 2860 }, { "epoch": 0.020370754835836204, "grad_norm": 0.18359375, "learning_rate": 0.0019999844380776412, "loss": 0.304, "step": 2870 }, { "epoch": 0.02044173307568232, "grad_norm": 0.2109375, "learning_rate": 0.0019999842711954344, "loss": 0.3396, "step": 2880 }, { "epoch": 0.020512711315528443, "grad_norm": 0.15625, "learning_rate": 0.0019999841034232, "loss": 0.2918, "step": 2890 }, { "epoch": 0.02058368955537456, "grad_norm": 0.1875, "learning_rate": 0.001999983934760937, "loss": 0.3004, "step": 2900 }, { "epoch": 0.02065466779522068, "grad_norm": 0.1376953125, "learning_rate": 0.0019999837652086468, "loss": 0.3244, "step": 2910 }, { "epoch": 0.0207256460350668, "grad_norm": 0.21484375, "learning_rate": 0.001999983594766329, "loss": 0.3063, "step": 2920 }, { "epoch": 0.02079662427491292, "grad_norm": 0.126953125, "learning_rate": 0.0019999834234339835, "loss": 0.3354, "step": 2930 }, { "epoch": 0.020867602514759036, "grad_norm": 0.09228515625, "learning_rate": 0.0019999832512116113, "loss": 0.2833, "step": 2940 }, { "epoch": 0.020938580754605158, "grad_norm": 0.3828125, "learning_rate": 0.001999983078099212, "loss": 0.3115, "step": 2950 }, { "epoch": 0.021009558994451276, "grad_norm": 0.13671875, "learning_rate": 0.0019999829040967853, "loss": 0.3233, "step": 2960 }, { "epoch": 0.021080537234297394, "grad_norm": 0.119140625, "learning_rate": 0.0019999827292043323, "loss": 0.3152, "step": 2970 }, { "epoch": 0.021151515474143515, "grad_norm": 0.166015625, "learning_rate": 0.0019999825534218526, "loss": 0.3407, "step": 2980 }, { "epoch": 0.021222493713989633, "grad_norm": 0.2119140625, "learning_rate": 0.001999982376749347, "loss": 0.33, "step": 2990 }, { "epoch": 0.02129347195383575, "grad_norm": 0.1552734375, "learning_rate": 0.001999982199186815, "loss": 0.3023, "step": 3000 }, { "epoch": 0.021364450193681873, "grad_norm": 0.1416015625, "learning_rate": 0.0019999820207342567, "loss": 0.2946, "step": 3010 }, { "epoch": 0.02143542843352799, "grad_norm": 0.1708984375, "learning_rate": 0.001999981841391673, "loss": 0.3156, "step": 3020 }, { "epoch": 0.02150640667337411, "grad_norm": 0.126953125, "learning_rate": 0.0019999816611590638, "loss": 0.2978, "step": 3030 }, { "epoch": 0.02157738491322023, "grad_norm": 0.11328125, "learning_rate": 0.0019999814800364286, "loss": 0.2999, "step": 3040 }, { "epoch": 0.021648363153066348, "grad_norm": 0.107421875, "learning_rate": 0.0019999812980237685, "loss": 0.3206, "step": 3050 }, { "epoch": 0.02171934139291247, "grad_norm": 0.11767578125, "learning_rate": 0.0019999811151210833, "loss": 0.3067, "step": 3060 }, { "epoch": 0.021790319632758588, "grad_norm": 0.203125, "learning_rate": 0.001999980931328373, "loss": 0.3116, "step": 3070 }, { "epoch": 0.021861297872604706, "grad_norm": 0.314453125, "learning_rate": 0.0019999807466456383, "loss": 0.3074, "step": 3080 }, { "epoch": 0.021932276112450827, "grad_norm": 0.232421875, "learning_rate": 0.0019999805610728784, "loss": 0.31, "step": 3090 }, { "epoch": 0.022003254352296945, "grad_norm": 0.1435546875, "learning_rate": 0.001999980374610095, "loss": 0.2782, "step": 3100 }, { "epoch": 0.022074232592143063, "grad_norm": 0.232421875, "learning_rate": 0.0019999801872572867, "loss": 0.3093, "step": 3110 }, { "epoch": 0.022145210831989184, "grad_norm": 0.130859375, "learning_rate": 0.0019999799990144547, "loss": 0.3169, "step": 3120 }, { "epoch": 0.022216189071835302, "grad_norm": 0.1494140625, "learning_rate": 0.0019999798098815987, "loss": 0.3265, "step": 3130 }, { "epoch": 0.02228716731168142, "grad_norm": 0.1171875, "learning_rate": 0.001999979619858719, "loss": 0.2983, "step": 3140 }, { "epoch": 0.022358145551527542, "grad_norm": 0.11669921875, "learning_rate": 0.0019999794289458166, "loss": 0.3153, "step": 3150 }, { "epoch": 0.02242912379137366, "grad_norm": 0.10400390625, "learning_rate": 0.0019999792371428898, "loss": 0.294, "step": 3160 }, { "epoch": 0.022500102031219778, "grad_norm": 0.2021484375, "learning_rate": 0.001999979044449941, "loss": 0.3023, "step": 3170 }, { "epoch": 0.0225710802710659, "grad_norm": 0.1318359375, "learning_rate": 0.001999978850866969, "loss": 0.3222, "step": 3180 }, { "epoch": 0.022642058510912017, "grad_norm": 0.2255859375, "learning_rate": 0.001999978656393974, "loss": 0.3276, "step": 3190 }, { "epoch": 0.022713036750758135, "grad_norm": 0.1455078125, "learning_rate": 0.001999978461030957, "loss": 0.2851, "step": 3200 }, { "epoch": 0.022784014990604257, "grad_norm": 0.2216796875, "learning_rate": 0.001999978264777917, "loss": 0.3187, "step": 3210 }, { "epoch": 0.022854993230450375, "grad_norm": 0.150390625, "learning_rate": 0.001999978067634855, "loss": 0.3115, "step": 3220 }, { "epoch": 0.022925971470296493, "grad_norm": 0.345703125, "learning_rate": 0.0019999778696017717, "loss": 0.3333, "step": 3230 }, { "epoch": 0.022996949710142614, "grad_norm": 0.1845703125, "learning_rate": 0.0019999776706786667, "loss": 0.3297, "step": 3240 }, { "epoch": 0.023067927949988732, "grad_norm": 0.1533203125, "learning_rate": 0.0019999774708655397, "loss": 0.3141, "step": 3250 }, { "epoch": 0.02313890618983485, "grad_norm": 0.201171875, "learning_rate": 0.0019999772701623916, "loss": 0.3206, "step": 3260 }, { "epoch": 0.02320988442968097, "grad_norm": 0.349609375, "learning_rate": 0.0019999770685692224, "loss": 0.3135, "step": 3270 }, { "epoch": 0.02328086266952709, "grad_norm": 0.2236328125, "learning_rate": 0.0019999768660860324, "loss": 0.2814, "step": 3280 }, { "epoch": 0.023351840909373207, "grad_norm": 0.1181640625, "learning_rate": 0.0019999766627128213, "loss": 0.2899, "step": 3290 }, { "epoch": 0.02342281914921933, "grad_norm": 0.10205078125, "learning_rate": 0.00199997645844959, "loss": 0.2987, "step": 3300 }, { "epoch": 0.023493797389065447, "grad_norm": 0.1767578125, "learning_rate": 0.001999976253296338, "loss": 0.3214, "step": 3310 }, { "epoch": 0.023564775628911565, "grad_norm": 0.10693359375, "learning_rate": 0.0019999760472530666, "loss": 0.2925, "step": 3320 }, { "epoch": 0.023635753868757686, "grad_norm": 0.189453125, "learning_rate": 0.001999975840319775, "loss": 0.3235, "step": 3330 }, { "epoch": 0.023706732108603804, "grad_norm": 0.1962890625, "learning_rate": 0.0019999756324964634, "loss": 0.3155, "step": 3340 }, { "epoch": 0.023777710348449926, "grad_norm": 0.1708984375, "learning_rate": 0.0019999754237831325, "loss": 0.3183, "step": 3350 }, { "epoch": 0.023848688588296044, "grad_norm": 0.1435546875, "learning_rate": 0.0019999752141797826, "loss": 0.3022, "step": 3360 }, { "epoch": 0.02391966682814216, "grad_norm": 0.185546875, "learning_rate": 0.001999975003686413, "loss": 0.2946, "step": 3370 }, { "epoch": 0.023990645067988283, "grad_norm": 0.126953125, "learning_rate": 0.001999974792303025, "loss": 0.297, "step": 3380 }, { "epoch": 0.0240616233078344, "grad_norm": 0.1259765625, "learning_rate": 0.0019999745800296184, "loss": 0.2892, "step": 3390 }, { "epoch": 0.02413260154768052, "grad_norm": 0.37890625, "learning_rate": 0.001999974366866193, "loss": 0.3055, "step": 3400 }, { "epoch": 0.02420357978752664, "grad_norm": 0.203125, "learning_rate": 0.0019999741528127497, "loss": 0.3206, "step": 3410 }, { "epoch": 0.02427455802737276, "grad_norm": 0.275390625, "learning_rate": 0.001999973937869288, "loss": 0.3004, "step": 3420 }, { "epoch": 0.024345536267218876, "grad_norm": 0.16015625, "learning_rate": 0.0019999737220358087, "loss": 0.2977, "step": 3430 }, { "epoch": 0.024416514507064998, "grad_norm": 0.1484375, "learning_rate": 0.0019999735053123115, "loss": 0.2819, "step": 3440 }, { "epoch": 0.024487492746911116, "grad_norm": 0.1494140625, "learning_rate": 0.0019999732876987975, "loss": 0.3004, "step": 3450 }, { "epoch": 0.024558470986757234, "grad_norm": 0.11181640625, "learning_rate": 0.0019999730691952658, "loss": 0.3253, "step": 3460 }, { "epoch": 0.024629449226603355, "grad_norm": 0.16796875, "learning_rate": 0.0019999728498017173, "loss": 0.2802, "step": 3470 }, { "epoch": 0.024700427466449473, "grad_norm": 0.1513671875, "learning_rate": 0.001999972629518152, "loss": 0.2869, "step": 3480 }, { "epoch": 0.02477140570629559, "grad_norm": 0.1328125, "learning_rate": 0.0019999724083445704, "loss": 0.3035, "step": 3490 }, { "epoch": 0.024842383946141713, "grad_norm": 0.197265625, "learning_rate": 0.001999972186280972, "loss": 0.298, "step": 3500 }, { "epoch": 0.02491336218598783, "grad_norm": 0.126953125, "learning_rate": 0.001999971963327358, "loss": 0.2825, "step": 3510 }, { "epoch": 0.02498434042583395, "grad_norm": 0.12353515625, "learning_rate": 0.001999971739483728, "loss": 0.3016, "step": 3520 }, { "epoch": 0.02505531866568007, "grad_norm": 0.208984375, "learning_rate": 0.0019999715147500825, "loss": 0.3147, "step": 3530 }, { "epoch": 0.025126296905526188, "grad_norm": 0.177734375, "learning_rate": 0.0019999712891264213, "loss": 0.3238, "step": 3540 }, { "epoch": 0.025197275145372306, "grad_norm": 0.12158203125, "learning_rate": 0.001999971062612745, "loss": 0.3092, "step": 3550 }, { "epoch": 0.025268253385218428, "grad_norm": 0.20703125, "learning_rate": 0.0019999708352090536, "loss": 0.3082, "step": 3560 }, { "epoch": 0.025339231625064546, "grad_norm": 0.142578125, "learning_rate": 0.0019999706069153476, "loss": 0.302, "step": 3570 }, { "epoch": 0.025410209864910663, "grad_norm": 0.181640625, "learning_rate": 0.0019999703777316274, "loss": 0.3034, "step": 3580 }, { "epoch": 0.025481188104756785, "grad_norm": 0.1416015625, "learning_rate": 0.0019999701476578926, "loss": 0.3041, "step": 3590 }, { "epoch": 0.025552166344602903, "grad_norm": 0.154296875, "learning_rate": 0.0019999699166941436, "loss": 0.3124, "step": 3600 }, { "epoch": 0.02562314458444902, "grad_norm": 0.2275390625, "learning_rate": 0.001999969684840381, "loss": 0.3081, "step": 3610 }, { "epoch": 0.025694122824295142, "grad_norm": 0.140625, "learning_rate": 0.0019999694520966044, "loss": 0.2958, "step": 3620 }, { "epoch": 0.02576510106414126, "grad_norm": 0.10205078125, "learning_rate": 0.0019999692184628146, "loss": 0.3175, "step": 3630 }, { "epoch": 0.025836079303987382, "grad_norm": 0.12890625, "learning_rate": 0.0019999689839390114, "loss": 0.3046, "step": 3640 }, { "epoch": 0.0259070575438335, "grad_norm": 0.185546875, "learning_rate": 0.0019999687485251962, "loss": 0.2765, "step": 3650 }, { "epoch": 0.025978035783679618, "grad_norm": 0.12109375, "learning_rate": 0.0019999685122213677, "loss": 0.3297, "step": 3660 }, { "epoch": 0.02604901402352574, "grad_norm": 0.126953125, "learning_rate": 0.0019999682750275263, "loss": 0.2739, "step": 3670 }, { "epoch": 0.026119992263371857, "grad_norm": 0.1181640625, "learning_rate": 0.0019999680369436734, "loss": 0.3147, "step": 3680 }, { "epoch": 0.026190970503217975, "grad_norm": 0.126953125, "learning_rate": 0.0019999677979698084, "loss": 0.292, "step": 3690 }, { "epoch": 0.026261948743064097, "grad_norm": 0.11083984375, "learning_rate": 0.0019999675581059313, "loss": 0.2884, "step": 3700 }, { "epoch": 0.026332926982910215, "grad_norm": 0.16015625, "learning_rate": 0.001999967317352043, "loss": 0.3174, "step": 3710 }, { "epoch": 0.026403905222756333, "grad_norm": 0.10205078125, "learning_rate": 0.0019999670757081434, "loss": 0.2821, "step": 3720 }, { "epoch": 0.026474883462602454, "grad_norm": 0.37109375, "learning_rate": 0.0019999668331742325, "loss": 0.2986, "step": 3730 }, { "epoch": 0.026545861702448572, "grad_norm": 0.1162109375, "learning_rate": 0.001999966589750311, "loss": 0.3007, "step": 3740 }, { "epoch": 0.02661683994229469, "grad_norm": 0.173828125, "learning_rate": 0.001999966345436379, "loss": 0.2859, "step": 3750 }, { "epoch": 0.02668781818214081, "grad_norm": 0.1337890625, "learning_rate": 0.0019999661002324367, "loss": 0.3346, "step": 3760 }, { "epoch": 0.02675879642198693, "grad_norm": 0.203125, "learning_rate": 0.001999965854138484, "loss": 0.3138, "step": 3770 }, { "epoch": 0.026829774661833047, "grad_norm": 0.1328125, "learning_rate": 0.001999965607154522, "loss": 0.2955, "step": 3780 }, { "epoch": 0.02690075290167917, "grad_norm": 0.12060546875, "learning_rate": 0.00199996535928055, "loss": 0.3015, "step": 3790 }, { "epoch": 0.026971731141525287, "grad_norm": 0.10888671875, "learning_rate": 0.001999965110516569, "loss": 0.2915, "step": 3800 }, { "epoch": 0.027042709381371405, "grad_norm": 0.109375, "learning_rate": 0.0019999648608625786, "loss": 0.2814, "step": 3810 }, { "epoch": 0.027113687621217526, "grad_norm": 0.1044921875, "learning_rate": 0.0019999646103185794, "loss": 0.3242, "step": 3820 }, { "epoch": 0.027184665861063644, "grad_norm": 0.126953125, "learning_rate": 0.0019999643588845717, "loss": 0.2821, "step": 3830 }, { "epoch": 0.027255644100909762, "grad_norm": 0.1298828125, "learning_rate": 0.001999964106560556, "loss": 0.2803, "step": 3840 }, { "epoch": 0.027326622340755884, "grad_norm": 0.1494140625, "learning_rate": 0.0019999638533465315, "loss": 0.3064, "step": 3850 }, { "epoch": 0.027397600580602, "grad_norm": 0.138671875, "learning_rate": 0.0019999635992424994, "loss": 0.2953, "step": 3860 }, { "epoch": 0.02746857882044812, "grad_norm": 0.2353515625, "learning_rate": 0.00199996334424846, "loss": 0.3017, "step": 3870 }, { "epoch": 0.02753955706029424, "grad_norm": 0.1396484375, "learning_rate": 0.001999963088364413, "loss": 0.3148, "step": 3880 }, { "epoch": 0.02761053530014036, "grad_norm": 0.271484375, "learning_rate": 0.0019999628315903587, "loss": 0.301, "step": 3890 }, { "epoch": 0.027681513539986477, "grad_norm": 0.1416015625, "learning_rate": 0.001999962573926298, "loss": 0.3042, "step": 3900 }, { "epoch": 0.0277524917798326, "grad_norm": 0.1494140625, "learning_rate": 0.0019999623153722306, "loss": 0.3051, "step": 3910 }, { "epoch": 0.027823470019678716, "grad_norm": 0.1748046875, "learning_rate": 0.0019999620559281567, "loss": 0.3048, "step": 3920 }, { "epoch": 0.027894448259524838, "grad_norm": 0.134765625, "learning_rate": 0.001999961795594077, "loss": 0.2985, "step": 3930 }, { "epoch": 0.027965426499370956, "grad_norm": 0.1357421875, "learning_rate": 0.001999961534369991, "loss": 0.3162, "step": 3940 }, { "epoch": 0.028036404739217074, "grad_norm": 0.1767578125, "learning_rate": 0.0019999612722559, "loss": 0.2835, "step": 3950 }, { "epoch": 0.028107382979063195, "grad_norm": 0.1533203125, "learning_rate": 0.0019999610092518035, "loss": 0.2962, "step": 3960 }, { "epoch": 0.028178361218909313, "grad_norm": 0.236328125, "learning_rate": 0.001999960745357702, "loss": 0.3169, "step": 3970 }, { "epoch": 0.02824933945875543, "grad_norm": 0.10791015625, "learning_rate": 0.001999960480573596, "loss": 0.2934, "step": 3980 }, { "epoch": 0.028320317698601553, "grad_norm": 0.1318359375, "learning_rate": 0.0019999602148994848, "loss": 0.3174, "step": 3990 }, { "epoch": 0.02839129593844767, "grad_norm": 0.09326171875, "learning_rate": 0.0019999599483353702, "loss": 0.2846, "step": 4000 }, { "epoch": 0.02839129593844767, "eval_covost2-zh-en_loss": 4.163454055786133, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.2089, "eval_covost2-zh-en_samples_per_second": 3.018, "eval_covost2-zh-en_steps_per_second": 0.189, "step": 4000 }, { "epoch": 0.02839129593844767, "eval_covost2-en-zh_loss": 3.2028112411499023, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.6848, "eval_covost2-en-zh_samples_per_second": 2.951, "eval_covost2-en-zh_steps_per_second": 0.184, "step": 4000 }, { "epoch": 0.02846227417829379, "grad_norm": 0.28125, "learning_rate": 0.0019999596808812506, "loss": 0.2967, "step": 4010 }, { "epoch": 0.02853325241813991, "grad_norm": 0.166015625, "learning_rate": 0.0019999594125371285, "loss": 0.3024, "step": 4020 }, { "epoch": 0.028604230657986028, "grad_norm": 0.294921875, "learning_rate": 0.0019999591433030018, "loss": 0.3136, "step": 4030 }, { "epoch": 0.028675208897832146, "grad_norm": 0.1318359375, "learning_rate": 0.0019999588731788726, "loss": 0.293, "step": 4040 }, { "epoch": 0.028746187137678268, "grad_norm": 0.0986328125, "learning_rate": 0.0019999586021647404, "loss": 0.3047, "step": 4050 }, { "epoch": 0.028817165377524386, "grad_norm": 0.126953125, "learning_rate": 0.001999958330260605, "loss": 0.2981, "step": 4060 }, { "epoch": 0.028888143617370503, "grad_norm": 0.1376953125, "learning_rate": 0.001999958057466468, "loss": 0.3041, "step": 4070 }, { "epoch": 0.028959121857216625, "grad_norm": 0.1279296875, "learning_rate": 0.0019999577837823282, "loss": 0.3077, "step": 4080 }, { "epoch": 0.029030100097062743, "grad_norm": 0.1806640625, "learning_rate": 0.001999957509208187, "loss": 0.2893, "step": 4090 }, { "epoch": 0.02910107833690886, "grad_norm": 0.1591796875, "learning_rate": 0.001999957233744044, "loss": 0.3061, "step": 4100 }, { "epoch": 0.029172056576754982, "grad_norm": 0.1015625, "learning_rate": 0.0019999569573898998, "loss": 0.278, "step": 4110 }, { "epoch": 0.0292430348166011, "grad_norm": 0.138671875, "learning_rate": 0.0019999566801457546, "loss": 0.3182, "step": 4120 }, { "epoch": 0.02931401305644722, "grad_norm": 0.1494140625, "learning_rate": 0.0019999564020116087, "loss": 0.315, "step": 4130 }, { "epoch": 0.02938499129629334, "grad_norm": 0.1484375, "learning_rate": 0.001999956122987462, "loss": 0.3137, "step": 4140 }, { "epoch": 0.029455969536139458, "grad_norm": 0.1533203125, "learning_rate": 0.0019999558430733155, "loss": 0.293, "step": 4150 }, { "epoch": 0.029526947775985576, "grad_norm": 0.2197265625, "learning_rate": 0.001999955562269169, "loss": 0.3005, "step": 4160 }, { "epoch": 0.029597926015831697, "grad_norm": 0.12890625, "learning_rate": 0.0019999552805750227, "loss": 0.2886, "step": 4170 }, { "epoch": 0.029668904255677815, "grad_norm": 0.1279296875, "learning_rate": 0.001999954997990877, "loss": 0.3161, "step": 4180 }, { "epoch": 0.029739882495523933, "grad_norm": 0.13671875, "learning_rate": 0.001999954714516732, "loss": 0.2858, "step": 4190 }, { "epoch": 0.029810860735370055, "grad_norm": 0.11376953125, "learning_rate": 0.001999954430152589, "loss": 0.2901, "step": 4200 }, { "epoch": 0.029881838975216173, "grad_norm": 0.1552734375, "learning_rate": 0.001999954144898447, "loss": 0.2886, "step": 4210 }, { "epoch": 0.029952817215062294, "grad_norm": 0.271484375, "learning_rate": 0.001999953858754307, "loss": 0.3003, "step": 4220 }, { "epoch": 0.030023795454908412, "grad_norm": 0.1328125, "learning_rate": 0.0019999535717201682, "loss": 0.2846, "step": 4230 }, { "epoch": 0.03009477369475453, "grad_norm": 0.1455078125, "learning_rate": 0.0019999532837960323, "loss": 0.3095, "step": 4240 }, { "epoch": 0.03016575193460065, "grad_norm": 0.177734375, "learning_rate": 0.001999952994981899, "loss": 0.272, "step": 4250 }, { "epoch": 0.03023673017444677, "grad_norm": 0.1298828125, "learning_rate": 0.0019999527052777686, "loss": 0.2878, "step": 4260 }, { "epoch": 0.030307708414292887, "grad_norm": 0.1474609375, "learning_rate": 0.0019999524146836417, "loss": 0.2814, "step": 4270 }, { "epoch": 0.03037868665413901, "grad_norm": 0.1337890625, "learning_rate": 0.0019999521231995176, "loss": 0.3237, "step": 4280 }, { "epoch": 0.030449664893985127, "grad_norm": 0.1845703125, "learning_rate": 0.001999951830825398, "loss": 0.3118, "step": 4290 }, { "epoch": 0.030520643133831245, "grad_norm": 0.13671875, "learning_rate": 0.0019999515375612818, "loss": 0.3175, "step": 4300 }, { "epoch": 0.030591621373677366, "grad_norm": 0.16015625, "learning_rate": 0.00199995124340717, "loss": 0.3233, "step": 4310 }, { "epoch": 0.030662599613523484, "grad_norm": 0.1396484375, "learning_rate": 0.001999950948363063, "loss": 0.294, "step": 4320 }, { "epoch": 0.030733577853369602, "grad_norm": 0.1328125, "learning_rate": 0.001999950652428961, "loss": 0.294, "step": 4330 }, { "epoch": 0.030804556093215724, "grad_norm": 0.1884765625, "learning_rate": 0.001999950355604864, "loss": 0.2929, "step": 4340 }, { "epoch": 0.03087553433306184, "grad_norm": 0.1513671875, "learning_rate": 0.001999950057890773, "loss": 0.3125, "step": 4350 }, { "epoch": 0.03094651257290796, "grad_norm": 0.1220703125, "learning_rate": 0.0019999497592866873, "loss": 0.3108, "step": 4360 }, { "epoch": 0.03101749081275408, "grad_norm": 0.16015625, "learning_rate": 0.0019999494597926076, "loss": 0.2871, "step": 4370 }, { "epoch": 0.0310884690526002, "grad_norm": 0.177734375, "learning_rate": 0.0019999491594085345, "loss": 0.3075, "step": 4380 }, { "epoch": 0.031159447292446317, "grad_norm": 0.12255859375, "learning_rate": 0.001999948858134468, "loss": 0.2805, "step": 4390 }, { "epoch": 0.03123042553229244, "grad_norm": 0.1572265625, "learning_rate": 0.0019999485559704087, "loss": 0.2735, "step": 4400 }, { "epoch": 0.03130140377213856, "grad_norm": 0.142578125, "learning_rate": 0.0019999482529163564, "loss": 0.3124, "step": 4410 }, { "epoch": 0.031372382011984674, "grad_norm": 0.130859375, "learning_rate": 0.001999947948972312, "loss": 0.2909, "step": 4420 }, { "epoch": 0.031443360251830796, "grad_norm": 0.1279296875, "learning_rate": 0.001999947644138275, "loss": 0.2932, "step": 4430 }, { "epoch": 0.03151433849167692, "grad_norm": 0.1318359375, "learning_rate": 0.001999947338414247, "loss": 0.2679, "step": 4440 }, { "epoch": 0.03158531673152303, "grad_norm": 0.08740234375, "learning_rate": 0.0019999470318002267, "loss": 0.2832, "step": 4450 }, { "epoch": 0.03165629497136915, "grad_norm": 0.185546875, "learning_rate": 0.0019999467242962153, "loss": 0.2887, "step": 4460 }, { "epoch": 0.031727273211215275, "grad_norm": 0.10888671875, "learning_rate": 0.001999946415902213, "loss": 0.278, "step": 4470 }, { "epoch": 0.03179825145106139, "grad_norm": 0.1689453125, "learning_rate": 0.00199994610661822, "loss": 0.2904, "step": 4480 }, { "epoch": 0.03186922969090751, "grad_norm": 0.1953125, "learning_rate": 0.0019999457964442373, "loss": 0.304, "step": 4490 }, { "epoch": 0.03194020793075363, "grad_norm": 0.1767578125, "learning_rate": 0.0019999454853802646, "loss": 0.2975, "step": 4500 }, { "epoch": 0.03201118617059975, "grad_norm": 0.1904296875, "learning_rate": 0.0019999451734263015, "loss": 0.3008, "step": 4510 }, { "epoch": 0.03208216441044587, "grad_norm": 0.44140625, "learning_rate": 0.0019999448605823493, "loss": 0.2993, "step": 4520 }, { "epoch": 0.03215314265029199, "grad_norm": 0.1748046875, "learning_rate": 0.001999944546848408, "loss": 0.2971, "step": 4530 }, { "epoch": 0.032224120890138104, "grad_norm": 0.0927734375, "learning_rate": 0.0019999442322244784, "loss": 0.2955, "step": 4540 }, { "epoch": 0.032295099129984225, "grad_norm": 0.2080078125, "learning_rate": 0.00199994391671056, "loss": 0.3173, "step": 4550 }, { "epoch": 0.03236607736983035, "grad_norm": 0.14453125, "learning_rate": 0.0019999436003066535, "loss": 0.2892, "step": 4560 }, { "epoch": 0.03243705560967646, "grad_norm": 0.1953125, "learning_rate": 0.001999943283012759, "loss": 0.3051, "step": 4570 }, { "epoch": 0.03250803384952258, "grad_norm": 0.11962890625, "learning_rate": 0.0019999429648288776, "loss": 0.2978, "step": 4580 }, { "epoch": 0.032579012089368704, "grad_norm": 0.1181640625, "learning_rate": 0.0019999426457550087, "loss": 0.2809, "step": 4590 }, { "epoch": 0.03264999032921482, "grad_norm": 0.158203125, "learning_rate": 0.0019999423257911525, "loss": 0.2988, "step": 4600 }, { "epoch": 0.03272096856906094, "grad_norm": 0.11962890625, "learning_rate": 0.0019999420049373103, "loss": 0.287, "step": 4610 }, { "epoch": 0.03279194680890706, "grad_norm": 0.15625, "learning_rate": 0.0019999416831934817, "loss": 0.2789, "step": 4620 }, { "epoch": 0.032862925048753176, "grad_norm": 0.099609375, "learning_rate": 0.001999941360559667, "loss": 0.3055, "step": 4630 }, { "epoch": 0.0329339032885993, "grad_norm": 0.130859375, "learning_rate": 0.001999941037035867, "loss": 0.2889, "step": 4640 }, { "epoch": 0.03300488152844542, "grad_norm": 0.203125, "learning_rate": 0.0019999407126220815, "loss": 0.3011, "step": 4650 }, { "epoch": 0.033075859768291534, "grad_norm": 0.10595703125, "learning_rate": 0.001999940387318311, "loss": 0.3022, "step": 4660 }, { "epoch": 0.033146838008137655, "grad_norm": 0.1455078125, "learning_rate": 0.001999940061124556, "loss": 0.2877, "step": 4670 }, { "epoch": 0.03321781624798378, "grad_norm": 0.1806640625, "learning_rate": 0.0019999397340408165, "loss": 0.295, "step": 4680 }, { "epoch": 0.03328879448782989, "grad_norm": 0.142578125, "learning_rate": 0.001999939406067093, "loss": 0.2959, "step": 4690 }, { "epoch": 0.03335977272767601, "grad_norm": 0.12060546875, "learning_rate": 0.001999939077203386, "loss": 0.3239, "step": 4700 }, { "epoch": 0.033430750967522134, "grad_norm": 0.138671875, "learning_rate": 0.0019999387474496956, "loss": 0.2834, "step": 4710 }, { "epoch": 0.03350172920736825, "grad_norm": 0.3046875, "learning_rate": 0.001999938416806022, "loss": 0.2959, "step": 4720 }, { "epoch": 0.03357270744721437, "grad_norm": 0.1181640625, "learning_rate": 0.001999938085272366, "loss": 0.3092, "step": 4730 }, { "epoch": 0.03364368568706049, "grad_norm": 0.18359375, "learning_rate": 0.0019999377528487276, "loss": 0.2879, "step": 4740 }, { "epoch": 0.033714663926906606, "grad_norm": 0.099609375, "learning_rate": 0.001999937419535107, "loss": 0.2803, "step": 4750 }, { "epoch": 0.03378564216675273, "grad_norm": 0.10888671875, "learning_rate": 0.001999937085331505, "loss": 0.3095, "step": 4760 }, { "epoch": 0.03385662040659885, "grad_norm": 0.2119140625, "learning_rate": 0.001999936750237921, "loss": 0.2683, "step": 4770 }, { "epoch": 0.03392759864644496, "grad_norm": 0.10302734375, "learning_rate": 0.001999936414254357, "loss": 0.2849, "step": 4780 }, { "epoch": 0.033998576886291085, "grad_norm": 0.1455078125, "learning_rate": 0.0019999360773808113, "loss": 0.3187, "step": 4790 }, { "epoch": 0.034069555126137206, "grad_norm": 0.0888671875, "learning_rate": 0.001999935739617286, "loss": 0.2994, "step": 4800 }, { "epoch": 0.03414053336598332, "grad_norm": 0.12158203125, "learning_rate": 0.00199993540096378, "loss": 0.3057, "step": 4810 }, { "epoch": 0.03421151160582944, "grad_norm": 0.10400390625, "learning_rate": 0.0019999350614202946, "loss": 0.2849, "step": 4820 }, { "epoch": 0.034282489845675564, "grad_norm": 0.15625, "learning_rate": 0.00199993472098683, "loss": 0.3128, "step": 4830 }, { "epoch": 0.03435346808552168, "grad_norm": 0.126953125, "learning_rate": 0.001999934379663386, "loss": 0.2928, "step": 4840 }, { "epoch": 0.0344244463253678, "grad_norm": 0.1337890625, "learning_rate": 0.001999934037449963, "loss": 0.3109, "step": 4850 }, { "epoch": 0.03449542456521392, "grad_norm": 0.1728515625, "learning_rate": 0.001999933694346562, "loss": 0.3048, "step": 4860 }, { "epoch": 0.034566402805060036, "grad_norm": 0.126953125, "learning_rate": 0.0019999333503531833, "loss": 0.2928, "step": 4870 }, { "epoch": 0.03463738104490616, "grad_norm": 0.0810546875, "learning_rate": 0.0019999330054698266, "loss": 0.2809, "step": 4880 }, { "epoch": 0.03470835928475228, "grad_norm": 0.11962890625, "learning_rate": 0.0019999326596964926, "loss": 0.2928, "step": 4890 }, { "epoch": 0.03477933752459839, "grad_norm": 0.11865234375, "learning_rate": 0.0019999323130331817, "loss": 0.2879, "step": 4900 }, { "epoch": 0.034850315764444514, "grad_norm": 0.1357421875, "learning_rate": 0.001999931965479894, "loss": 0.3025, "step": 4910 }, { "epoch": 0.034921294004290636, "grad_norm": 0.10986328125, "learning_rate": 0.00199993161703663, "loss": 0.3004, "step": 4920 }, { "epoch": 0.03499227224413675, "grad_norm": 0.12890625, "learning_rate": 0.00199993126770339, "loss": 0.2848, "step": 4930 }, { "epoch": 0.03506325048398287, "grad_norm": 0.158203125, "learning_rate": 0.0019999309174801743, "loss": 0.2855, "step": 4940 }, { "epoch": 0.03513422872382899, "grad_norm": 0.2734375, "learning_rate": 0.0019999305663669837, "loss": 0.3126, "step": 4950 }, { "epoch": 0.035205206963675115, "grad_norm": 0.1181640625, "learning_rate": 0.0019999302143638184, "loss": 0.2876, "step": 4960 }, { "epoch": 0.03527618520352123, "grad_norm": 0.267578125, "learning_rate": 0.001999929861470678, "loss": 0.2837, "step": 4970 }, { "epoch": 0.03534716344336735, "grad_norm": 0.466796875, "learning_rate": 0.0019999295076875635, "loss": 0.3257, "step": 4980 }, { "epoch": 0.03541814168321347, "grad_norm": 0.12890625, "learning_rate": 0.001999929153014475, "loss": 0.303, "step": 4990 }, { "epoch": 0.03548911992305959, "grad_norm": 0.251953125, "learning_rate": 0.001999928797451413, "loss": 0.2974, "step": 5000 }, { "epoch": 0.03556009816290571, "grad_norm": 0.1259765625, "learning_rate": 0.0019999284409983778, "loss": 0.2931, "step": 5010 }, { "epoch": 0.03563107640275183, "grad_norm": 0.12451171875, "learning_rate": 0.0019999280836553697, "loss": 0.2873, "step": 5020 }, { "epoch": 0.035702054642597944, "grad_norm": 0.134765625, "learning_rate": 0.001999927725422389, "loss": 0.2693, "step": 5030 }, { "epoch": 0.035773032882444065, "grad_norm": 0.09765625, "learning_rate": 0.001999927366299437, "loss": 0.2994, "step": 5040 }, { "epoch": 0.03584401112229019, "grad_norm": 0.1259765625, "learning_rate": 0.0019999270062865124, "loss": 0.2914, "step": 5050 }, { "epoch": 0.0359149893621363, "grad_norm": 0.1337890625, "learning_rate": 0.0019999266453836167, "loss": 0.3142, "step": 5060 }, { "epoch": 0.03598596760198242, "grad_norm": 0.1455078125, "learning_rate": 0.0019999262835907502, "loss": 0.2914, "step": 5070 }, { "epoch": 0.036056945841828544, "grad_norm": 0.0888671875, "learning_rate": 0.0019999259209079125, "loss": 0.2941, "step": 5080 }, { "epoch": 0.03612792408167466, "grad_norm": 0.09423828125, "learning_rate": 0.0019999255573351048, "loss": 0.2934, "step": 5090 }, { "epoch": 0.03619890232152078, "grad_norm": 0.2021484375, "learning_rate": 0.001999925192872327, "loss": 0.2972, "step": 5100 }, { "epoch": 0.0362698805613669, "grad_norm": 0.1689453125, "learning_rate": 0.0019999248275195795, "loss": 0.308, "step": 5110 }, { "epoch": 0.036340858801213016, "grad_norm": 0.462890625, "learning_rate": 0.001999924461276863, "loss": 0.2991, "step": 5120 }, { "epoch": 0.03641183704105914, "grad_norm": 0.09912109375, "learning_rate": 0.001999924094144178, "loss": 0.2912, "step": 5130 }, { "epoch": 0.03648281528090526, "grad_norm": 0.14453125, "learning_rate": 0.001999923726121524, "loss": 0.2943, "step": 5140 }, { "epoch": 0.036553793520751374, "grad_norm": 0.0927734375, "learning_rate": 0.001999923357208901, "loss": 0.2852, "step": 5150 }, { "epoch": 0.036624771760597495, "grad_norm": 0.09912109375, "learning_rate": 0.0019999229874063116, "loss": 0.2882, "step": 5160 }, { "epoch": 0.03669575000044362, "grad_norm": 0.1494140625, "learning_rate": 0.001999922616713754, "loss": 0.2861, "step": 5170 }, { "epoch": 0.03676672824028973, "grad_norm": 0.17578125, "learning_rate": 0.0019999222451312294, "loss": 0.2939, "step": 5180 }, { "epoch": 0.03683770648013585, "grad_norm": 0.11865234375, "learning_rate": 0.0019999218726587382, "loss": 0.3079, "step": 5190 }, { "epoch": 0.036908684719981974, "grad_norm": 0.109375, "learning_rate": 0.0019999214992962805, "loss": 0.2972, "step": 5200 }, { "epoch": 0.03697966295982809, "grad_norm": 0.1494140625, "learning_rate": 0.001999921125043857, "loss": 0.2742, "step": 5210 }, { "epoch": 0.03705064119967421, "grad_norm": 0.11669921875, "learning_rate": 0.001999920749901468, "loss": 0.2931, "step": 5220 }, { "epoch": 0.03712161943952033, "grad_norm": 0.142578125, "learning_rate": 0.0019999203738691137, "loss": 0.3042, "step": 5230 }, { "epoch": 0.037192597679366446, "grad_norm": 0.2255859375, "learning_rate": 0.001999919996946795, "loss": 0.3108, "step": 5240 }, { "epoch": 0.03726357591921257, "grad_norm": 0.1728515625, "learning_rate": 0.0019999196191345108, "loss": 0.3111, "step": 5250 }, { "epoch": 0.03733455415905869, "grad_norm": 0.140625, "learning_rate": 0.0019999192404322633, "loss": 0.2927, "step": 5260 }, { "epoch": 0.0374055323989048, "grad_norm": 0.1015625, "learning_rate": 0.001999918860840052, "loss": 0.3161, "step": 5270 }, { "epoch": 0.037476510638750925, "grad_norm": 0.12890625, "learning_rate": 0.001999918480357877, "loss": 0.3156, "step": 5280 }, { "epoch": 0.037547488878597046, "grad_norm": 0.15625, "learning_rate": 0.001999918098985739, "loss": 0.2843, "step": 5290 }, { "epoch": 0.03761846711844316, "grad_norm": 0.1328125, "learning_rate": 0.0019999177167236387, "loss": 0.2904, "step": 5300 }, { "epoch": 0.03768944535828928, "grad_norm": 0.09765625, "learning_rate": 0.0019999173335715765, "loss": 0.2695, "step": 5310 }, { "epoch": 0.037760423598135404, "grad_norm": 0.1201171875, "learning_rate": 0.001999916949529552, "loss": 0.2877, "step": 5320 }, { "epoch": 0.03783140183798152, "grad_norm": 0.1220703125, "learning_rate": 0.0019999165645975658, "loss": 0.2791, "step": 5330 }, { "epoch": 0.03790238007782764, "grad_norm": 0.09130859375, "learning_rate": 0.001999916178775619, "loss": 0.2733, "step": 5340 }, { "epoch": 0.03797335831767376, "grad_norm": 0.208984375, "learning_rate": 0.001999915792063711, "loss": 0.2772, "step": 5350 }, { "epoch": 0.038044336557519876, "grad_norm": 0.1474609375, "learning_rate": 0.001999915404461843, "loss": 0.2984, "step": 5360 }, { "epoch": 0.038115314797366, "grad_norm": 0.146484375, "learning_rate": 0.0019999150159700154, "loss": 0.2742, "step": 5370 }, { "epoch": 0.03818629303721212, "grad_norm": 0.1435546875, "learning_rate": 0.0019999146265882277, "loss": 0.2977, "step": 5380 }, { "epoch": 0.03825727127705823, "grad_norm": 0.15234375, "learning_rate": 0.001999914236316481, "loss": 0.2699, "step": 5390 }, { "epoch": 0.038328249516904354, "grad_norm": 0.09228515625, "learning_rate": 0.0019999138451547755, "loss": 0.2801, "step": 5400 }, { "epoch": 0.038399227756750476, "grad_norm": 0.119140625, "learning_rate": 0.0019999134531031118, "loss": 0.2931, "step": 5410 }, { "epoch": 0.03847020599659659, "grad_norm": 0.1103515625, "learning_rate": 0.0019999130601614902, "loss": 0.2788, "step": 5420 }, { "epoch": 0.03854118423644271, "grad_norm": 0.0869140625, "learning_rate": 0.0019999126663299105, "loss": 0.283, "step": 5430 }, { "epoch": 0.03861216247628883, "grad_norm": 0.10400390625, "learning_rate": 0.0019999122716083738, "loss": 0.2721, "step": 5440 }, { "epoch": 0.03868314071613495, "grad_norm": 0.1416015625, "learning_rate": 0.00199991187599688, "loss": 0.2795, "step": 5450 }, { "epoch": 0.03875411895598107, "grad_norm": 0.09619140625, "learning_rate": 0.0019999114794954304, "loss": 0.292, "step": 5460 }, { "epoch": 0.03882509719582719, "grad_norm": 0.10888671875, "learning_rate": 0.001999911082104024, "loss": 0.2952, "step": 5470 }, { "epoch": 0.038896075435673305, "grad_norm": 0.1123046875, "learning_rate": 0.0019999106838226627, "loss": 0.2853, "step": 5480 }, { "epoch": 0.03896705367551943, "grad_norm": 0.12890625, "learning_rate": 0.001999910284651346, "loss": 0.291, "step": 5490 }, { "epoch": 0.03903803191536555, "grad_norm": 0.111328125, "learning_rate": 0.001999909884590074, "loss": 0.2748, "step": 5500 }, { "epoch": 0.03910901015521166, "grad_norm": 0.11328125, "learning_rate": 0.0019999094836388475, "loss": 0.2742, "step": 5510 }, { "epoch": 0.039179988395057784, "grad_norm": 0.181640625, "learning_rate": 0.0019999090817976674, "loss": 0.2926, "step": 5520 }, { "epoch": 0.039250966634903905, "grad_norm": 0.10791015625, "learning_rate": 0.0019999086790665334, "loss": 0.2823, "step": 5530 }, { "epoch": 0.03932194487475003, "grad_norm": 0.08251953125, "learning_rate": 0.001999908275445446, "loss": 0.2899, "step": 5540 }, { "epoch": 0.03939292311459614, "grad_norm": 0.1494140625, "learning_rate": 0.001999907870934406, "loss": 0.2996, "step": 5550 }, { "epoch": 0.03946390135444226, "grad_norm": 0.10205078125, "learning_rate": 0.0019999074655334135, "loss": 0.2807, "step": 5560 }, { "epoch": 0.039534879594288384, "grad_norm": 0.1533203125, "learning_rate": 0.0019999070592424686, "loss": 0.2932, "step": 5570 }, { "epoch": 0.0396058578341345, "grad_norm": 0.1591796875, "learning_rate": 0.001999906652061572, "loss": 0.2934, "step": 5580 }, { "epoch": 0.03967683607398062, "grad_norm": 0.1337890625, "learning_rate": 0.0019999062439907244, "loss": 0.2807, "step": 5590 }, { "epoch": 0.03974781431382674, "grad_norm": 0.1640625, "learning_rate": 0.0019999058350299256, "loss": 0.2829, "step": 5600 }, { "epoch": 0.039818792553672856, "grad_norm": 0.1513671875, "learning_rate": 0.0019999054251791768, "loss": 0.2853, "step": 5610 }, { "epoch": 0.03988977079351898, "grad_norm": 0.1513671875, "learning_rate": 0.0019999050144384775, "loss": 0.3019, "step": 5620 }, { "epoch": 0.0399607490333651, "grad_norm": 0.1201171875, "learning_rate": 0.0019999046028078287, "loss": 0.2925, "step": 5630 }, { "epoch": 0.040031727273211214, "grad_norm": 0.09765625, "learning_rate": 0.0019999041902872307, "loss": 0.2994, "step": 5640 }, { "epoch": 0.040102705513057335, "grad_norm": 0.1455078125, "learning_rate": 0.0019999037768766836, "loss": 0.2911, "step": 5650 }, { "epoch": 0.04017368375290346, "grad_norm": 0.115234375, "learning_rate": 0.0019999033625761886, "loss": 0.2657, "step": 5660 }, { "epoch": 0.04024466199274957, "grad_norm": 0.10400390625, "learning_rate": 0.0019999029473857454, "loss": 0.2786, "step": 5670 }, { "epoch": 0.04031564023259569, "grad_norm": 0.1025390625, "learning_rate": 0.0019999025313053543, "loss": 0.2845, "step": 5680 }, { "epoch": 0.040386618472441814, "grad_norm": 0.142578125, "learning_rate": 0.001999902114335016, "loss": 0.2966, "step": 5690 }, { "epoch": 0.04045759671228793, "grad_norm": 0.1220703125, "learning_rate": 0.001999901696474731, "loss": 0.2784, "step": 5700 }, { "epoch": 0.04052857495213405, "grad_norm": 0.17578125, "learning_rate": 0.0019999012777244998, "loss": 0.3002, "step": 5710 }, { "epoch": 0.04059955319198017, "grad_norm": 0.16015625, "learning_rate": 0.0019999008580843225, "loss": 0.2941, "step": 5720 }, { "epoch": 0.040670531431826286, "grad_norm": 0.162109375, "learning_rate": 0.0019999004375541997, "loss": 0.2932, "step": 5730 }, { "epoch": 0.04074150967167241, "grad_norm": 0.1435546875, "learning_rate": 0.001999900016134132, "loss": 0.2876, "step": 5740 }, { "epoch": 0.04081248791151853, "grad_norm": 0.16796875, "learning_rate": 0.001999899593824119, "loss": 0.2966, "step": 5750 }, { "epoch": 0.04088346615136464, "grad_norm": 0.2158203125, "learning_rate": 0.0019998991706241622, "loss": 0.2661, "step": 5760 }, { "epoch": 0.040954444391210765, "grad_norm": 0.12890625, "learning_rate": 0.001999898746534261, "loss": 0.287, "step": 5770 }, { "epoch": 0.041025422631056886, "grad_norm": 0.1337890625, "learning_rate": 0.0019998983215544167, "loss": 0.2855, "step": 5780 }, { "epoch": 0.041096400870903, "grad_norm": 0.10400390625, "learning_rate": 0.0019998978956846294, "loss": 0.2755, "step": 5790 }, { "epoch": 0.04116737911074912, "grad_norm": 0.10693359375, "learning_rate": 0.0019998974689248994, "loss": 0.3032, "step": 5800 }, { "epoch": 0.041238357350595244, "grad_norm": 0.1220703125, "learning_rate": 0.001999897041275227, "loss": 0.2942, "step": 5810 }, { "epoch": 0.04130933559044136, "grad_norm": 0.11376953125, "learning_rate": 0.0019998966127356133, "loss": 0.2983, "step": 5820 }, { "epoch": 0.04138031383028748, "grad_norm": 0.169921875, "learning_rate": 0.001999896183306058, "loss": 0.279, "step": 5830 }, { "epoch": 0.0414512920701336, "grad_norm": 0.09716796875, "learning_rate": 0.0019998957529865622, "loss": 0.2883, "step": 5840 }, { "epoch": 0.041522270309979716, "grad_norm": 0.0849609375, "learning_rate": 0.001999895321777125, "loss": 0.2844, "step": 5850 }, { "epoch": 0.04159324854982584, "grad_norm": 0.1474609375, "learning_rate": 0.0019998948896777485, "loss": 0.2937, "step": 5860 }, { "epoch": 0.04166422678967196, "grad_norm": 0.1279296875, "learning_rate": 0.001999894456688432, "loss": 0.271, "step": 5870 }, { "epoch": 0.04173520502951807, "grad_norm": 0.123046875, "learning_rate": 0.0019998940228091763, "loss": 0.3226, "step": 5880 }, { "epoch": 0.041806183269364194, "grad_norm": 0.1484375, "learning_rate": 0.0019998935880399817, "loss": 0.2932, "step": 5890 }, { "epoch": 0.041877161509210316, "grad_norm": 0.1328125, "learning_rate": 0.001999893152380849, "loss": 0.2781, "step": 5900 }, { "epoch": 0.04194813974905643, "grad_norm": 0.1083984375, "learning_rate": 0.0019998927158317785, "loss": 0.2826, "step": 5910 }, { "epoch": 0.04201911798890255, "grad_norm": 0.205078125, "learning_rate": 0.0019998922783927703, "loss": 0.287, "step": 5920 }, { "epoch": 0.04209009622874867, "grad_norm": 0.1455078125, "learning_rate": 0.0019998918400638247, "loss": 0.276, "step": 5930 }, { "epoch": 0.04216107446859479, "grad_norm": 0.10302734375, "learning_rate": 0.001999891400844943, "loss": 0.2681, "step": 5940 }, { "epoch": 0.04223205270844091, "grad_norm": 0.126953125, "learning_rate": 0.001999890960736125, "loss": 0.2921, "step": 5950 }, { "epoch": 0.04230303094828703, "grad_norm": 0.1201171875, "learning_rate": 0.001999890519737371, "loss": 0.2825, "step": 5960 }, { "epoch": 0.042374009188133145, "grad_norm": 0.12451171875, "learning_rate": 0.0019998900778486818, "loss": 0.2732, "step": 5970 }, { "epoch": 0.04244498742797927, "grad_norm": 0.177734375, "learning_rate": 0.0019998896350700577, "loss": 0.2606, "step": 5980 }, { "epoch": 0.04251596566782539, "grad_norm": 0.1533203125, "learning_rate": 0.001999889191401499, "loss": 0.2858, "step": 5990 }, { "epoch": 0.0425869439076715, "grad_norm": 0.1279296875, "learning_rate": 0.0019998887468430063, "loss": 0.2813, "step": 6000 }, { "epoch": 0.0425869439076715, "eval_covost2-zh-en_loss": 4.03579044342041, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.4415, "eval_covost2-zh-en_samples_per_second": 3.131, "eval_covost2-zh-en_steps_per_second": 0.196, "step": 6000 }, { "epoch": 0.0425869439076715, "eval_covost2-en-zh_loss": 3.207631826400757, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.242, "eval_covost2-en-zh_samples_per_second": 3.326, "eval_covost2-en-zh_steps_per_second": 0.208, "step": 6000 }, { "epoch": 0.042657922147517624, "grad_norm": 0.12158203125, "learning_rate": 0.0019998883013945807, "loss": 0.2661, "step": 6010 }, { "epoch": 0.042728900387363745, "grad_norm": 0.154296875, "learning_rate": 0.0019998878550562212, "loss": 0.2803, "step": 6020 }, { "epoch": 0.04279987862720986, "grad_norm": 0.21875, "learning_rate": 0.001999887407827929, "loss": 0.2793, "step": 6030 }, { "epoch": 0.04287085686705598, "grad_norm": 0.08740234375, "learning_rate": 0.0019998869597097047, "loss": 0.2832, "step": 6040 }, { "epoch": 0.0429418351069021, "grad_norm": 0.1455078125, "learning_rate": 0.001999886510701549, "loss": 0.3199, "step": 6050 }, { "epoch": 0.04301281334674822, "grad_norm": 0.1298828125, "learning_rate": 0.001999886060803462, "loss": 0.2998, "step": 6060 }, { "epoch": 0.04308379158659434, "grad_norm": 0.1474609375, "learning_rate": 0.0019998856100154434, "loss": 0.2844, "step": 6070 }, { "epoch": 0.04315476982644046, "grad_norm": 0.435546875, "learning_rate": 0.001999885158337495, "loss": 0.2716, "step": 6080 }, { "epoch": 0.043225748066286575, "grad_norm": 0.1826171875, "learning_rate": 0.0019998847057696163, "loss": 0.3055, "step": 6090 }, { "epoch": 0.043296726306132696, "grad_norm": 0.10693359375, "learning_rate": 0.0019998842523118077, "loss": 0.3002, "step": 6100 }, { "epoch": 0.04336770454597882, "grad_norm": 0.1953125, "learning_rate": 0.0019998837979640703, "loss": 0.2955, "step": 6110 }, { "epoch": 0.04343868278582494, "grad_norm": 0.2275390625, "learning_rate": 0.001999883342726404, "loss": 0.2886, "step": 6120 }, { "epoch": 0.043509661025671054, "grad_norm": 0.138671875, "learning_rate": 0.0019998828865988097, "loss": 0.2975, "step": 6130 }, { "epoch": 0.043580639265517175, "grad_norm": 0.1806640625, "learning_rate": 0.0019998824295812878, "loss": 0.275, "step": 6140 }, { "epoch": 0.0436516175053633, "grad_norm": 0.111328125, "learning_rate": 0.0019998819716738384, "loss": 0.2694, "step": 6150 }, { "epoch": 0.04372259574520941, "grad_norm": 0.271484375, "learning_rate": 0.001999881512876462, "loss": 0.2799, "step": 6160 }, { "epoch": 0.04379357398505553, "grad_norm": 0.06982421875, "learning_rate": 0.001999881053189159, "loss": 0.2622, "step": 6170 }, { "epoch": 0.043864552224901654, "grad_norm": 0.2080078125, "learning_rate": 0.0019998805926119303, "loss": 0.2818, "step": 6180 }, { "epoch": 0.04393553046474777, "grad_norm": 0.177734375, "learning_rate": 0.0019998801311447763, "loss": 0.2926, "step": 6190 }, { "epoch": 0.04400650870459389, "grad_norm": 0.09814453125, "learning_rate": 0.0019998796687876966, "loss": 0.2829, "step": 6200 }, { "epoch": 0.04407748694444001, "grad_norm": 0.09033203125, "learning_rate": 0.001999879205540693, "loss": 0.2768, "step": 6210 }, { "epoch": 0.044148465184286126, "grad_norm": 0.1171875, "learning_rate": 0.0019998787414037653, "loss": 0.287, "step": 6220 }, { "epoch": 0.04421944342413225, "grad_norm": 0.1884765625, "learning_rate": 0.0019998782763769136, "loss": 0.2705, "step": 6230 }, { "epoch": 0.04429042166397837, "grad_norm": 0.1015625, "learning_rate": 0.001999877810460139, "loss": 0.2844, "step": 6240 }, { "epoch": 0.04436139990382448, "grad_norm": 0.12353515625, "learning_rate": 0.001999877343653441, "loss": 0.2885, "step": 6250 }, { "epoch": 0.044432378143670605, "grad_norm": 0.10107421875, "learning_rate": 0.0019998768759568212, "loss": 0.2875, "step": 6260 }, { "epoch": 0.044503356383516726, "grad_norm": 0.10205078125, "learning_rate": 0.0019998764073702793, "loss": 0.2754, "step": 6270 }, { "epoch": 0.04457433462336284, "grad_norm": 0.177734375, "learning_rate": 0.0019998759378938163, "loss": 0.2801, "step": 6280 }, { "epoch": 0.04464531286320896, "grad_norm": 0.142578125, "learning_rate": 0.001999875467527432, "loss": 0.2601, "step": 6290 }, { "epoch": 0.044716291103055084, "grad_norm": 0.1689453125, "learning_rate": 0.0019998749962711277, "loss": 0.2992, "step": 6300 }, { "epoch": 0.0447872693429012, "grad_norm": 0.2041015625, "learning_rate": 0.001999874524124903, "loss": 0.296, "step": 6310 }, { "epoch": 0.04485824758274732, "grad_norm": 0.1435546875, "learning_rate": 0.001999874051088759, "loss": 0.2743, "step": 6320 }, { "epoch": 0.04492922582259344, "grad_norm": 0.1298828125, "learning_rate": 0.001999873577162696, "loss": 0.2827, "step": 6330 }, { "epoch": 0.045000204062439556, "grad_norm": 0.11328125, "learning_rate": 0.0019998731023467145, "loss": 0.2688, "step": 6340 }, { "epoch": 0.04507118230228568, "grad_norm": 0.103515625, "learning_rate": 0.001999872626640815, "loss": 0.2906, "step": 6350 }, { "epoch": 0.0451421605421318, "grad_norm": 0.09423828125, "learning_rate": 0.001999872150044998, "loss": 0.2748, "step": 6360 }, { "epoch": 0.04521313878197791, "grad_norm": 0.099609375, "learning_rate": 0.0019998716725592633, "loss": 0.288, "step": 6370 }, { "epoch": 0.045284117021824034, "grad_norm": 0.177734375, "learning_rate": 0.001999871194183612, "loss": 0.2917, "step": 6380 }, { "epoch": 0.045355095261670156, "grad_norm": 0.154296875, "learning_rate": 0.001999870714918045, "loss": 0.3005, "step": 6390 }, { "epoch": 0.04542607350151627, "grad_norm": 0.115234375, "learning_rate": 0.0019998702347625618, "loss": 0.2765, "step": 6400 }, { "epoch": 0.04549705174136239, "grad_norm": 0.1435546875, "learning_rate": 0.0019998697537171634, "loss": 0.2796, "step": 6410 }, { "epoch": 0.04556802998120851, "grad_norm": 0.1318359375, "learning_rate": 0.00199986927178185, "loss": 0.2899, "step": 6420 }, { "epoch": 0.04563900822105463, "grad_norm": 0.126953125, "learning_rate": 0.001999868788956623, "loss": 0.2677, "step": 6430 }, { "epoch": 0.04570998646090075, "grad_norm": 0.1376953125, "learning_rate": 0.0019998683052414815, "loss": 0.2906, "step": 6440 }, { "epoch": 0.04578096470074687, "grad_norm": 0.11474609375, "learning_rate": 0.001999867820636427, "loss": 0.3045, "step": 6450 }, { "epoch": 0.045851942940592985, "grad_norm": 0.19140625, "learning_rate": 0.0019998673351414595, "loss": 0.2961, "step": 6460 }, { "epoch": 0.04592292118043911, "grad_norm": 0.08447265625, "learning_rate": 0.0019998668487565797, "loss": 0.2771, "step": 6470 }, { "epoch": 0.04599389942028523, "grad_norm": 0.1396484375, "learning_rate": 0.001999866361481788, "loss": 0.2907, "step": 6480 }, { "epoch": 0.04606487766013134, "grad_norm": 0.1552734375, "learning_rate": 0.001999865873317085, "loss": 0.2739, "step": 6490 }, { "epoch": 0.046135855899977464, "grad_norm": 0.1748046875, "learning_rate": 0.0019998653842624703, "loss": 0.2784, "step": 6500 }, { "epoch": 0.046206834139823585, "grad_norm": 0.0986328125, "learning_rate": 0.0019998648943179457, "loss": 0.284, "step": 6510 }, { "epoch": 0.0462778123796697, "grad_norm": 0.12255859375, "learning_rate": 0.001999864403483511, "loss": 0.2727, "step": 6520 }, { "epoch": 0.04634879061951582, "grad_norm": 0.1630859375, "learning_rate": 0.001999863911759167, "loss": 0.3007, "step": 6530 }, { "epoch": 0.04641976885936194, "grad_norm": 0.1015625, "learning_rate": 0.001999863419144914, "loss": 0.2998, "step": 6540 }, { "epoch": 0.04649074709920806, "grad_norm": 0.09521484375, "learning_rate": 0.0019998629256407522, "loss": 0.2655, "step": 6550 }, { "epoch": 0.04656172533905418, "grad_norm": 0.142578125, "learning_rate": 0.0019998624312466823, "loss": 0.2803, "step": 6560 }, { "epoch": 0.0466327035789003, "grad_norm": 0.12353515625, "learning_rate": 0.0019998619359627053, "loss": 0.2691, "step": 6570 }, { "epoch": 0.046703681818746415, "grad_norm": 0.1005859375, "learning_rate": 0.0019998614397888207, "loss": 0.2617, "step": 6580 }, { "epoch": 0.046774660058592536, "grad_norm": 0.09423828125, "learning_rate": 0.00199986094272503, "loss": 0.2855, "step": 6590 }, { "epoch": 0.04684563829843866, "grad_norm": 0.1396484375, "learning_rate": 0.001999860444771333, "loss": 0.2666, "step": 6600 }, { "epoch": 0.04691661653828477, "grad_norm": 0.134765625, "learning_rate": 0.0019998599459277303, "loss": 0.2645, "step": 6610 }, { "epoch": 0.046987594778130894, "grad_norm": 0.1318359375, "learning_rate": 0.0019998594461942227, "loss": 0.273, "step": 6620 }, { "epoch": 0.047058573017977015, "grad_norm": 0.10693359375, "learning_rate": 0.0019998589455708106, "loss": 0.2823, "step": 6630 }, { "epoch": 0.04712955125782313, "grad_norm": 0.138671875, "learning_rate": 0.001999858444057494, "loss": 0.2805, "step": 6640 }, { "epoch": 0.04720052949766925, "grad_norm": 0.1162109375, "learning_rate": 0.0019998579416542744, "loss": 0.274, "step": 6650 }, { "epoch": 0.04727150773751537, "grad_norm": 0.09765625, "learning_rate": 0.001999857438361151, "loss": 0.313, "step": 6660 }, { "epoch": 0.04734248597736149, "grad_norm": 0.1640625, "learning_rate": 0.001999856934178126, "loss": 0.3037, "step": 6670 }, { "epoch": 0.04741346421720761, "grad_norm": 0.10400390625, "learning_rate": 0.0019998564291051984, "loss": 0.2815, "step": 6680 }, { "epoch": 0.04748444245705373, "grad_norm": 0.1416015625, "learning_rate": 0.0019998559231423686, "loss": 0.3007, "step": 6690 }, { "epoch": 0.04755542069689985, "grad_norm": 0.1123046875, "learning_rate": 0.001999855416289638, "loss": 0.2777, "step": 6700 }, { "epoch": 0.047626398936745966, "grad_norm": 0.11767578125, "learning_rate": 0.001999854908547007, "loss": 0.2777, "step": 6710 }, { "epoch": 0.04769737717659209, "grad_norm": 0.109375, "learning_rate": 0.001999854399914476, "loss": 0.2915, "step": 6720 }, { "epoch": 0.04776835541643821, "grad_norm": 0.1455078125, "learning_rate": 0.001999853890392045, "loss": 0.2658, "step": 6730 }, { "epoch": 0.04783933365628432, "grad_norm": 0.09375, "learning_rate": 0.001999853379979715, "loss": 0.2697, "step": 6740 }, { "epoch": 0.047910311896130445, "grad_norm": 0.109375, "learning_rate": 0.001999852868677487, "loss": 0.2892, "step": 6750 }, { "epoch": 0.047981290135976566, "grad_norm": 0.11962890625, "learning_rate": 0.00199985235648536, "loss": 0.3044, "step": 6760 }, { "epoch": 0.04805226837582268, "grad_norm": 0.28515625, "learning_rate": 0.0019998518434033363, "loss": 0.3112, "step": 6770 }, { "epoch": 0.0481232466156688, "grad_norm": 0.1357421875, "learning_rate": 0.001999851329431415, "loss": 0.2693, "step": 6780 }, { "epoch": 0.048194224855514924, "grad_norm": 0.0732421875, "learning_rate": 0.001999850814569597, "loss": 0.2897, "step": 6790 }, { "epoch": 0.04826520309536104, "grad_norm": 0.1689453125, "learning_rate": 0.0019998502988178833, "loss": 0.2792, "step": 6800 }, { "epoch": 0.04833618133520716, "grad_norm": 0.1318359375, "learning_rate": 0.001999849782176274, "loss": 0.3069, "step": 6810 }, { "epoch": 0.04840715957505328, "grad_norm": 0.125, "learning_rate": 0.0019998492646447694, "loss": 0.2746, "step": 6820 }, { "epoch": 0.048478137814899395, "grad_norm": 0.109375, "learning_rate": 0.0019998487462233707, "loss": 0.2941, "step": 6830 }, { "epoch": 0.04854911605474552, "grad_norm": 0.1357421875, "learning_rate": 0.0019998482269120775, "loss": 0.2856, "step": 6840 }, { "epoch": 0.04862009429459164, "grad_norm": 0.11376953125, "learning_rate": 0.001999847706710891, "loss": 0.2767, "step": 6850 }, { "epoch": 0.04869107253443775, "grad_norm": 0.251953125, "learning_rate": 0.001999847185619811, "loss": 0.3007, "step": 6860 }, { "epoch": 0.048762050774283874, "grad_norm": 0.177734375, "learning_rate": 0.0019998466636388396, "loss": 0.2746, "step": 6870 }, { "epoch": 0.048833029014129996, "grad_norm": 0.1123046875, "learning_rate": 0.001999846140767976, "loss": 0.2797, "step": 6880 }, { "epoch": 0.04890400725397611, "grad_norm": 0.11328125, "learning_rate": 0.00199984561700722, "loss": 0.2805, "step": 6890 }, { "epoch": 0.04897498549382223, "grad_norm": 0.09716796875, "learning_rate": 0.0019998450923565743, "loss": 0.3018, "step": 6900 }, { "epoch": 0.04904596373366835, "grad_norm": 0.12451171875, "learning_rate": 0.0019998445668160374, "loss": 0.2864, "step": 6910 }, { "epoch": 0.04911694197351447, "grad_norm": 0.095703125, "learning_rate": 0.0019998440403856113, "loss": 0.2733, "step": 6920 }, { "epoch": 0.04918792021336059, "grad_norm": 0.197265625, "learning_rate": 0.001999843513065295, "loss": 0.2859, "step": 6930 }, { "epoch": 0.04925889845320671, "grad_norm": 0.1650390625, "learning_rate": 0.0019998429848550906, "loss": 0.2676, "step": 6940 }, { "epoch": 0.049329876693052825, "grad_norm": 0.173828125, "learning_rate": 0.001999842455754997, "loss": 0.2859, "step": 6950 }, { "epoch": 0.04940085493289895, "grad_norm": 0.1875, "learning_rate": 0.0019998419257650163, "loss": 0.303, "step": 6960 }, { "epoch": 0.04947183317274507, "grad_norm": 0.1708984375, "learning_rate": 0.0019998413948851485, "loss": 0.2955, "step": 6970 }, { "epoch": 0.04954281141259118, "grad_norm": 0.154296875, "learning_rate": 0.0019998408631153935, "loss": 0.2687, "step": 6980 }, { "epoch": 0.049613789652437304, "grad_norm": 0.1015625, "learning_rate": 0.0019998403304557527, "loss": 0.3019, "step": 6990 }, { "epoch": 0.049684767892283425, "grad_norm": 0.09033203125, "learning_rate": 0.001999839796906226, "loss": 0.279, "step": 7000 }, { "epoch": 0.04975574613212954, "grad_norm": 0.07861328125, "learning_rate": 0.001999839262466814, "loss": 0.2873, "step": 7010 }, { "epoch": 0.04982672437197566, "grad_norm": 0.1962890625, "learning_rate": 0.001999838727137517, "loss": 0.2902, "step": 7020 }, { "epoch": 0.04989770261182178, "grad_norm": 0.103515625, "learning_rate": 0.001999838190918337, "loss": 0.2661, "step": 7030 }, { "epoch": 0.0499686808516679, "grad_norm": 0.1904296875, "learning_rate": 0.0019998376538092723, "loss": 0.2696, "step": 7040 }, { "epoch": 0.05003965909151402, "grad_norm": 0.1064453125, "learning_rate": 0.0019998371158103253, "loss": 0.2835, "step": 7050 }, { "epoch": 0.05011063733136014, "grad_norm": 0.130859375, "learning_rate": 0.0019998365769214954, "loss": 0.2846, "step": 7060 }, { "epoch": 0.050181615571206255, "grad_norm": 0.1201171875, "learning_rate": 0.0019998360371427837, "loss": 0.2597, "step": 7070 }, { "epoch": 0.050252593811052376, "grad_norm": 0.130859375, "learning_rate": 0.001999835496474191, "loss": 0.2761, "step": 7080 }, { "epoch": 0.0503235720508985, "grad_norm": 0.11279296875, "learning_rate": 0.0019998349549157166, "loss": 0.2557, "step": 7090 }, { "epoch": 0.05039455029074461, "grad_norm": 0.07470703125, "learning_rate": 0.001999834412467362, "loss": 0.2741, "step": 7100 }, { "epoch": 0.050465528530590734, "grad_norm": 0.1083984375, "learning_rate": 0.0019998338691291275, "loss": 0.2844, "step": 7110 }, { "epoch": 0.050536506770436855, "grad_norm": 0.142578125, "learning_rate": 0.0019998333249010144, "loss": 0.2861, "step": 7120 }, { "epoch": 0.05060748501028297, "grad_norm": 0.1181640625, "learning_rate": 0.001999832779783022, "loss": 0.2937, "step": 7130 }, { "epoch": 0.05067846325012909, "grad_norm": 0.1142578125, "learning_rate": 0.001999832233775151, "loss": 0.2734, "step": 7140 }, { "epoch": 0.05074944148997521, "grad_norm": 0.1201171875, "learning_rate": 0.0019998316868774027, "loss": 0.2999, "step": 7150 }, { "epoch": 0.05082041972982133, "grad_norm": 0.1767578125, "learning_rate": 0.0019998311390897775, "loss": 0.282, "step": 7160 }, { "epoch": 0.05089139796966745, "grad_norm": 0.119140625, "learning_rate": 0.0019998305904122757, "loss": 0.2778, "step": 7170 }, { "epoch": 0.05096237620951357, "grad_norm": 0.1513671875, "learning_rate": 0.001999830040844897, "loss": 0.285, "step": 7180 }, { "epoch": 0.051033354449359684, "grad_norm": 0.09765625, "learning_rate": 0.0019998294903876435, "loss": 0.2725, "step": 7190 }, { "epoch": 0.051104332689205806, "grad_norm": 0.0966796875, "learning_rate": 0.0019998289390405154, "loss": 0.2688, "step": 7200 }, { "epoch": 0.05117531092905193, "grad_norm": 0.1357421875, "learning_rate": 0.001999828386803512, "loss": 0.2824, "step": 7210 }, { "epoch": 0.05124628916889804, "grad_norm": 0.10986328125, "learning_rate": 0.001999827833676635, "loss": 0.2804, "step": 7220 }, { "epoch": 0.05131726740874416, "grad_norm": 0.1337890625, "learning_rate": 0.001999827279659885, "loss": 0.2851, "step": 7230 }, { "epoch": 0.051388245648590285, "grad_norm": 0.09814453125, "learning_rate": 0.0019998267247532623, "loss": 0.2802, "step": 7240 }, { "epoch": 0.0514592238884364, "grad_norm": 0.1455078125, "learning_rate": 0.001999826168956767, "loss": 0.2921, "step": 7250 }, { "epoch": 0.05153020212828252, "grad_norm": 0.10986328125, "learning_rate": 0.0019998256122704, "loss": 0.2736, "step": 7260 }, { "epoch": 0.05160118036812864, "grad_norm": 0.1044921875, "learning_rate": 0.001999825054694162, "loss": 0.2785, "step": 7270 }, { "epoch": 0.051672158607974764, "grad_norm": 0.19140625, "learning_rate": 0.0019998244962280537, "loss": 0.2798, "step": 7280 }, { "epoch": 0.05174313684782088, "grad_norm": 0.12158203125, "learning_rate": 0.0019998239368720747, "loss": 0.2723, "step": 7290 }, { "epoch": 0.051814115087667, "grad_norm": 0.1513671875, "learning_rate": 0.0019998233766262267, "loss": 0.2945, "step": 7300 }, { "epoch": 0.05188509332751312, "grad_norm": 0.08544921875, "learning_rate": 0.0019998228154905094, "loss": 0.2903, "step": 7310 }, { "epoch": 0.051956071567359235, "grad_norm": 0.1865234375, "learning_rate": 0.0019998222534649245, "loss": 0.2893, "step": 7320 }, { "epoch": 0.05202704980720536, "grad_norm": 0.1669921875, "learning_rate": 0.001999821690549471, "loss": 0.2887, "step": 7330 }, { "epoch": 0.05209802804705148, "grad_norm": 0.1015625, "learning_rate": 0.0019998211267441505, "loss": 0.2707, "step": 7340 }, { "epoch": 0.05216900628689759, "grad_norm": 0.1240234375, "learning_rate": 0.001999820562048963, "loss": 0.2829, "step": 7350 }, { "epoch": 0.052239984526743714, "grad_norm": 0.115234375, "learning_rate": 0.00199981999646391, "loss": 0.2951, "step": 7360 }, { "epoch": 0.052310962766589836, "grad_norm": 0.1533203125, "learning_rate": 0.0019998194299889914, "loss": 0.2848, "step": 7370 }, { "epoch": 0.05238194100643595, "grad_norm": 0.2236328125, "learning_rate": 0.0019998188626242073, "loss": 0.2692, "step": 7380 }, { "epoch": 0.05245291924628207, "grad_norm": 0.5078125, "learning_rate": 0.0019998182943695587, "loss": 0.2807, "step": 7390 }, { "epoch": 0.05252389748612819, "grad_norm": 0.083984375, "learning_rate": 0.0019998177252250463, "loss": 0.2705, "step": 7400 }, { "epoch": 0.05259487572597431, "grad_norm": 0.087890625, "learning_rate": 0.001999817155190671, "loss": 0.2872, "step": 7410 }, { "epoch": 0.05266585396582043, "grad_norm": 0.255859375, "learning_rate": 0.0019998165842664323, "loss": 0.2953, "step": 7420 }, { "epoch": 0.05273683220566655, "grad_norm": 0.12353515625, "learning_rate": 0.001999816012452332, "loss": 0.2786, "step": 7430 }, { "epoch": 0.052807810445512665, "grad_norm": 0.1572265625, "learning_rate": 0.0019998154397483695, "loss": 0.2893, "step": 7440 }, { "epoch": 0.05287878868535879, "grad_norm": 0.09130859375, "learning_rate": 0.0019998148661545465, "loss": 0.2906, "step": 7450 }, { "epoch": 0.05294976692520491, "grad_norm": 0.119140625, "learning_rate": 0.0019998142916708627, "loss": 0.2736, "step": 7460 }, { "epoch": 0.05302074516505102, "grad_norm": 0.09619140625, "learning_rate": 0.001999813716297319, "loss": 0.2888, "step": 7470 }, { "epoch": 0.053091723404897144, "grad_norm": 0.1328125, "learning_rate": 0.0019998131400339155, "loss": 0.2746, "step": 7480 }, { "epoch": 0.053162701644743265, "grad_norm": 0.1142578125, "learning_rate": 0.0019998125628806537, "loss": 0.2867, "step": 7490 }, { "epoch": 0.05323367988458938, "grad_norm": 0.15625, "learning_rate": 0.0019998119848375338, "loss": 0.2993, "step": 7500 }, { "epoch": 0.0533046581244355, "grad_norm": 0.1357421875, "learning_rate": 0.001999811405904556, "loss": 0.2638, "step": 7510 }, { "epoch": 0.05337563636428162, "grad_norm": 0.12060546875, "learning_rate": 0.0019998108260817207, "loss": 0.3124, "step": 7520 }, { "epoch": 0.05344661460412774, "grad_norm": 0.212890625, "learning_rate": 0.0019998102453690297, "loss": 0.2824, "step": 7530 }, { "epoch": 0.05351759284397386, "grad_norm": 0.11572265625, "learning_rate": 0.0019998096637664823, "loss": 0.2982, "step": 7540 }, { "epoch": 0.05358857108381998, "grad_norm": 0.173828125, "learning_rate": 0.0019998090812740794, "loss": 0.2689, "step": 7550 }, { "epoch": 0.053659549323666095, "grad_norm": 0.1982421875, "learning_rate": 0.001999808497891822, "loss": 0.2789, "step": 7560 }, { "epoch": 0.053730527563512216, "grad_norm": 0.2373046875, "learning_rate": 0.0019998079136197104, "loss": 0.2874, "step": 7570 }, { "epoch": 0.05380150580335834, "grad_norm": 0.1337890625, "learning_rate": 0.001999807328457745, "loss": 0.2862, "step": 7580 }, { "epoch": 0.05387248404320445, "grad_norm": 0.15625, "learning_rate": 0.0019998067424059265, "loss": 0.2656, "step": 7590 }, { "epoch": 0.053943462283050574, "grad_norm": 0.0849609375, "learning_rate": 0.0019998061554642554, "loss": 0.2729, "step": 7600 }, { "epoch": 0.054014440522896695, "grad_norm": 0.10205078125, "learning_rate": 0.001999805567632733, "loss": 0.2967, "step": 7610 }, { "epoch": 0.05408541876274281, "grad_norm": 0.09375, "learning_rate": 0.0019998049789113586, "loss": 0.281, "step": 7620 }, { "epoch": 0.05415639700258893, "grad_norm": 0.134765625, "learning_rate": 0.001999804389300134, "loss": 0.2704, "step": 7630 }, { "epoch": 0.05422737524243505, "grad_norm": 0.1298828125, "learning_rate": 0.001999803798799059, "loss": 0.2793, "step": 7640 }, { "epoch": 0.05429835348228117, "grad_norm": 0.10205078125, "learning_rate": 0.0019998032074081346, "loss": 0.2663, "step": 7650 }, { "epoch": 0.05436933172212729, "grad_norm": 0.11181640625, "learning_rate": 0.001999802615127361, "loss": 0.2935, "step": 7660 }, { "epoch": 0.05444030996197341, "grad_norm": 0.11865234375, "learning_rate": 0.001999802021956739, "loss": 0.2829, "step": 7670 }, { "epoch": 0.054511288201819524, "grad_norm": 0.1123046875, "learning_rate": 0.0019998014278962695, "loss": 0.2816, "step": 7680 }, { "epoch": 0.054582266441665646, "grad_norm": 0.09326171875, "learning_rate": 0.0019998008329459526, "loss": 0.277, "step": 7690 }, { "epoch": 0.05465324468151177, "grad_norm": 0.193359375, "learning_rate": 0.001999800237105789, "loss": 0.2887, "step": 7700 }, { "epoch": 0.05472422292135788, "grad_norm": 0.16796875, "learning_rate": 0.0019997996403757793, "loss": 0.29, "step": 7710 }, { "epoch": 0.054795201161204, "grad_norm": 0.0888671875, "learning_rate": 0.0019997990427559243, "loss": 0.276, "step": 7720 }, { "epoch": 0.054866179401050125, "grad_norm": 0.162109375, "learning_rate": 0.0019997984442462246, "loss": 0.2752, "step": 7730 }, { "epoch": 0.05493715764089624, "grad_norm": 0.2470703125, "learning_rate": 0.0019997978448466802, "loss": 0.2819, "step": 7740 }, { "epoch": 0.05500813588074236, "grad_norm": 0.1640625, "learning_rate": 0.0019997972445572926, "loss": 0.2648, "step": 7750 }, { "epoch": 0.05507911412058848, "grad_norm": 0.11572265625, "learning_rate": 0.0019997966433780616, "loss": 0.2881, "step": 7760 }, { "epoch": 0.0551500923604346, "grad_norm": 0.14453125, "learning_rate": 0.001999796041308988, "loss": 0.2771, "step": 7770 }, { "epoch": 0.05522107060028072, "grad_norm": 0.11767578125, "learning_rate": 0.0019997954383500728, "loss": 0.2776, "step": 7780 }, { "epoch": 0.05529204884012684, "grad_norm": 0.099609375, "learning_rate": 0.001999794834501316, "loss": 0.2895, "step": 7790 }, { "epoch": 0.055363027079972954, "grad_norm": 0.1005859375, "learning_rate": 0.0019997942297627193, "loss": 0.2814, "step": 7800 }, { "epoch": 0.055434005319819075, "grad_norm": 0.251953125, "learning_rate": 0.0019997936241342817, "loss": 0.2659, "step": 7810 }, { "epoch": 0.0555049835596652, "grad_norm": 0.1923828125, "learning_rate": 0.0019997930176160047, "loss": 0.2669, "step": 7820 }, { "epoch": 0.05557596179951131, "grad_norm": 0.10986328125, "learning_rate": 0.001999792410207889, "loss": 0.2843, "step": 7830 }, { "epoch": 0.05564694003935743, "grad_norm": 0.10595703125, "learning_rate": 0.001999791801909935, "loss": 0.2668, "step": 7840 }, { "epoch": 0.055717918279203554, "grad_norm": 0.2236328125, "learning_rate": 0.0019997911927221433, "loss": 0.2943, "step": 7850 }, { "epoch": 0.055788896519049676, "grad_norm": 0.07763671875, "learning_rate": 0.0019997905826445142, "loss": 0.2662, "step": 7860 }, { "epoch": 0.05585987475889579, "grad_norm": 0.1318359375, "learning_rate": 0.0019997899716770492, "loss": 0.2735, "step": 7870 }, { "epoch": 0.05593085299874191, "grad_norm": 0.1845703125, "learning_rate": 0.001999789359819748, "loss": 0.2864, "step": 7880 }, { "epoch": 0.05600183123858803, "grad_norm": 0.09228515625, "learning_rate": 0.0019997887470726114, "loss": 0.276, "step": 7890 }, { "epoch": 0.05607280947843415, "grad_norm": 0.095703125, "learning_rate": 0.00199978813343564, "loss": 0.2756, "step": 7900 }, { "epoch": 0.05614378771828027, "grad_norm": 0.1435546875, "learning_rate": 0.001999787518908835, "loss": 0.2725, "step": 7910 }, { "epoch": 0.05621476595812639, "grad_norm": 0.1923828125, "learning_rate": 0.001999786903492196, "loss": 0.2715, "step": 7920 }, { "epoch": 0.056285744197972505, "grad_norm": 0.12255859375, "learning_rate": 0.0019997862871857243, "loss": 0.2746, "step": 7930 }, { "epoch": 0.05635672243781863, "grad_norm": 0.1337890625, "learning_rate": 0.0019997856699894207, "loss": 0.278, "step": 7940 }, { "epoch": 0.05642770067766475, "grad_norm": 0.1025390625, "learning_rate": 0.001999785051903285, "loss": 0.2777, "step": 7950 }, { "epoch": 0.05649867891751086, "grad_norm": 0.1552734375, "learning_rate": 0.0019997844329273187, "loss": 0.2776, "step": 7960 }, { "epoch": 0.056569657157356984, "grad_norm": 0.09423828125, "learning_rate": 0.0019997838130615215, "loss": 0.2713, "step": 7970 }, { "epoch": 0.056640635397203105, "grad_norm": 0.142578125, "learning_rate": 0.001999783192305895, "loss": 0.2795, "step": 7980 }, { "epoch": 0.05671161363704922, "grad_norm": 0.08349609375, "learning_rate": 0.001999782570660439, "loss": 0.2718, "step": 7990 }, { "epoch": 0.05678259187689534, "grad_norm": 0.2275390625, "learning_rate": 0.0019997819481251547, "loss": 0.2947, "step": 8000 }, { "epoch": 0.05678259187689534, "eval_covost2-zh-en_loss": 4.034458160400391, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.6152, "eval_covost2-zh-en_samples_per_second": 3.105, "eval_covost2-zh-en_steps_per_second": 0.194, "step": 8000 }, { "epoch": 0.05678259187689534, "eval_covost2-en-zh_loss": 3.230180025100708, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.3906, "eval_covost2-en-zh_samples_per_second": 3.139, "eval_covost2-en-zh_steps_per_second": 0.196, "step": 8000 }, { "epoch": 0.05685357011674146, "grad_norm": 0.1572265625, "learning_rate": 0.0019997813247000424, "loss": 0.2863, "step": 8010 }, { "epoch": 0.05692454835658758, "grad_norm": 0.28515625, "learning_rate": 0.0019997807003851024, "loss": 0.2832, "step": 8020 }, { "epoch": 0.0569955265964337, "grad_norm": 0.1689453125, "learning_rate": 0.001999780075180336, "loss": 0.2911, "step": 8030 }, { "epoch": 0.05706650483627982, "grad_norm": 0.150390625, "learning_rate": 0.001999779449085743, "loss": 0.2762, "step": 8040 }, { "epoch": 0.057137483076125935, "grad_norm": 0.1279296875, "learning_rate": 0.001999778822101325, "loss": 0.2724, "step": 8050 }, { "epoch": 0.057208461315972056, "grad_norm": 0.1357421875, "learning_rate": 0.001999778194227082, "loss": 0.2835, "step": 8060 }, { "epoch": 0.05727943955581818, "grad_norm": 0.1025390625, "learning_rate": 0.0019997775654630153, "loss": 0.2744, "step": 8070 }, { "epoch": 0.05735041779566429, "grad_norm": 0.103515625, "learning_rate": 0.0019997769358091246, "loss": 0.2777, "step": 8080 }, { "epoch": 0.057421396035510414, "grad_norm": 0.1279296875, "learning_rate": 0.0019997763052654106, "loss": 0.267, "step": 8090 }, { "epoch": 0.057492374275356535, "grad_norm": 0.154296875, "learning_rate": 0.0019997756738318745, "loss": 0.2834, "step": 8100 }, { "epoch": 0.05756335251520265, "grad_norm": 0.162109375, "learning_rate": 0.0019997750415085168, "loss": 0.2605, "step": 8110 }, { "epoch": 0.05763433075504877, "grad_norm": 0.1435546875, "learning_rate": 0.001999774408295338, "loss": 0.275, "step": 8120 }, { "epoch": 0.05770530899489489, "grad_norm": 0.10791015625, "learning_rate": 0.001999773774192338, "loss": 0.2732, "step": 8130 }, { "epoch": 0.05777628723474101, "grad_norm": 0.21875, "learning_rate": 0.001999773139199519, "loss": 0.2755, "step": 8140 }, { "epoch": 0.05784726547458713, "grad_norm": 0.08251953125, "learning_rate": 0.0019997725033168803, "loss": 0.2713, "step": 8150 }, { "epoch": 0.05791824371443325, "grad_norm": 0.14453125, "learning_rate": 0.0019997718665444226, "loss": 0.2779, "step": 8160 }, { "epoch": 0.057989221954279364, "grad_norm": 0.10986328125, "learning_rate": 0.0019997712288821476, "loss": 0.287, "step": 8170 }, { "epoch": 0.058060200194125486, "grad_norm": 0.279296875, "learning_rate": 0.001999770590330055, "loss": 0.3108, "step": 8180 }, { "epoch": 0.05813117843397161, "grad_norm": 0.087890625, "learning_rate": 0.0019997699508881458, "loss": 0.2632, "step": 8190 }, { "epoch": 0.05820215667381772, "grad_norm": 0.08349609375, "learning_rate": 0.00199976931055642, "loss": 0.2805, "step": 8200 }, { "epoch": 0.05827313491366384, "grad_norm": 0.1826171875, "learning_rate": 0.0019997686693348795, "loss": 0.2749, "step": 8210 }, { "epoch": 0.058344113153509965, "grad_norm": 0.11962890625, "learning_rate": 0.001999768027223524, "loss": 0.2809, "step": 8220 }, { "epoch": 0.05841509139335608, "grad_norm": 0.1435546875, "learning_rate": 0.001999767384222354, "loss": 0.2792, "step": 8230 }, { "epoch": 0.0584860696332022, "grad_norm": 0.123046875, "learning_rate": 0.0019997667403313705, "loss": 0.2649, "step": 8240 }, { "epoch": 0.05855704787304832, "grad_norm": 0.1826171875, "learning_rate": 0.0019997660955505744, "loss": 0.3003, "step": 8250 }, { "epoch": 0.05862802611289444, "grad_norm": 0.10693359375, "learning_rate": 0.001999765449879966, "loss": 0.2935, "step": 8260 }, { "epoch": 0.05869900435274056, "grad_norm": 0.146484375, "learning_rate": 0.0019997648033195457, "loss": 0.2808, "step": 8270 }, { "epoch": 0.05876998259258668, "grad_norm": 0.0947265625, "learning_rate": 0.001999764155869314, "loss": 0.2656, "step": 8280 }, { "epoch": 0.058840960832432794, "grad_norm": 0.095703125, "learning_rate": 0.0019997635075292724, "loss": 0.2947, "step": 8290 }, { "epoch": 0.058911939072278915, "grad_norm": 0.1171875, "learning_rate": 0.001999762858299421, "loss": 0.2743, "step": 8300 }, { "epoch": 0.05898291731212504, "grad_norm": 0.10205078125, "learning_rate": 0.001999762208179761, "loss": 0.2835, "step": 8310 }, { "epoch": 0.05905389555197115, "grad_norm": 0.3828125, "learning_rate": 0.001999761557170292, "loss": 0.2786, "step": 8320 }, { "epoch": 0.05912487379181727, "grad_norm": 0.19140625, "learning_rate": 0.0019997609052710154, "loss": 0.2644, "step": 8330 }, { "epoch": 0.059195852031663394, "grad_norm": 0.1435546875, "learning_rate": 0.001999760252481932, "loss": 0.2771, "step": 8340 }, { "epoch": 0.05926683027150951, "grad_norm": 0.13671875, "learning_rate": 0.0019997595988030417, "loss": 0.2806, "step": 8350 }, { "epoch": 0.05933780851135563, "grad_norm": 0.1064453125, "learning_rate": 0.0019997589442343456, "loss": 0.2744, "step": 8360 }, { "epoch": 0.05940878675120175, "grad_norm": 0.0947265625, "learning_rate": 0.0019997582887758443, "loss": 0.2481, "step": 8370 }, { "epoch": 0.059479764991047866, "grad_norm": 0.150390625, "learning_rate": 0.0019997576324275383, "loss": 0.265, "step": 8380 }, { "epoch": 0.05955074323089399, "grad_norm": 0.111328125, "learning_rate": 0.0019997569751894284, "loss": 0.2801, "step": 8390 }, { "epoch": 0.05962172147074011, "grad_norm": 0.1123046875, "learning_rate": 0.0019997563170615156, "loss": 0.276, "step": 8400 }, { "epoch": 0.059692699710586224, "grad_norm": 0.0966796875, "learning_rate": 0.0019997556580437998, "loss": 0.2826, "step": 8410 }, { "epoch": 0.059763677950432345, "grad_norm": 0.0927734375, "learning_rate": 0.0019997549981362822, "loss": 0.2867, "step": 8420 }, { "epoch": 0.05983465619027847, "grad_norm": 0.126953125, "learning_rate": 0.0019997543373389635, "loss": 0.2735, "step": 8430 }, { "epoch": 0.05990563443012459, "grad_norm": 0.08984375, "learning_rate": 0.0019997536756518443, "loss": 0.2807, "step": 8440 }, { "epoch": 0.0599766126699707, "grad_norm": 0.078125, "learning_rate": 0.0019997530130749243, "loss": 0.2695, "step": 8450 }, { "epoch": 0.060047590909816824, "grad_norm": 0.10205078125, "learning_rate": 0.0019997523496082057, "loss": 0.2824, "step": 8460 }, { "epoch": 0.060118569149662945, "grad_norm": 0.1005859375, "learning_rate": 0.001999751685251688, "loss": 0.2807, "step": 8470 }, { "epoch": 0.06018954738950906, "grad_norm": 0.1220703125, "learning_rate": 0.0019997510200053725, "loss": 0.2719, "step": 8480 }, { "epoch": 0.06026052562935518, "grad_norm": 0.10791015625, "learning_rate": 0.001999750353869259, "loss": 0.2865, "step": 8490 }, { "epoch": 0.0603315038692013, "grad_norm": 0.1298828125, "learning_rate": 0.00199974968684335, "loss": 0.277, "step": 8500 }, { "epoch": 0.06040248210904742, "grad_norm": 0.328125, "learning_rate": 0.001999749018927644, "loss": 0.2731, "step": 8510 }, { "epoch": 0.06047346034889354, "grad_norm": 0.11279296875, "learning_rate": 0.001999748350122143, "loss": 0.2818, "step": 8520 }, { "epoch": 0.06054443858873966, "grad_norm": 0.09765625, "learning_rate": 0.0019997476804268473, "loss": 0.3072, "step": 8530 }, { "epoch": 0.060615416828585775, "grad_norm": 0.1591796875, "learning_rate": 0.0019997470098417573, "loss": 0.2717, "step": 8540 }, { "epoch": 0.060686395068431896, "grad_norm": 0.11474609375, "learning_rate": 0.001999746338366874, "loss": 0.2428, "step": 8550 }, { "epoch": 0.06075737330827802, "grad_norm": 0.166015625, "learning_rate": 0.001999745666002198, "loss": 0.2959, "step": 8560 }, { "epoch": 0.06082835154812413, "grad_norm": 0.1845703125, "learning_rate": 0.00199974499274773, "loss": 0.2759, "step": 8570 }, { "epoch": 0.060899329787970254, "grad_norm": 0.1484375, "learning_rate": 0.0019997443186034703, "loss": 0.2665, "step": 8580 }, { "epoch": 0.060970308027816375, "grad_norm": 0.1298828125, "learning_rate": 0.0019997436435694204, "loss": 0.274, "step": 8590 }, { "epoch": 0.06104128626766249, "grad_norm": 0.11279296875, "learning_rate": 0.00199974296764558, "loss": 0.2623, "step": 8600 }, { "epoch": 0.06111226450750861, "grad_norm": 0.1015625, "learning_rate": 0.0019997422908319506, "loss": 0.27, "step": 8610 }, { "epoch": 0.06118324274735473, "grad_norm": 0.1708984375, "learning_rate": 0.001999741613128532, "loss": 0.2659, "step": 8620 }, { "epoch": 0.06125422098720085, "grad_norm": 0.1025390625, "learning_rate": 0.0019997409345353253, "loss": 0.278, "step": 8630 }, { "epoch": 0.06132519922704697, "grad_norm": 0.12890625, "learning_rate": 0.0019997402550523315, "loss": 0.2929, "step": 8640 }, { "epoch": 0.06139617746689309, "grad_norm": 0.1953125, "learning_rate": 0.001999739574679551, "loss": 0.2735, "step": 8650 }, { "epoch": 0.061467155706739204, "grad_norm": 0.119140625, "learning_rate": 0.001999738893416984, "loss": 0.266, "step": 8660 }, { "epoch": 0.061538133946585326, "grad_norm": 0.1455078125, "learning_rate": 0.001999738211264632, "loss": 0.2749, "step": 8670 }, { "epoch": 0.06160911218643145, "grad_norm": 0.1708984375, "learning_rate": 0.0019997375282224954, "loss": 0.2799, "step": 8680 }, { "epoch": 0.06168009042627756, "grad_norm": 0.07470703125, "learning_rate": 0.0019997368442905747, "loss": 0.2632, "step": 8690 }, { "epoch": 0.06175106866612368, "grad_norm": 0.0791015625, "learning_rate": 0.00199973615946887, "loss": 0.2718, "step": 8700 }, { "epoch": 0.061822046905969805, "grad_norm": 0.109375, "learning_rate": 0.0019997354737573835, "loss": 0.2946, "step": 8710 }, { "epoch": 0.06189302514581592, "grad_norm": 0.181640625, "learning_rate": 0.001999734787156115, "loss": 0.273, "step": 8720 }, { "epoch": 0.06196400338566204, "grad_norm": 0.1357421875, "learning_rate": 0.001999734099665065, "loss": 0.2771, "step": 8730 }, { "epoch": 0.06203498162550816, "grad_norm": 0.1337890625, "learning_rate": 0.001999733411284234, "loss": 0.273, "step": 8740 }, { "epoch": 0.06210595986535428, "grad_norm": 0.10400390625, "learning_rate": 0.0019997327220136235, "loss": 0.2769, "step": 8750 }, { "epoch": 0.0621769381052004, "grad_norm": 0.146484375, "learning_rate": 0.0019997320318532333, "loss": 0.2846, "step": 8760 }, { "epoch": 0.06224791634504652, "grad_norm": 0.10693359375, "learning_rate": 0.001999731340803065, "loss": 0.2607, "step": 8770 }, { "epoch": 0.062318894584892634, "grad_norm": 0.12109375, "learning_rate": 0.0019997306488631185, "loss": 0.2898, "step": 8780 }, { "epoch": 0.062389872824738755, "grad_norm": 0.140625, "learning_rate": 0.001999729956033395, "loss": 0.2879, "step": 8790 }, { "epoch": 0.06246085106458488, "grad_norm": 0.1845703125, "learning_rate": 0.0019997292623138946, "loss": 0.2717, "step": 8800 }, { "epoch": 0.06253182930443099, "grad_norm": 0.08935546875, "learning_rate": 0.0019997285677046182, "loss": 0.2657, "step": 8810 }, { "epoch": 0.06260280754427712, "grad_norm": 0.1630859375, "learning_rate": 0.001999727872205567, "loss": 0.3022, "step": 8820 }, { "epoch": 0.06267378578412323, "grad_norm": 0.115234375, "learning_rate": 0.0019997271758167416, "loss": 0.2823, "step": 8830 }, { "epoch": 0.06274476402396935, "grad_norm": 0.1259765625, "learning_rate": 0.001999726478538142, "loss": 0.2771, "step": 8840 }, { "epoch": 0.06281574226381548, "grad_norm": 0.1328125, "learning_rate": 0.0019997257803697696, "loss": 0.2724, "step": 8850 }, { "epoch": 0.06288672050366159, "grad_norm": 0.2294921875, "learning_rate": 0.0019997250813116245, "loss": 0.285, "step": 8860 }, { "epoch": 0.0629576987435077, "grad_norm": 0.146484375, "learning_rate": 0.0019997243813637076, "loss": 0.303, "step": 8870 }, { "epoch": 0.06302867698335383, "grad_norm": 0.12451171875, "learning_rate": 0.0019997236805260198, "loss": 0.268, "step": 8880 }, { "epoch": 0.06309965522319995, "grad_norm": 0.208984375, "learning_rate": 0.001999722978798562, "loss": 0.2808, "step": 8890 }, { "epoch": 0.06317063346304606, "grad_norm": 0.0927734375, "learning_rate": 0.001999722276181334, "loss": 0.2551, "step": 8900 }, { "epoch": 0.06324161170289219, "grad_norm": 0.2314453125, "learning_rate": 0.0019997215726743375, "loss": 0.2768, "step": 8910 }, { "epoch": 0.0633125899427383, "grad_norm": 0.1171875, "learning_rate": 0.0019997208682775726, "loss": 0.2742, "step": 8920 }, { "epoch": 0.06338356818258442, "grad_norm": 0.16015625, "learning_rate": 0.0019997201629910403, "loss": 0.2828, "step": 8930 }, { "epoch": 0.06345454642243055, "grad_norm": 0.1484375, "learning_rate": 0.001999719456814741, "loss": 0.2571, "step": 8940 }, { "epoch": 0.06352552466227666, "grad_norm": 0.32421875, "learning_rate": 0.0019997187497486756, "loss": 0.2773, "step": 8950 }, { "epoch": 0.06359650290212278, "grad_norm": 0.1650390625, "learning_rate": 0.0019997180417928445, "loss": 0.2739, "step": 8960 }, { "epoch": 0.0636674811419689, "grad_norm": 0.09716796875, "learning_rate": 0.001999717332947249, "loss": 0.2619, "step": 8970 }, { "epoch": 0.06373845938181502, "grad_norm": 0.1044921875, "learning_rate": 0.001999716623211889, "loss": 0.2642, "step": 8980 }, { "epoch": 0.06380943762166114, "grad_norm": 0.1103515625, "learning_rate": 0.001999715912586766, "loss": 0.2604, "step": 8990 }, { "epoch": 0.06388041586150726, "grad_norm": 0.1259765625, "learning_rate": 0.0019997152010718805, "loss": 0.2712, "step": 9000 }, { "epoch": 0.06395139410135338, "grad_norm": 0.13671875, "learning_rate": 0.001999714488667233, "loss": 0.2705, "step": 9010 }, { "epoch": 0.0640223723411995, "grad_norm": 0.1611328125, "learning_rate": 0.001999713775372824, "loss": 0.2775, "step": 9020 }, { "epoch": 0.06409335058104562, "grad_norm": 0.15625, "learning_rate": 0.0019997130611886546, "loss": 0.2703, "step": 9030 }, { "epoch": 0.06416432882089174, "grad_norm": 0.125, "learning_rate": 0.0019997123461147253, "loss": 0.2851, "step": 9040 }, { "epoch": 0.06423530706073785, "grad_norm": 0.166015625, "learning_rate": 0.0019997116301510368, "loss": 0.2631, "step": 9050 }, { "epoch": 0.06430628530058398, "grad_norm": 0.2119140625, "learning_rate": 0.00199971091329759, "loss": 0.2816, "step": 9060 }, { "epoch": 0.0643772635404301, "grad_norm": 0.1259765625, "learning_rate": 0.0019997101955543856, "loss": 0.2845, "step": 9070 }, { "epoch": 0.06444824178027621, "grad_norm": 0.0673828125, "learning_rate": 0.001999709476921424, "loss": 0.2595, "step": 9080 }, { "epoch": 0.06451922002012234, "grad_norm": 0.146484375, "learning_rate": 0.0019997087573987065, "loss": 0.272, "step": 9090 }, { "epoch": 0.06459019825996845, "grad_norm": 0.1357421875, "learning_rate": 0.001999708036986233, "loss": 0.2909, "step": 9100 }, { "epoch": 0.06466117649981457, "grad_norm": 0.08154296875, "learning_rate": 0.001999707315684005, "loss": 0.2719, "step": 9110 }, { "epoch": 0.0647321547396607, "grad_norm": 0.140625, "learning_rate": 0.0019997065934920224, "loss": 0.2656, "step": 9120 }, { "epoch": 0.06480313297950681, "grad_norm": 0.1337890625, "learning_rate": 0.001999705870410287, "loss": 0.2915, "step": 9130 }, { "epoch": 0.06487411121935292, "grad_norm": 0.15625, "learning_rate": 0.001999705146438798, "loss": 0.2562, "step": 9140 }, { "epoch": 0.06494508945919905, "grad_norm": 0.158203125, "learning_rate": 0.0019997044215775573, "loss": 0.2819, "step": 9150 }, { "epoch": 0.06501606769904517, "grad_norm": 0.177734375, "learning_rate": 0.001999703695826566, "loss": 0.2935, "step": 9160 }, { "epoch": 0.06508704593889128, "grad_norm": 0.10986328125, "learning_rate": 0.0019997029691858235, "loss": 0.2689, "step": 9170 }, { "epoch": 0.06515802417873741, "grad_norm": 0.09521484375, "learning_rate": 0.001999702241655331, "loss": 0.2647, "step": 9180 }, { "epoch": 0.06522900241858352, "grad_norm": 0.1513671875, "learning_rate": 0.00199970151323509, "loss": 0.2718, "step": 9190 }, { "epoch": 0.06529998065842964, "grad_norm": 0.12158203125, "learning_rate": 0.0019997007839251, "loss": 0.2541, "step": 9200 }, { "epoch": 0.06537095889827577, "grad_norm": 0.1123046875, "learning_rate": 0.001999700053725363, "loss": 0.2617, "step": 9210 }, { "epoch": 0.06544193713812188, "grad_norm": 0.10107421875, "learning_rate": 0.0019996993226358783, "loss": 0.2689, "step": 9220 }, { "epoch": 0.065512915377968, "grad_norm": 0.3515625, "learning_rate": 0.0019996985906566476, "loss": 0.2898, "step": 9230 }, { "epoch": 0.06558389361781412, "grad_norm": 0.1767578125, "learning_rate": 0.0019996978577876716, "loss": 0.2726, "step": 9240 }, { "epoch": 0.06565487185766024, "grad_norm": 0.10693359375, "learning_rate": 0.0019996971240289507, "loss": 0.2699, "step": 9250 }, { "epoch": 0.06572585009750635, "grad_norm": 0.142578125, "learning_rate": 0.0019996963893804858, "loss": 0.3009, "step": 9260 }, { "epoch": 0.06579682833735248, "grad_norm": 0.1064453125, "learning_rate": 0.0019996956538422773, "loss": 0.2638, "step": 9270 }, { "epoch": 0.0658678065771986, "grad_norm": 0.0732421875, "learning_rate": 0.001999694917414326, "loss": 0.2736, "step": 9280 }, { "epoch": 0.06593878481704471, "grad_norm": 0.11083984375, "learning_rate": 0.0019996941800966336, "loss": 0.2811, "step": 9290 }, { "epoch": 0.06600976305689084, "grad_norm": 0.091796875, "learning_rate": 0.001999693441889199, "loss": 0.2723, "step": 9300 }, { "epoch": 0.06608074129673695, "grad_norm": 0.1259765625, "learning_rate": 0.001999692702792025, "loss": 0.2698, "step": 9310 }, { "epoch": 0.06615171953658307, "grad_norm": 0.1962890625, "learning_rate": 0.001999691962805111, "loss": 0.2744, "step": 9320 }, { "epoch": 0.0662226977764292, "grad_norm": 0.08740234375, "learning_rate": 0.0019996912219284574, "loss": 0.2695, "step": 9330 }, { "epoch": 0.06629367601627531, "grad_norm": 0.083984375, "learning_rate": 0.0019996904801620665, "loss": 0.2601, "step": 9340 }, { "epoch": 0.06636465425612142, "grad_norm": 0.1298828125, "learning_rate": 0.0019996897375059376, "loss": 0.268, "step": 9350 }, { "epoch": 0.06643563249596755, "grad_norm": 0.10693359375, "learning_rate": 0.0019996889939600717, "loss": 0.2758, "step": 9360 }, { "epoch": 0.06650661073581367, "grad_norm": 0.09765625, "learning_rate": 0.0019996882495244704, "loss": 0.2915, "step": 9370 }, { "epoch": 0.06657758897565978, "grad_norm": 0.12451171875, "learning_rate": 0.0019996875041991334, "loss": 0.2757, "step": 9380 }, { "epoch": 0.06664856721550591, "grad_norm": 0.1044921875, "learning_rate": 0.001999686757984062, "loss": 0.2891, "step": 9390 }, { "epoch": 0.06671954545535203, "grad_norm": 0.2158203125, "learning_rate": 0.0019996860108792568, "loss": 0.2762, "step": 9400 }, { "epoch": 0.06679052369519814, "grad_norm": 0.11767578125, "learning_rate": 0.0019996852628847185, "loss": 0.2688, "step": 9410 }, { "epoch": 0.06686150193504427, "grad_norm": 0.1240234375, "learning_rate": 0.001999684514000448, "loss": 0.2761, "step": 9420 }, { "epoch": 0.06693248017489038, "grad_norm": 0.095703125, "learning_rate": 0.0019996837642264455, "loss": 0.27, "step": 9430 }, { "epoch": 0.0670034584147365, "grad_norm": 0.10400390625, "learning_rate": 0.001999683013562712, "loss": 0.2613, "step": 9440 }, { "epoch": 0.06707443665458263, "grad_norm": 0.09375, "learning_rate": 0.001999682262009249, "loss": 0.2758, "step": 9450 }, { "epoch": 0.06714541489442874, "grad_norm": 0.134765625, "learning_rate": 0.001999681509566056, "loss": 0.2665, "step": 9460 }, { "epoch": 0.06721639313427485, "grad_norm": 0.1318359375, "learning_rate": 0.001999680756233135, "loss": 0.2751, "step": 9470 }, { "epoch": 0.06728737137412098, "grad_norm": 0.177734375, "learning_rate": 0.0019996800020104857, "loss": 0.2931, "step": 9480 }, { "epoch": 0.0673583496139671, "grad_norm": 0.1474609375, "learning_rate": 0.0019996792468981095, "loss": 0.2681, "step": 9490 }, { "epoch": 0.06742932785381321, "grad_norm": 0.12451171875, "learning_rate": 0.001999678490896007, "loss": 0.2865, "step": 9500 }, { "epoch": 0.06750030609365934, "grad_norm": 0.134765625, "learning_rate": 0.0019996777340041787, "loss": 0.2534, "step": 9510 }, { "epoch": 0.06757128433350545, "grad_norm": 0.1884765625, "learning_rate": 0.0019996769762226255, "loss": 0.2766, "step": 9520 }, { "epoch": 0.06764226257335157, "grad_norm": 0.115234375, "learning_rate": 0.001999676217551348, "loss": 0.2801, "step": 9530 }, { "epoch": 0.0677132408131977, "grad_norm": 0.384765625, "learning_rate": 0.0019996754579903473, "loss": 0.2574, "step": 9540 }, { "epoch": 0.06778421905304381, "grad_norm": 0.12060546875, "learning_rate": 0.001999674697539624, "loss": 0.2832, "step": 9550 }, { "epoch": 0.06785519729288993, "grad_norm": 0.126953125, "learning_rate": 0.001999673936199179, "loss": 0.2702, "step": 9560 }, { "epoch": 0.06792617553273606, "grad_norm": 0.1826171875, "learning_rate": 0.0019996731739690124, "loss": 0.2723, "step": 9570 }, { "epoch": 0.06799715377258217, "grad_norm": 0.07470703125, "learning_rate": 0.0019996724108491256, "loss": 0.2648, "step": 9580 }, { "epoch": 0.06806813201242828, "grad_norm": 0.1103515625, "learning_rate": 0.001999671646839519, "loss": 0.2946, "step": 9590 }, { "epoch": 0.06813911025227441, "grad_norm": 0.12890625, "learning_rate": 0.001999670881940194, "loss": 0.2736, "step": 9600 }, { "epoch": 0.06821008849212053, "grad_norm": 0.12158203125, "learning_rate": 0.0019996701161511506, "loss": 0.2583, "step": 9610 }, { "epoch": 0.06828106673196664, "grad_norm": 0.142578125, "learning_rate": 0.0019996693494723898, "loss": 0.2842, "step": 9620 }, { "epoch": 0.06835204497181277, "grad_norm": 0.099609375, "learning_rate": 0.0019996685819039122, "loss": 0.2671, "step": 9630 }, { "epoch": 0.06842302321165888, "grad_norm": 0.1083984375, "learning_rate": 0.0019996678134457193, "loss": 0.2861, "step": 9640 }, { "epoch": 0.068494001451505, "grad_norm": 0.162109375, "learning_rate": 0.001999667044097811, "loss": 0.2748, "step": 9650 }, { "epoch": 0.06856497969135113, "grad_norm": 0.06640625, "learning_rate": 0.001999666273860188, "loss": 0.2777, "step": 9660 }, { "epoch": 0.06863595793119724, "grad_norm": 0.10595703125, "learning_rate": 0.001999665502732852, "loss": 0.2736, "step": 9670 }, { "epoch": 0.06870693617104336, "grad_norm": 0.08935546875, "learning_rate": 0.0019996647307158034, "loss": 0.2663, "step": 9680 }, { "epoch": 0.06877791441088948, "grad_norm": 0.0927734375, "learning_rate": 0.001999663957809042, "loss": 0.2564, "step": 9690 }, { "epoch": 0.0688488926507356, "grad_norm": 0.1201171875, "learning_rate": 0.00199966318401257, "loss": 0.2603, "step": 9700 }, { "epoch": 0.06891987089058171, "grad_norm": 0.1025390625, "learning_rate": 0.001999662409326387, "loss": 0.2628, "step": 9710 }, { "epoch": 0.06899084913042784, "grad_norm": 0.1494140625, "learning_rate": 0.0019996616337504946, "loss": 0.2685, "step": 9720 }, { "epoch": 0.06906182737027396, "grad_norm": 0.146484375, "learning_rate": 0.001999660857284893, "loss": 0.2878, "step": 9730 }, { "epoch": 0.06913280561012007, "grad_norm": 0.08837890625, "learning_rate": 0.0019996600799295837, "loss": 0.2728, "step": 9740 }, { "epoch": 0.0692037838499662, "grad_norm": 0.1015625, "learning_rate": 0.001999659301684566, "loss": 0.2731, "step": 9750 }, { "epoch": 0.06927476208981231, "grad_norm": 0.1025390625, "learning_rate": 0.0019996585225498425, "loss": 0.2578, "step": 9760 }, { "epoch": 0.06934574032965843, "grad_norm": 0.1474609375, "learning_rate": 0.001999657742525413, "loss": 0.2916, "step": 9770 }, { "epoch": 0.06941671856950456, "grad_norm": 0.16796875, "learning_rate": 0.0019996569616112783, "loss": 0.2842, "step": 9780 }, { "epoch": 0.06948769680935067, "grad_norm": 0.1064453125, "learning_rate": 0.001999656179807439, "loss": 0.2671, "step": 9790 }, { "epoch": 0.06955867504919679, "grad_norm": 0.1103515625, "learning_rate": 0.0019996553971138964, "loss": 0.2651, "step": 9800 }, { "epoch": 0.06962965328904291, "grad_norm": 0.15234375, "learning_rate": 0.0019996546135306508, "loss": 0.273, "step": 9810 }, { "epoch": 0.06970063152888903, "grad_norm": 0.09619140625, "learning_rate": 0.0019996538290577032, "loss": 0.2594, "step": 9820 }, { "epoch": 0.06977160976873514, "grad_norm": 0.2177734375, "learning_rate": 0.0019996530436950546, "loss": 0.2523, "step": 9830 }, { "epoch": 0.06984258800858127, "grad_norm": 0.12060546875, "learning_rate": 0.0019996522574427054, "loss": 0.2586, "step": 9840 }, { "epoch": 0.06991356624842739, "grad_norm": 0.1767578125, "learning_rate": 0.001999651470300656, "loss": 0.2692, "step": 9850 }, { "epoch": 0.0699845444882735, "grad_norm": 0.12451171875, "learning_rate": 0.0019996506822689084, "loss": 0.2615, "step": 9860 }, { "epoch": 0.07005552272811963, "grad_norm": 0.427734375, "learning_rate": 0.0019996498933474625, "loss": 0.2757, "step": 9870 }, { "epoch": 0.07012650096796574, "grad_norm": 0.2333984375, "learning_rate": 0.001999649103536319, "loss": 0.2711, "step": 9880 }, { "epoch": 0.07019747920781187, "grad_norm": 0.11376953125, "learning_rate": 0.001999648312835479, "loss": 0.2625, "step": 9890 }, { "epoch": 0.07026845744765799, "grad_norm": 0.12353515625, "learning_rate": 0.0019996475212449433, "loss": 0.254, "step": 9900 }, { "epoch": 0.0703394356875041, "grad_norm": 0.10888671875, "learning_rate": 0.0019996467287647125, "loss": 0.2725, "step": 9910 }, { "epoch": 0.07041041392735023, "grad_norm": 0.11474609375, "learning_rate": 0.001999645935394787, "loss": 0.2651, "step": 9920 }, { "epoch": 0.07048139216719634, "grad_norm": 0.11962890625, "learning_rate": 0.001999645141135169, "loss": 0.2811, "step": 9930 }, { "epoch": 0.07055237040704246, "grad_norm": 0.0869140625, "learning_rate": 0.001999644345985858, "loss": 0.2802, "step": 9940 }, { "epoch": 0.07062334864688859, "grad_norm": 0.11181640625, "learning_rate": 0.001999643549946855, "loss": 0.2906, "step": 9950 }, { "epoch": 0.0706943268867347, "grad_norm": 0.138671875, "learning_rate": 0.001999642753018161, "loss": 0.2681, "step": 9960 }, { "epoch": 0.07076530512658082, "grad_norm": 0.134765625, "learning_rate": 0.001999641955199777, "loss": 0.2799, "step": 9970 }, { "epoch": 0.07083628336642694, "grad_norm": 0.1259765625, "learning_rate": 0.0019996411564917023, "loss": 0.2703, "step": 9980 }, { "epoch": 0.07090726160627306, "grad_norm": 0.08447265625, "learning_rate": 0.00199964035689394, "loss": 0.2725, "step": 9990 }, { "epoch": 0.07097823984611917, "grad_norm": 0.1181640625, "learning_rate": 0.0019996395564064896, "loss": 0.294, "step": 10000 }, { "epoch": 0.07097823984611917, "eval_covost2-zh-en_loss": 4.109087944030762, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.429, "eval_covost2-zh-en_samples_per_second": 3.133, "eval_covost2-zh-en_steps_per_second": 0.196, "step": 10000 }, { "epoch": 0.07097823984611917, "eval_covost2-en-zh_loss": 3.195521354675293, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.3135, "eval_covost2-en-zh_samples_per_second": 3.314, "eval_covost2-en-zh_steps_per_second": 0.207, "step": 10000 }, { "epoch": 0.0710492180859653, "grad_norm": 0.111328125, "learning_rate": 0.001999638755029352, "loss": 0.2625, "step": 10010 }, { "epoch": 0.07112019632581142, "grad_norm": 0.17578125, "learning_rate": 0.0019996379527625276, "loss": 0.268, "step": 10020 }, { "epoch": 0.07119117456565753, "grad_norm": 0.205078125, "learning_rate": 0.0019996371496060184, "loss": 0.2945, "step": 10030 }, { "epoch": 0.07126215280550366, "grad_norm": 0.14453125, "learning_rate": 0.001999636345559824, "loss": 0.2732, "step": 10040 }, { "epoch": 0.07133313104534977, "grad_norm": 0.13671875, "learning_rate": 0.001999635540623946, "loss": 0.2791, "step": 10050 }, { "epoch": 0.07140410928519589, "grad_norm": 0.1611328125, "learning_rate": 0.001999634734798384, "loss": 0.2902, "step": 10060 }, { "epoch": 0.07147508752504202, "grad_norm": 0.1396484375, "learning_rate": 0.0019996339280831405, "loss": 0.2582, "step": 10070 }, { "epoch": 0.07154606576488813, "grad_norm": 0.1494140625, "learning_rate": 0.001999633120478215, "loss": 0.2715, "step": 10080 }, { "epoch": 0.07161704400473425, "grad_norm": 0.08447265625, "learning_rate": 0.001999632311983609, "loss": 0.2832, "step": 10090 }, { "epoch": 0.07168802224458037, "grad_norm": 0.1103515625, "learning_rate": 0.001999631502599323, "loss": 0.262, "step": 10100 }, { "epoch": 0.07175900048442649, "grad_norm": 0.10400390625, "learning_rate": 0.0019996306923253577, "loss": 0.2633, "step": 10110 }, { "epoch": 0.0718299787242726, "grad_norm": 0.12451171875, "learning_rate": 0.0019996298811617136, "loss": 0.2669, "step": 10120 }, { "epoch": 0.07190095696411873, "grad_norm": 0.10498046875, "learning_rate": 0.0019996290691083928, "loss": 0.2639, "step": 10130 }, { "epoch": 0.07197193520396485, "grad_norm": 0.25, "learning_rate": 0.0019996282561653943, "loss": 0.2835, "step": 10140 }, { "epoch": 0.07204291344381096, "grad_norm": 0.1201171875, "learning_rate": 0.0019996274423327207, "loss": 0.2656, "step": 10150 }, { "epoch": 0.07211389168365709, "grad_norm": 0.369140625, "learning_rate": 0.0019996266276103712, "loss": 0.2696, "step": 10160 }, { "epoch": 0.0721848699235032, "grad_norm": 0.14453125, "learning_rate": 0.001999625811998348, "loss": 0.3001, "step": 10170 }, { "epoch": 0.07225584816334932, "grad_norm": 0.1064453125, "learning_rate": 0.001999624995496651, "loss": 0.2872, "step": 10180 }, { "epoch": 0.07232682640319545, "grad_norm": 0.1298828125, "learning_rate": 0.001999624178105281, "loss": 0.2687, "step": 10190 }, { "epoch": 0.07239780464304156, "grad_norm": 0.09423828125, "learning_rate": 0.0019996233598242397, "loss": 0.2532, "step": 10200 }, { "epoch": 0.07246878288288768, "grad_norm": 0.0966796875, "learning_rate": 0.0019996225406535267, "loss": 0.2667, "step": 10210 }, { "epoch": 0.0725397611227338, "grad_norm": 0.1123046875, "learning_rate": 0.0019996217205931434, "loss": 0.2487, "step": 10220 }, { "epoch": 0.07261073936257992, "grad_norm": 0.169921875, "learning_rate": 0.001999620899643091, "loss": 0.2649, "step": 10230 }, { "epoch": 0.07268171760242603, "grad_norm": 0.14453125, "learning_rate": 0.0019996200778033698, "loss": 0.2748, "step": 10240 }, { "epoch": 0.07275269584227216, "grad_norm": 0.14453125, "learning_rate": 0.001999619255073981, "loss": 0.2745, "step": 10250 }, { "epoch": 0.07282367408211828, "grad_norm": 0.11962890625, "learning_rate": 0.001999618431454924, "loss": 0.2816, "step": 10260 }, { "epoch": 0.07289465232196439, "grad_norm": 0.08544921875, "learning_rate": 0.0019996176069462016, "loss": 0.2609, "step": 10270 }, { "epoch": 0.07296563056181052, "grad_norm": 0.08740234375, "learning_rate": 0.001999616781547814, "loss": 0.2783, "step": 10280 }, { "epoch": 0.07303660880165663, "grad_norm": 0.10498046875, "learning_rate": 0.0019996159552597616, "loss": 0.2668, "step": 10290 }, { "epoch": 0.07310758704150275, "grad_norm": 0.1796875, "learning_rate": 0.001999615128082045, "loss": 0.2492, "step": 10300 }, { "epoch": 0.07317856528134888, "grad_norm": 0.1435546875, "learning_rate": 0.0019996143000146657, "loss": 0.268, "step": 10310 }, { "epoch": 0.07324954352119499, "grad_norm": 0.1865234375, "learning_rate": 0.0019996134710576242, "loss": 0.2645, "step": 10320 }, { "epoch": 0.0733205217610411, "grad_norm": 0.169921875, "learning_rate": 0.0019996126412109216, "loss": 0.2619, "step": 10330 }, { "epoch": 0.07339150000088723, "grad_norm": 0.1259765625, "learning_rate": 0.0019996118104745582, "loss": 0.2694, "step": 10340 }, { "epoch": 0.07346247824073335, "grad_norm": 0.09033203125, "learning_rate": 0.0019996109788485354, "loss": 0.2673, "step": 10350 }, { "epoch": 0.07353345648057946, "grad_norm": 0.10595703125, "learning_rate": 0.001999610146332853, "loss": 0.2626, "step": 10360 }, { "epoch": 0.07360443472042559, "grad_norm": 0.14453125, "learning_rate": 0.0019996093129275136, "loss": 0.2772, "step": 10370 }, { "epoch": 0.0736754129602717, "grad_norm": 0.09521484375, "learning_rate": 0.0019996084786325164, "loss": 0.2655, "step": 10380 }, { "epoch": 0.07374639120011782, "grad_norm": 0.103515625, "learning_rate": 0.0019996076434478627, "loss": 0.2779, "step": 10390 }, { "epoch": 0.07381736943996395, "grad_norm": 0.1162109375, "learning_rate": 0.0019996068073735536, "loss": 0.246, "step": 10400 }, { "epoch": 0.07388834767981006, "grad_norm": 0.0966796875, "learning_rate": 0.0019996059704095893, "loss": 0.2606, "step": 10410 }, { "epoch": 0.07395932591965618, "grad_norm": 0.212890625, "learning_rate": 0.0019996051325559716, "loss": 0.2858, "step": 10420 }, { "epoch": 0.0740303041595023, "grad_norm": 0.0966796875, "learning_rate": 0.0019996042938127006, "loss": 0.2588, "step": 10430 }, { "epoch": 0.07410128239934842, "grad_norm": 0.130859375, "learning_rate": 0.0019996034541797775, "loss": 0.248, "step": 10440 }, { "epoch": 0.07417226063919453, "grad_norm": 0.1533203125, "learning_rate": 0.001999602613657203, "loss": 0.2815, "step": 10450 }, { "epoch": 0.07424323887904066, "grad_norm": 0.09423828125, "learning_rate": 0.0019996017722449778, "loss": 0.2817, "step": 10460 }, { "epoch": 0.07431421711888678, "grad_norm": 0.162109375, "learning_rate": 0.001999600929943103, "loss": 0.2683, "step": 10470 }, { "epoch": 0.07438519535873289, "grad_norm": 0.173828125, "learning_rate": 0.001999600086751579, "loss": 0.2566, "step": 10480 }, { "epoch": 0.07445617359857902, "grad_norm": 0.232421875, "learning_rate": 0.0019995992426704067, "loss": 0.2727, "step": 10490 }, { "epoch": 0.07452715183842513, "grad_norm": 0.10791015625, "learning_rate": 0.0019995983976995877, "loss": 0.2922, "step": 10500 }, { "epoch": 0.07459813007827125, "grad_norm": 0.0966796875, "learning_rate": 0.001999597551839122, "loss": 0.2775, "step": 10510 }, { "epoch": 0.07466910831811738, "grad_norm": 0.1318359375, "learning_rate": 0.0019995967050890104, "loss": 0.2834, "step": 10520 }, { "epoch": 0.07474008655796349, "grad_norm": 0.0966796875, "learning_rate": 0.0019995958574492542, "loss": 0.2925, "step": 10530 }, { "epoch": 0.0748110647978096, "grad_norm": 0.1806640625, "learning_rate": 0.0019995950089198543, "loss": 0.294, "step": 10540 }, { "epoch": 0.07488204303765573, "grad_norm": 0.1435546875, "learning_rate": 0.001999594159500811, "loss": 0.2665, "step": 10550 }, { "epoch": 0.07495302127750185, "grad_norm": 0.10205078125, "learning_rate": 0.001999593309192126, "loss": 0.2672, "step": 10560 }, { "epoch": 0.07502399951734796, "grad_norm": 0.177734375, "learning_rate": 0.001999592457993799, "loss": 0.2766, "step": 10570 }, { "epoch": 0.07509497775719409, "grad_norm": 0.140625, "learning_rate": 0.001999591605905832, "loss": 0.2552, "step": 10580 }, { "epoch": 0.0751659559970402, "grad_norm": 0.1474609375, "learning_rate": 0.001999590752928225, "loss": 0.286, "step": 10590 }, { "epoch": 0.07523693423688632, "grad_norm": 0.18359375, "learning_rate": 0.0019995898990609793, "loss": 0.2615, "step": 10600 }, { "epoch": 0.07530791247673245, "grad_norm": 0.07080078125, "learning_rate": 0.001999589044304095, "loss": 0.2666, "step": 10610 }, { "epoch": 0.07537889071657856, "grad_norm": 0.1220703125, "learning_rate": 0.001999588188657574, "loss": 0.2764, "step": 10620 }, { "epoch": 0.07544986895642468, "grad_norm": 0.2109375, "learning_rate": 0.0019995873321214165, "loss": 0.258, "step": 10630 }, { "epoch": 0.07552084719627081, "grad_norm": 0.08642578125, "learning_rate": 0.0019995864746956238, "loss": 0.2629, "step": 10640 }, { "epoch": 0.07559182543611692, "grad_norm": 0.1328125, "learning_rate": 0.001999585616380196, "loss": 0.2651, "step": 10650 }, { "epoch": 0.07566280367596304, "grad_norm": 0.0966796875, "learning_rate": 0.0019995847571751347, "loss": 0.2669, "step": 10660 }, { "epoch": 0.07573378191580916, "grad_norm": 0.1318359375, "learning_rate": 0.0019995838970804404, "loss": 0.2657, "step": 10670 }, { "epoch": 0.07580476015565528, "grad_norm": 0.083984375, "learning_rate": 0.0019995830360961136, "loss": 0.2553, "step": 10680 }, { "epoch": 0.0758757383955014, "grad_norm": 0.091796875, "learning_rate": 0.0019995821742221564, "loss": 0.2685, "step": 10690 }, { "epoch": 0.07594671663534752, "grad_norm": 0.11328125, "learning_rate": 0.001999581311458568, "loss": 0.2759, "step": 10700 }, { "epoch": 0.07601769487519364, "grad_norm": 0.255859375, "learning_rate": 0.0019995804478053505, "loss": 0.267, "step": 10710 }, { "epoch": 0.07608867311503975, "grad_norm": 0.103515625, "learning_rate": 0.0019995795832625043, "loss": 0.2802, "step": 10720 }, { "epoch": 0.07615965135488588, "grad_norm": 0.11279296875, "learning_rate": 0.0019995787178300303, "loss": 0.2471, "step": 10730 }, { "epoch": 0.076230629594732, "grad_norm": 0.1435546875, "learning_rate": 0.001999577851507929, "loss": 0.2729, "step": 10740 }, { "epoch": 0.07630160783457811, "grad_norm": 0.0986328125, "learning_rate": 0.001999576984296202, "loss": 0.2592, "step": 10750 }, { "epoch": 0.07637258607442424, "grad_norm": 0.068359375, "learning_rate": 0.0019995761161948496, "loss": 0.2578, "step": 10760 }, { "epoch": 0.07644356431427035, "grad_norm": 0.294921875, "learning_rate": 0.001999575247203873, "loss": 0.2698, "step": 10770 }, { "epoch": 0.07651454255411647, "grad_norm": 0.1572265625, "learning_rate": 0.001999574377323272, "loss": 0.2726, "step": 10780 }, { "epoch": 0.0765855207939626, "grad_norm": 0.10791015625, "learning_rate": 0.0019995735065530492, "loss": 0.2552, "step": 10790 }, { "epoch": 0.07665649903380871, "grad_norm": 0.15234375, "learning_rate": 0.0019995726348932042, "loss": 0.287, "step": 10800 }, { "epoch": 0.07672747727365482, "grad_norm": 0.1064453125, "learning_rate": 0.001999571762343739, "loss": 0.2727, "step": 10810 }, { "epoch": 0.07679845551350095, "grad_norm": 0.0849609375, "learning_rate": 0.0019995708889046526, "loss": 0.2631, "step": 10820 }, { "epoch": 0.07686943375334707, "grad_norm": 0.09423828125, "learning_rate": 0.0019995700145759476, "loss": 0.2714, "step": 10830 }, { "epoch": 0.07694041199319318, "grad_norm": 0.205078125, "learning_rate": 0.0019995691393576245, "loss": 0.2813, "step": 10840 }, { "epoch": 0.07701139023303931, "grad_norm": 0.1083984375, "learning_rate": 0.001999568263249683, "loss": 0.2749, "step": 10850 }, { "epoch": 0.07708236847288542, "grad_norm": 0.2451171875, "learning_rate": 0.0019995673862521256, "loss": 0.2869, "step": 10860 }, { "epoch": 0.07715334671273154, "grad_norm": 0.09375, "learning_rate": 0.001999566508364952, "loss": 0.2777, "step": 10870 }, { "epoch": 0.07722432495257767, "grad_norm": 0.09228515625, "learning_rate": 0.0019995656295881637, "loss": 0.2753, "step": 10880 }, { "epoch": 0.07729530319242378, "grad_norm": 0.095703125, "learning_rate": 0.0019995647499217615, "loss": 0.2524, "step": 10890 }, { "epoch": 0.0773662814322699, "grad_norm": 0.0791015625, "learning_rate": 0.001999563869365746, "loss": 0.2927, "step": 10900 }, { "epoch": 0.07743725967211602, "grad_norm": 0.09814453125, "learning_rate": 0.001999562987920118, "loss": 0.2696, "step": 10910 }, { "epoch": 0.07750823791196214, "grad_norm": 0.1982421875, "learning_rate": 0.001999562105584879, "loss": 0.2667, "step": 10920 }, { "epoch": 0.07757921615180825, "grad_norm": 0.11669921875, "learning_rate": 0.0019995612223600292, "loss": 0.275, "step": 10930 }, { "epoch": 0.07765019439165438, "grad_norm": 0.064453125, "learning_rate": 0.0019995603382455696, "loss": 0.2626, "step": 10940 }, { "epoch": 0.0777211726315005, "grad_norm": 0.126953125, "learning_rate": 0.0019995594532415018, "loss": 0.2781, "step": 10950 }, { "epoch": 0.07779215087134661, "grad_norm": 0.095703125, "learning_rate": 0.001999558567347825, "loss": 0.2847, "step": 10960 }, { "epoch": 0.07786312911119274, "grad_norm": 0.1923828125, "learning_rate": 0.0019995576805645426, "loss": 0.2708, "step": 10970 }, { "epoch": 0.07793410735103885, "grad_norm": 0.1826171875, "learning_rate": 0.0019995567928916534, "loss": 0.3128, "step": 10980 }, { "epoch": 0.07800508559088497, "grad_norm": 0.1591796875, "learning_rate": 0.0019995559043291585, "loss": 0.2797, "step": 10990 }, { "epoch": 0.0780760638307311, "grad_norm": 0.10498046875, "learning_rate": 0.0019995550148770597, "loss": 0.272, "step": 11000 }, { "epoch": 0.07814704207057721, "grad_norm": 0.12255859375, "learning_rate": 0.0019995541245353566, "loss": 0.2655, "step": 11010 }, { "epoch": 0.07821802031042333, "grad_norm": 0.125, "learning_rate": 0.0019995532333040517, "loss": 0.2763, "step": 11020 }, { "epoch": 0.07828899855026945, "grad_norm": 0.0859375, "learning_rate": 0.0019995523411831446, "loss": 0.2513, "step": 11030 }, { "epoch": 0.07835997679011557, "grad_norm": 0.09130859375, "learning_rate": 0.001999551448172637, "loss": 0.2689, "step": 11040 }, { "epoch": 0.07843095502996168, "grad_norm": 0.0927734375, "learning_rate": 0.001999550554272529, "loss": 0.2973, "step": 11050 }, { "epoch": 0.07850193326980781, "grad_norm": 0.18359375, "learning_rate": 0.001999549659482822, "loss": 0.2753, "step": 11060 }, { "epoch": 0.07857291150965393, "grad_norm": 0.1845703125, "learning_rate": 0.001999548763803517, "loss": 0.2691, "step": 11070 }, { "epoch": 0.07864388974950005, "grad_norm": 0.07177734375, "learning_rate": 0.001999547867234614, "loss": 0.2679, "step": 11080 }, { "epoch": 0.07871486798934617, "grad_norm": 0.09326171875, "learning_rate": 0.0019995469697761154, "loss": 0.2655, "step": 11090 }, { "epoch": 0.07878584622919228, "grad_norm": 0.1259765625, "learning_rate": 0.0019995460714280207, "loss": 0.2539, "step": 11100 }, { "epoch": 0.07885682446903841, "grad_norm": 0.390625, "learning_rate": 0.001999545172190331, "loss": 0.2749, "step": 11110 }, { "epoch": 0.07892780270888453, "grad_norm": 0.130859375, "learning_rate": 0.001999544272063048, "loss": 0.2713, "step": 11120 }, { "epoch": 0.07899878094873064, "grad_norm": 0.1279296875, "learning_rate": 0.001999543371046172, "loss": 0.2872, "step": 11130 }, { "epoch": 0.07906975918857677, "grad_norm": 0.103515625, "learning_rate": 0.001999542469139704, "loss": 0.2916, "step": 11140 }, { "epoch": 0.07914073742842288, "grad_norm": 0.080078125, "learning_rate": 0.001999541566343645, "loss": 0.2858, "step": 11150 }, { "epoch": 0.079211715668269, "grad_norm": 0.1572265625, "learning_rate": 0.0019995406626579955, "loss": 0.2651, "step": 11160 }, { "epoch": 0.07928269390811513, "grad_norm": 0.11376953125, "learning_rate": 0.001999539758082757, "loss": 0.2728, "step": 11170 }, { "epoch": 0.07935367214796124, "grad_norm": 0.1083984375, "learning_rate": 0.0019995388526179297, "loss": 0.2714, "step": 11180 }, { "epoch": 0.07942465038780736, "grad_norm": 0.146484375, "learning_rate": 0.001999537946263515, "loss": 0.2568, "step": 11190 }, { "epoch": 0.07949562862765348, "grad_norm": 0.10107421875, "learning_rate": 0.0019995370390195138, "loss": 0.2616, "step": 11200 }, { "epoch": 0.0795666068674996, "grad_norm": 0.08544921875, "learning_rate": 0.0019995361308859268, "loss": 0.2534, "step": 11210 }, { "epoch": 0.07963758510734571, "grad_norm": 0.1259765625, "learning_rate": 0.001999535221862755, "loss": 0.2605, "step": 11220 }, { "epoch": 0.07970856334719184, "grad_norm": 0.2373046875, "learning_rate": 0.001999534311949999, "loss": 0.2774, "step": 11230 }, { "epoch": 0.07977954158703796, "grad_norm": 0.1484375, "learning_rate": 0.0019995334011476597, "loss": 0.2732, "step": 11240 }, { "epoch": 0.07985051982688407, "grad_norm": 0.09716796875, "learning_rate": 0.0019995324894557386, "loss": 0.2835, "step": 11250 }, { "epoch": 0.0799214980667302, "grad_norm": 0.09375, "learning_rate": 0.001999531576874236, "loss": 0.2769, "step": 11260 }, { "epoch": 0.07999247630657631, "grad_norm": 0.0859375, "learning_rate": 0.0019995306634031534, "loss": 0.258, "step": 11270 }, { "epoch": 0.08006345454642243, "grad_norm": 0.1494140625, "learning_rate": 0.001999529749042491, "loss": 0.2616, "step": 11280 }, { "epoch": 0.08013443278626856, "grad_norm": 0.12255859375, "learning_rate": 0.0019995288337922505, "loss": 0.2553, "step": 11290 }, { "epoch": 0.08020541102611467, "grad_norm": 0.12109375, "learning_rate": 0.001999527917652432, "loss": 0.2732, "step": 11300 }, { "epoch": 0.08027638926596078, "grad_norm": 0.0810546875, "learning_rate": 0.001999527000623037, "loss": 0.26, "step": 11310 }, { "epoch": 0.08034736750580691, "grad_norm": 0.10595703125, "learning_rate": 0.0019995260827040664, "loss": 0.2437, "step": 11320 }, { "epoch": 0.08041834574565303, "grad_norm": 0.1513671875, "learning_rate": 0.0019995251638955207, "loss": 0.2676, "step": 11330 }, { "epoch": 0.08048932398549914, "grad_norm": 0.1552734375, "learning_rate": 0.0019995242441974005, "loss": 0.2704, "step": 11340 }, { "epoch": 0.08056030222534527, "grad_norm": 0.1103515625, "learning_rate": 0.0019995233236097076, "loss": 0.2478, "step": 11350 }, { "epoch": 0.08063128046519139, "grad_norm": 0.2099609375, "learning_rate": 0.0019995224021324425, "loss": 0.2652, "step": 11360 }, { "epoch": 0.0807022587050375, "grad_norm": 0.07421875, "learning_rate": 0.001999521479765606, "loss": 0.2589, "step": 11370 }, { "epoch": 0.08077323694488363, "grad_norm": 0.058837890625, "learning_rate": 0.0019995205565091993, "loss": 0.2547, "step": 11380 }, { "epoch": 0.08084421518472974, "grad_norm": 0.1484375, "learning_rate": 0.001999519632363223, "loss": 0.2758, "step": 11390 }, { "epoch": 0.08091519342457586, "grad_norm": 0.11474609375, "learning_rate": 0.0019995187073276782, "loss": 0.257, "step": 11400 }, { "epoch": 0.08098617166442199, "grad_norm": 0.1484375, "learning_rate": 0.001999517781402566, "loss": 0.2677, "step": 11410 }, { "epoch": 0.0810571499042681, "grad_norm": 0.08544921875, "learning_rate": 0.001999516854587887, "loss": 0.267, "step": 11420 }, { "epoch": 0.08112812814411421, "grad_norm": 0.103515625, "learning_rate": 0.0019995159268836418, "loss": 0.2593, "step": 11430 }, { "epoch": 0.08119910638396034, "grad_norm": 0.1494140625, "learning_rate": 0.001999514998289832, "loss": 0.2746, "step": 11440 }, { "epoch": 0.08127008462380646, "grad_norm": 0.0927734375, "learning_rate": 0.0019995140688064583, "loss": 0.2701, "step": 11450 }, { "epoch": 0.08134106286365257, "grad_norm": 0.1123046875, "learning_rate": 0.0019995131384335217, "loss": 0.278, "step": 11460 }, { "epoch": 0.0814120411034987, "grad_norm": 0.0927734375, "learning_rate": 0.0019995122071710227, "loss": 0.2697, "step": 11470 }, { "epoch": 0.08148301934334481, "grad_norm": 0.11865234375, "learning_rate": 0.0019995112750189623, "loss": 0.2566, "step": 11480 }, { "epoch": 0.08155399758319093, "grad_norm": 0.169921875, "learning_rate": 0.001999510341977342, "loss": 0.2698, "step": 11490 }, { "epoch": 0.08162497582303706, "grad_norm": 0.09912109375, "learning_rate": 0.0019995094080461624, "loss": 0.2608, "step": 11500 }, { "epoch": 0.08169595406288317, "grad_norm": 0.15625, "learning_rate": 0.0019995084732254242, "loss": 0.2624, "step": 11510 }, { "epoch": 0.08176693230272929, "grad_norm": 0.0869140625, "learning_rate": 0.0019995075375151285, "loss": 0.2649, "step": 11520 }, { "epoch": 0.08183791054257541, "grad_norm": 0.08349609375, "learning_rate": 0.001999506600915276, "loss": 0.2547, "step": 11530 }, { "epoch": 0.08190888878242153, "grad_norm": 0.12451171875, "learning_rate": 0.0019995056634258684, "loss": 0.2601, "step": 11540 }, { "epoch": 0.08197986702226764, "grad_norm": 0.09033203125, "learning_rate": 0.001999504725046906, "loss": 0.2546, "step": 11550 }, { "epoch": 0.08205084526211377, "grad_norm": 0.107421875, "learning_rate": 0.0019995037857783895, "loss": 0.2764, "step": 11560 }, { "epoch": 0.08212182350195989, "grad_norm": 0.0791015625, "learning_rate": 0.00199950284562032, "loss": 0.2391, "step": 11570 }, { "epoch": 0.082192801741806, "grad_norm": 0.11962890625, "learning_rate": 0.001999501904572699, "loss": 0.2728, "step": 11580 }, { "epoch": 0.08226377998165213, "grad_norm": 0.10986328125, "learning_rate": 0.001999500962635527, "loss": 0.2639, "step": 11590 }, { "epoch": 0.08233475822149824, "grad_norm": 0.07666015625, "learning_rate": 0.0019995000198088043, "loss": 0.2458, "step": 11600 }, { "epoch": 0.08240573646134436, "grad_norm": 0.1376953125, "learning_rate": 0.001999499076092533, "loss": 0.2542, "step": 11610 }, { "epoch": 0.08247671470119049, "grad_norm": 0.0947265625, "learning_rate": 0.0019994981314867136, "loss": 0.2463, "step": 11620 }, { "epoch": 0.0825476929410366, "grad_norm": 0.12060546875, "learning_rate": 0.0019994971859913463, "loss": 0.2589, "step": 11630 }, { "epoch": 0.08261867118088272, "grad_norm": 0.1279296875, "learning_rate": 0.001999496239606433, "loss": 0.272, "step": 11640 }, { "epoch": 0.08268964942072884, "grad_norm": 0.1455078125, "learning_rate": 0.0019994952923319746, "loss": 0.2561, "step": 11650 }, { "epoch": 0.08276062766057496, "grad_norm": 0.125, "learning_rate": 0.001999494344167971, "loss": 0.2591, "step": 11660 }, { "epoch": 0.08283160590042107, "grad_norm": 0.1767578125, "learning_rate": 0.001999493395114425, "loss": 0.2831, "step": 11670 }, { "epoch": 0.0829025841402672, "grad_norm": 0.1181640625, "learning_rate": 0.0019994924451713354, "loss": 0.254, "step": 11680 }, { "epoch": 0.08297356238011332, "grad_norm": 0.10009765625, "learning_rate": 0.0019994914943387045, "loss": 0.2784, "step": 11690 }, { "epoch": 0.08304454061995943, "grad_norm": 0.3671875, "learning_rate": 0.001999490542616533, "loss": 0.2693, "step": 11700 }, { "epoch": 0.08311551885980556, "grad_norm": 0.10498046875, "learning_rate": 0.0019994895900048216, "loss": 0.2808, "step": 11710 }, { "epoch": 0.08318649709965167, "grad_norm": 0.236328125, "learning_rate": 0.0019994886365035714, "loss": 0.2611, "step": 11720 }, { "epoch": 0.08325747533949779, "grad_norm": 0.2001953125, "learning_rate": 0.0019994876821127832, "loss": 0.2635, "step": 11730 }, { "epoch": 0.08332845357934392, "grad_norm": 0.1240234375, "learning_rate": 0.0019994867268324583, "loss": 0.2634, "step": 11740 }, { "epoch": 0.08339943181919003, "grad_norm": 0.103515625, "learning_rate": 0.001999485770662597, "loss": 0.2567, "step": 11750 }, { "epoch": 0.08347041005903615, "grad_norm": 0.1181640625, "learning_rate": 0.001999484813603201, "loss": 0.282, "step": 11760 }, { "epoch": 0.08354138829888227, "grad_norm": 0.09326171875, "learning_rate": 0.0019994838556542703, "loss": 0.2588, "step": 11770 }, { "epoch": 0.08361236653872839, "grad_norm": 0.1328125, "learning_rate": 0.0019994828968158073, "loss": 0.2803, "step": 11780 }, { "epoch": 0.0836833447785745, "grad_norm": 0.1904296875, "learning_rate": 0.001999481937087812, "loss": 0.2717, "step": 11790 }, { "epoch": 0.08375432301842063, "grad_norm": 0.20703125, "learning_rate": 0.001999480976470285, "loss": 0.2763, "step": 11800 }, { "epoch": 0.08382530125826675, "grad_norm": 0.08740234375, "learning_rate": 0.001999480014963228, "loss": 0.2612, "step": 11810 }, { "epoch": 0.08389627949811286, "grad_norm": 0.08642578125, "learning_rate": 0.001999479052566641, "loss": 0.2536, "step": 11820 }, { "epoch": 0.08396725773795899, "grad_norm": 0.1484375, "learning_rate": 0.0019994780892805264, "loss": 0.2691, "step": 11830 }, { "epoch": 0.0840382359778051, "grad_norm": 0.1826171875, "learning_rate": 0.001999477125104884, "loss": 0.2801, "step": 11840 }, { "epoch": 0.08410921421765122, "grad_norm": 0.16796875, "learning_rate": 0.001999476160039715, "loss": 0.2709, "step": 11850 }, { "epoch": 0.08418019245749735, "grad_norm": 0.1904296875, "learning_rate": 0.0019994751940850203, "loss": 0.2585, "step": 11860 }, { "epoch": 0.08425117069734346, "grad_norm": 0.10595703125, "learning_rate": 0.0019994742272408015, "loss": 0.2776, "step": 11870 }, { "epoch": 0.08432214893718958, "grad_norm": 0.1318359375, "learning_rate": 0.001999473259507059, "loss": 0.2488, "step": 11880 }, { "epoch": 0.0843931271770357, "grad_norm": 0.15625, "learning_rate": 0.001999472290883793, "loss": 0.2647, "step": 11890 }, { "epoch": 0.08446410541688182, "grad_norm": 0.171875, "learning_rate": 0.0019994713213710063, "loss": 0.2553, "step": 11900 }, { "epoch": 0.08453508365672793, "grad_norm": 0.11083984375, "learning_rate": 0.0019994703509686983, "loss": 0.2654, "step": 11910 }, { "epoch": 0.08460606189657406, "grad_norm": 0.1728515625, "learning_rate": 0.001999469379676871, "loss": 0.2779, "step": 11920 }, { "epoch": 0.08467704013642018, "grad_norm": 0.103515625, "learning_rate": 0.001999468407495524, "loss": 0.2941, "step": 11930 }, { "epoch": 0.08474801837626629, "grad_norm": 0.140625, "learning_rate": 0.00199946743442466, "loss": 0.2604, "step": 11940 }, { "epoch": 0.08481899661611242, "grad_norm": 0.1435546875, "learning_rate": 0.0019994664604642788, "loss": 0.2736, "step": 11950 }, { "epoch": 0.08488997485595853, "grad_norm": 0.099609375, "learning_rate": 0.0019994654856143814, "loss": 0.2624, "step": 11960 }, { "epoch": 0.08496095309580465, "grad_norm": 0.08203125, "learning_rate": 0.001999464509874969, "loss": 0.2547, "step": 11970 }, { "epoch": 0.08503193133565078, "grad_norm": 0.0830078125, "learning_rate": 0.001999463533246043, "loss": 0.2614, "step": 11980 }, { "epoch": 0.08510290957549689, "grad_norm": 0.0947265625, "learning_rate": 0.0019994625557276037, "loss": 0.2861, "step": 11990 }, { "epoch": 0.085173887815343, "grad_norm": 0.26953125, "learning_rate": 0.0019994615773196523, "loss": 0.2573, "step": 12000 }, { "epoch": 0.085173887815343, "eval_covost2-zh-en_loss": 4.052768707275391, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 22.2252, "eval_covost2-zh-en_samples_per_second": 2.88, "eval_covost2-zh-en_steps_per_second": 0.18, "step": 12000 }, { "epoch": 0.085173887815343, "eval_covost2-en-zh_loss": 3.1886186599731445, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.3672, "eval_covost2-en-zh_samples_per_second": 3.305, "eval_covost2-en-zh_steps_per_second": 0.207, "step": 12000 }, { "epoch": 0.08524486605518913, "grad_norm": 0.10986328125, "learning_rate": 0.0019994605980221897, "loss": 0.2488, "step": 12010 }, { "epoch": 0.08531584429503525, "grad_norm": 0.142578125, "learning_rate": 0.0019994596178352173, "loss": 0.2552, "step": 12020 }, { "epoch": 0.08538682253488136, "grad_norm": 0.17578125, "learning_rate": 0.001999458636758735, "loss": 0.2522, "step": 12030 }, { "epoch": 0.08545780077472749, "grad_norm": 0.1220703125, "learning_rate": 0.001999457654792745, "loss": 0.2565, "step": 12040 }, { "epoch": 0.0855287790145736, "grad_norm": 0.10791015625, "learning_rate": 0.0019994566719372477, "loss": 0.2631, "step": 12050 }, { "epoch": 0.08559975725441972, "grad_norm": 0.10986328125, "learning_rate": 0.0019994556881922443, "loss": 0.2562, "step": 12060 }, { "epoch": 0.08567073549426585, "grad_norm": 0.09814453125, "learning_rate": 0.001999454703557735, "loss": 0.2553, "step": 12070 }, { "epoch": 0.08574171373411196, "grad_norm": 0.1259765625, "learning_rate": 0.001999453718033722, "loss": 0.2488, "step": 12080 }, { "epoch": 0.08581269197395808, "grad_norm": 0.08056640625, "learning_rate": 0.0019994527316202055, "loss": 0.2668, "step": 12090 }, { "epoch": 0.0858836702138042, "grad_norm": 0.1826171875, "learning_rate": 0.001999451744317186, "loss": 0.2614, "step": 12100 }, { "epoch": 0.08595464845365032, "grad_norm": 0.087890625, "learning_rate": 0.0019994507561246658, "loss": 0.2529, "step": 12110 }, { "epoch": 0.08602562669349643, "grad_norm": 0.10986328125, "learning_rate": 0.001999449767042645, "loss": 0.27, "step": 12120 }, { "epoch": 0.08609660493334256, "grad_norm": 0.12060546875, "learning_rate": 0.001999448777071125, "loss": 0.2543, "step": 12130 }, { "epoch": 0.08616758317318868, "grad_norm": 0.1259765625, "learning_rate": 0.001999447786210106, "loss": 0.2512, "step": 12140 }, { "epoch": 0.08623856141303479, "grad_norm": 0.08154296875, "learning_rate": 0.00199944679445959, "loss": 0.2669, "step": 12150 }, { "epoch": 0.08630953965288092, "grad_norm": 0.14453125, "learning_rate": 0.001999445801819577, "loss": 0.2613, "step": 12160 }, { "epoch": 0.08638051789272704, "grad_norm": 0.11767578125, "learning_rate": 0.001999444808290069, "loss": 0.2479, "step": 12170 }, { "epoch": 0.08645149613257315, "grad_norm": 0.1171875, "learning_rate": 0.001999443813871066, "loss": 0.2767, "step": 12180 }, { "epoch": 0.08652247437241928, "grad_norm": 0.25, "learning_rate": 0.00199944281856257, "loss": 0.2805, "step": 12190 }, { "epoch": 0.08659345261226539, "grad_norm": 0.146484375, "learning_rate": 0.0019994418223645803, "loss": 0.2673, "step": 12200 }, { "epoch": 0.08666443085211151, "grad_norm": 0.08642578125, "learning_rate": 0.0019994408252771, "loss": 0.2735, "step": 12210 }, { "epoch": 0.08673540909195764, "grad_norm": 0.1279296875, "learning_rate": 0.001999439827300129, "loss": 0.2593, "step": 12220 }, { "epoch": 0.08680638733180375, "grad_norm": 0.12451171875, "learning_rate": 0.001999438828433668, "loss": 0.2639, "step": 12230 }, { "epoch": 0.08687736557164988, "grad_norm": 0.10107421875, "learning_rate": 0.0019994378286777186, "loss": 0.2833, "step": 12240 }, { "epoch": 0.08694834381149599, "grad_norm": 0.1474609375, "learning_rate": 0.001999436828032282, "loss": 0.2627, "step": 12250 }, { "epoch": 0.08701932205134211, "grad_norm": 0.10400390625, "learning_rate": 0.001999435826497358, "loss": 0.2705, "step": 12260 }, { "epoch": 0.08709030029118824, "grad_norm": 0.1162109375, "learning_rate": 0.001999434824072949, "loss": 0.2518, "step": 12270 }, { "epoch": 0.08716127853103435, "grad_norm": 0.1259765625, "learning_rate": 0.0019994338207590546, "loss": 0.2473, "step": 12280 }, { "epoch": 0.08723225677088046, "grad_norm": 0.1240234375, "learning_rate": 0.001999432816555677, "loss": 0.2681, "step": 12290 }, { "epoch": 0.0873032350107266, "grad_norm": 0.1376953125, "learning_rate": 0.001999431811462817, "loss": 0.2883, "step": 12300 }, { "epoch": 0.08737421325057271, "grad_norm": 0.1357421875, "learning_rate": 0.001999430805480475, "loss": 0.2654, "step": 12310 }, { "epoch": 0.08744519149041882, "grad_norm": 0.11328125, "learning_rate": 0.001999429798608652, "loss": 0.2766, "step": 12320 }, { "epoch": 0.08751616973026495, "grad_norm": 0.1015625, "learning_rate": 0.0019994287908473496, "loss": 0.2787, "step": 12330 }, { "epoch": 0.08758714797011107, "grad_norm": 0.14453125, "learning_rate": 0.001999427782196568, "loss": 0.2781, "step": 12340 }, { "epoch": 0.08765812620995718, "grad_norm": 0.119140625, "learning_rate": 0.0019994267726563095, "loss": 0.2834, "step": 12350 }, { "epoch": 0.08772910444980331, "grad_norm": 0.1416015625, "learning_rate": 0.0019994257622265735, "loss": 0.2519, "step": 12360 }, { "epoch": 0.08780008268964942, "grad_norm": 0.10205078125, "learning_rate": 0.001999424750907363, "loss": 0.2522, "step": 12370 }, { "epoch": 0.08787106092949554, "grad_norm": 0.1474609375, "learning_rate": 0.0019994237386986766, "loss": 0.261, "step": 12380 }, { "epoch": 0.08794203916934167, "grad_norm": 0.08544921875, "learning_rate": 0.001999422725600517, "loss": 0.277, "step": 12390 }, { "epoch": 0.08801301740918778, "grad_norm": 0.07958984375, "learning_rate": 0.0019994217116128844, "loss": 0.2641, "step": 12400 }, { "epoch": 0.0880839956490339, "grad_norm": 0.109375, "learning_rate": 0.0019994206967357806, "loss": 0.2468, "step": 12410 }, { "epoch": 0.08815497388888002, "grad_norm": 0.10986328125, "learning_rate": 0.0019994196809692056, "loss": 0.2671, "step": 12420 }, { "epoch": 0.08822595212872614, "grad_norm": 0.09716796875, "learning_rate": 0.001999418664313161, "loss": 0.258, "step": 12430 }, { "epoch": 0.08829693036857225, "grad_norm": 0.091796875, "learning_rate": 0.001999417646767648, "loss": 0.2536, "step": 12440 }, { "epoch": 0.08836790860841838, "grad_norm": 0.203125, "learning_rate": 0.001999416628332667, "loss": 0.2534, "step": 12450 }, { "epoch": 0.0884388868482645, "grad_norm": 0.12060546875, "learning_rate": 0.0019994156090082198, "loss": 0.2733, "step": 12460 }, { "epoch": 0.08850986508811061, "grad_norm": 0.12890625, "learning_rate": 0.0019994145887943064, "loss": 0.2574, "step": 12470 }, { "epoch": 0.08858084332795674, "grad_norm": 0.08203125, "learning_rate": 0.0019994135676909284, "loss": 0.28, "step": 12480 }, { "epoch": 0.08865182156780285, "grad_norm": 0.09130859375, "learning_rate": 0.001999412545698087, "loss": 0.2585, "step": 12490 }, { "epoch": 0.08872279980764897, "grad_norm": 0.12109375, "learning_rate": 0.001999411522815783, "loss": 0.273, "step": 12500 }, { "epoch": 0.0887937780474951, "grad_norm": 0.08642578125, "learning_rate": 0.001999410499044017, "loss": 0.2764, "step": 12510 }, { "epoch": 0.08886475628734121, "grad_norm": 0.115234375, "learning_rate": 0.0019994094743827906, "loss": 0.2597, "step": 12520 }, { "epoch": 0.08893573452718732, "grad_norm": 0.1669921875, "learning_rate": 0.0019994084488321046, "loss": 0.2641, "step": 12530 }, { "epoch": 0.08900671276703345, "grad_norm": 0.1162109375, "learning_rate": 0.00199940742239196, "loss": 0.2602, "step": 12540 }, { "epoch": 0.08907769100687957, "grad_norm": 0.1591796875, "learning_rate": 0.001999406395062358, "loss": 0.2777, "step": 12550 }, { "epoch": 0.08914866924672568, "grad_norm": 0.0859375, "learning_rate": 0.0019994053668432993, "loss": 0.2483, "step": 12560 }, { "epoch": 0.08921964748657181, "grad_norm": 0.09521484375, "learning_rate": 0.001999404337734785, "loss": 0.2699, "step": 12570 }, { "epoch": 0.08929062572641792, "grad_norm": 0.11865234375, "learning_rate": 0.001999403307736816, "loss": 0.2629, "step": 12580 }, { "epoch": 0.08936160396626404, "grad_norm": 0.1494140625, "learning_rate": 0.001999402276849394, "loss": 0.2633, "step": 12590 }, { "epoch": 0.08943258220611017, "grad_norm": 0.078125, "learning_rate": 0.001999401245072519, "loss": 0.2483, "step": 12600 }, { "epoch": 0.08950356044595628, "grad_norm": 0.10546875, "learning_rate": 0.001999400212406193, "loss": 0.2592, "step": 12610 }, { "epoch": 0.0895745386858024, "grad_norm": 0.09521484375, "learning_rate": 0.0019993991788504165, "loss": 0.2617, "step": 12620 }, { "epoch": 0.08964551692564852, "grad_norm": 0.1357421875, "learning_rate": 0.0019993981444051904, "loss": 0.2616, "step": 12630 }, { "epoch": 0.08971649516549464, "grad_norm": 0.10302734375, "learning_rate": 0.001999397109070516, "loss": 0.2709, "step": 12640 }, { "epoch": 0.08978747340534075, "grad_norm": 0.0693359375, "learning_rate": 0.001999396072846394, "loss": 0.2529, "step": 12650 }, { "epoch": 0.08985845164518688, "grad_norm": 0.107421875, "learning_rate": 0.0019993950357328257, "loss": 0.2526, "step": 12660 }, { "epoch": 0.089929429885033, "grad_norm": 0.11279296875, "learning_rate": 0.0019993939977298123, "loss": 0.2734, "step": 12670 }, { "epoch": 0.09000040812487911, "grad_norm": 0.07666015625, "learning_rate": 0.0019993929588373546, "loss": 0.2579, "step": 12680 }, { "epoch": 0.09007138636472524, "grad_norm": 0.1513671875, "learning_rate": 0.0019993919190554534, "loss": 0.2656, "step": 12690 }, { "epoch": 0.09014236460457135, "grad_norm": 0.1123046875, "learning_rate": 0.0019993908783841104, "loss": 0.2462, "step": 12700 }, { "epoch": 0.09021334284441747, "grad_norm": 0.10791015625, "learning_rate": 0.0019993898368233258, "loss": 0.2497, "step": 12710 }, { "epoch": 0.0902843210842636, "grad_norm": 0.1064453125, "learning_rate": 0.0019993887943731007, "loss": 0.2558, "step": 12720 }, { "epoch": 0.09035529932410971, "grad_norm": 0.1494140625, "learning_rate": 0.001999387751033437, "loss": 0.2591, "step": 12730 }, { "epoch": 0.09042627756395583, "grad_norm": 0.1552734375, "learning_rate": 0.001999386706804335, "loss": 0.2623, "step": 12740 }, { "epoch": 0.09049725580380195, "grad_norm": 0.12353515625, "learning_rate": 0.0019993856616857962, "loss": 0.2467, "step": 12750 }, { "epoch": 0.09056823404364807, "grad_norm": 0.3828125, "learning_rate": 0.001999384615677821, "loss": 0.2813, "step": 12760 }, { "epoch": 0.09063921228349418, "grad_norm": 0.0888671875, "learning_rate": 0.0019993835687804108, "loss": 0.2521, "step": 12770 }, { "epoch": 0.09071019052334031, "grad_norm": 0.0859375, "learning_rate": 0.001999382520993567, "loss": 0.2743, "step": 12780 }, { "epoch": 0.09078116876318643, "grad_norm": 0.07275390625, "learning_rate": 0.0019993814723172896, "loss": 0.2718, "step": 12790 }, { "epoch": 0.09085214700303254, "grad_norm": 0.09619140625, "learning_rate": 0.001999380422751581, "loss": 0.2709, "step": 12800 }, { "epoch": 0.09092312524287867, "grad_norm": 0.08935546875, "learning_rate": 0.001999379372296441, "loss": 0.2537, "step": 12810 }, { "epoch": 0.09099410348272478, "grad_norm": 0.12109375, "learning_rate": 0.0019993783209518714, "loss": 0.2625, "step": 12820 }, { "epoch": 0.0910650817225709, "grad_norm": 0.1904296875, "learning_rate": 0.0019993772687178727, "loss": 0.2638, "step": 12830 }, { "epoch": 0.09113605996241703, "grad_norm": 0.185546875, "learning_rate": 0.0019993762155944466, "loss": 0.2574, "step": 12840 }, { "epoch": 0.09120703820226314, "grad_norm": 0.08642578125, "learning_rate": 0.001999375161581594, "loss": 0.2615, "step": 12850 }, { "epoch": 0.09127801644210926, "grad_norm": 0.21875, "learning_rate": 0.0019993741066793155, "loss": 0.2541, "step": 12860 }, { "epoch": 0.09134899468195538, "grad_norm": 0.130859375, "learning_rate": 0.001999373050887612, "loss": 0.2528, "step": 12870 }, { "epoch": 0.0914199729218015, "grad_norm": 0.10498046875, "learning_rate": 0.0019993719942064856, "loss": 0.2621, "step": 12880 }, { "epoch": 0.09149095116164761, "grad_norm": 0.087890625, "learning_rate": 0.001999370936635936, "loss": 0.26, "step": 12890 }, { "epoch": 0.09156192940149374, "grad_norm": 0.08447265625, "learning_rate": 0.001999369878175965, "loss": 0.2386, "step": 12900 }, { "epoch": 0.09163290764133986, "grad_norm": 0.0849609375, "learning_rate": 0.001999368818826574, "loss": 0.2832, "step": 12910 }, { "epoch": 0.09170388588118597, "grad_norm": 0.1435546875, "learning_rate": 0.001999367758587763, "loss": 0.2505, "step": 12920 }, { "epoch": 0.0917748641210321, "grad_norm": 0.28515625, "learning_rate": 0.0019993666974595344, "loss": 0.2689, "step": 12930 }, { "epoch": 0.09184584236087821, "grad_norm": 0.10302734375, "learning_rate": 0.0019993656354418883, "loss": 0.2654, "step": 12940 }, { "epoch": 0.09191682060072433, "grad_norm": 0.08447265625, "learning_rate": 0.0019993645725348255, "loss": 0.2776, "step": 12950 }, { "epoch": 0.09198779884057046, "grad_norm": 0.10400390625, "learning_rate": 0.0019993635087383476, "loss": 0.2706, "step": 12960 }, { "epoch": 0.09205877708041657, "grad_norm": 0.09765625, "learning_rate": 0.001999362444052456, "loss": 0.2502, "step": 12970 }, { "epoch": 0.09212975532026269, "grad_norm": 0.11669921875, "learning_rate": 0.0019993613784771514, "loss": 0.2657, "step": 12980 }, { "epoch": 0.09220073356010881, "grad_norm": 0.12060546875, "learning_rate": 0.001999360312012434, "loss": 0.2563, "step": 12990 }, { "epoch": 0.09227171179995493, "grad_norm": 0.1142578125, "learning_rate": 0.001999359244658306, "loss": 0.2653, "step": 13000 }, { "epoch": 0.09234269003980104, "grad_norm": 0.125, "learning_rate": 0.0019993581764147683, "loss": 0.2522, "step": 13010 }, { "epoch": 0.09241366827964717, "grad_norm": 0.17578125, "learning_rate": 0.0019993571072818217, "loss": 0.2676, "step": 13020 }, { "epoch": 0.09248464651949329, "grad_norm": 0.10205078125, "learning_rate": 0.001999356037259467, "loss": 0.251, "step": 13030 }, { "epoch": 0.0925556247593394, "grad_norm": 0.11669921875, "learning_rate": 0.001999354966347706, "loss": 0.2664, "step": 13040 }, { "epoch": 0.09262660299918553, "grad_norm": 0.10791015625, "learning_rate": 0.0019993538945465386, "loss": 0.2647, "step": 13050 }, { "epoch": 0.09269758123903164, "grad_norm": 0.1943359375, "learning_rate": 0.0019993528218559668, "loss": 0.2611, "step": 13060 }, { "epoch": 0.09276855947887776, "grad_norm": 0.1328125, "learning_rate": 0.0019993517482759914, "loss": 0.2693, "step": 13070 }, { "epoch": 0.09283953771872389, "grad_norm": 0.1328125, "learning_rate": 0.001999350673806614, "loss": 0.2912, "step": 13080 }, { "epoch": 0.09291051595857, "grad_norm": 0.1416015625, "learning_rate": 0.0019993495984478348, "loss": 0.2727, "step": 13090 }, { "epoch": 0.09298149419841611, "grad_norm": 0.11865234375, "learning_rate": 0.0019993485221996552, "loss": 0.2733, "step": 13100 }, { "epoch": 0.09305247243826224, "grad_norm": 0.140625, "learning_rate": 0.001999347445062076, "loss": 0.2812, "step": 13110 }, { "epoch": 0.09312345067810836, "grad_norm": 0.10595703125, "learning_rate": 0.001999346367035099, "loss": 0.2743, "step": 13120 }, { "epoch": 0.09319442891795447, "grad_norm": 0.09716796875, "learning_rate": 0.0019993452881187247, "loss": 0.2424, "step": 13130 }, { "epoch": 0.0932654071578006, "grad_norm": 0.0849609375, "learning_rate": 0.0019993442083129542, "loss": 0.2732, "step": 13140 }, { "epoch": 0.09333638539764672, "grad_norm": 0.1455078125, "learning_rate": 0.0019993431276177885, "loss": 0.2723, "step": 13150 }, { "epoch": 0.09340736363749283, "grad_norm": 0.12890625, "learning_rate": 0.001999342046033229, "loss": 0.2747, "step": 13160 }, { "epoch": 0.09347834187733896, "grad_norm": 0.1767578125, "learning_rate": 0.0019993409635592767, "loss": 0.2811, "step": 13170 }, { "epoch": 0.09354932011718507, "grad_norm": 0.10107421875, "learning_rate": 0.0019993398801959324, "loss": 0.2759, "step": 13180 }, { "epoch": 0.09362029835703119, "grad_norm": 0.109375, "learning_rate": 0.0019993387959431974, "loss": 0.2703, "step": 13190 }, { "epoch": 0.09369127659687732, "grad_norm": 0.1064453125, "learning_rate": 0.0019993377108010724, "loss": 0.2635, "step": 13200 }, { "epoch": 0.09376225483672343, "grad_norm": 0.1875, "learning_rate": 0.001999336624769559, "loss": 0.2454, "step": 13210 }, { "epoch": 0.09383323307656954, "grad_norm": 0.12890625, "learning_rate": 0.001999335537848658, "loss": 0.267, "step": 13220 }, { "epoch": 0.09390421131641567, "grad_norm": 0.078125, "learning_rate": 0.0019993344500383705, "loss": 0.2633, "step": 13230 }, { "epoch": 0.09397518955626179, "grad_norm": 0.466796875, "learning_rate": 0.001999333361338698, "loss": 0.2662, "step": 13240 }, { "epoch": 0.0940461677961079, "grad_norm": 0.1357421875, "learning_rate": 0.0019993322717496405, "loss": 0.2709, "step": 13250 }, { "epoch": 0.09411714603595403, "grad_norm": 0.08544921875, "learning_rate": 0.0019993311812712003, "loss": 0.262, "step": 13260 }, { "epoch": 0.09418812427580014, "grad_norm": 0.119140625, "learning_rate": 0.0019993300899033774, "loss": 0.2411, "step": 13270 }, { "epoch": 0.09425910251564626, "grad_norm": 0.1484375, "learning_rate": 0.0019993289976461743, "loss": 0.2645, "step": 13280 }, { "epoch": 0.09433008075549239, "grad_norm": 0.0986328125, "learning_rate": 0.00199932790449959, "loss": 0.271, "step": 13290 }, { "epoch": 0.0944010589953385, "grad_norm": 0.091796875, "learning_rate": 0.0019993268104636277, "loss": 0.2725, "step": 13300 }, { "epoch": 0.09447203723518462, "grad_norm": 0.1298828125, "learning_rate": 0.001999325715538287, "loss": 0.2527, "step": 13310 }, { "epoch": 0.09454301547503074, "grad_norm": 0.16796875, "learning_rate": 0.00199932461972357, "loss": 0.2516, "step": 13320 }, { "epoch": 0.09461399371487686, "grad_norm": 0.1162109375, "learning_rate": 0.001999323523019477, "loss": 0.2546, "step": 13330 }, { "epoch": 0.09468497195472297, "grad_norm": 0.1064453125, "learning_rate": 0.0019993224254260095, "loss": 0.2494, "step": 13340 }, { "epoch": 0.0947559501945691, "grad_norm": 0.09814453125, "learning_rate": 0.0019993213269431684, "loss": 0.2648, "step": 13350 }, { "epoch": 0.09482692843441522, "grad_norm": 0.087890625, "learning_rate": 0.001999320227570955, "loss": 0.2519, "step": 13360 }, { "epoch": 0.09489790667426133, "grad_norm": 0.1240234375, "learning_rate": 0.00199931912730937, "loss": 0.2602, "step": 13370 }, { "epoch": 0.09496888491410746, "grad_norm": 0.09716796875, "learning_rate": 0.0019993180261584148, "loss": 0.2541, "step": 13380 }, { "epoch": 0.09503986315395357, "grad_norm": 0.0791015625, "learning_rate": 0.0019993169241180908, "loss": 0.2641, "step": 13390 }, { "epoch": 0.0951108413937997, "grad_norm": 0.076171875, "learning_rate": 0.0019993158211883983, "loss": 0.2374, "step": 13400 }, { "epoch": 0.09518181963364582, "grad_norm": 0.1591796875, "learning_rate": 0.001999314717369339, "loss": 0.2559, "step": 13410 }, { "epoch": 0.09525279787349193, "grad_norm": 0.1474609375, "learning_rate": 0.001999313612660914, "loss": 0.238, "step": 13420 }, { "epoch": 0.09532377611333806, "grad_norm": 0.138671875, "learning_rate": 0.0019993125070631237, "loss": 0.2571, "step": 13430 }, { "epoch": 0.09539475435318417, "grad_norm": 0.10498046875, "learning_rate": 0.00199931140057597, "loss": 0.2763, "step": 13440 }, { "epoch": 0.09546573259303029, "grad_norm": 0.1025390625, "learning_rate": 0.0019993102931994537, "loss": 0.2618, "step": 13450 }, { "epoch": 0.09553671083287642, "grad_norm": 0.1376953125, "learning_rate": 0.001999309184933576, "loss": 0.2505, "step": 13460 }, { "epoch": 0.09560768907272253, "grad_norm": 0.134765625, "learning_rate": 0.001999308075778338, "loss": 0.24, "step": 13470 }, { "epoch": 0.09567866731256865, "grad_norm": 0.1572265625, "learning_rate": 0.00199930696573374, "loss": 0.2662, "step": 13480 }, { "epoch": 0.09574964555241477, "grad_norm": 0.1376953125, "learning_rate": 0.0019993058547997846, "loss": 0.2712, "step": 13490 }, { "epoch": 0.09582062379226089, "grad_norm": 0.08203125, "learning_rate": 0.0019993047429764715, "loss": 0.276, "step": 13500 }, { "epoch": 0.095891602032107, "grad_norm": 0.205078125, "learning_rate": 0.0019993036302638025, "loss": 0.27, "step": 13510 }, { "epoch": 0.09596258027195313, "grad_norm": 0.072265625, "learning_rate": 0.001999302516661779, "loss": 0.258, "step": 13520 }, { "epoch": 0.09603355851179925, "grad_norm": 0.09814453125, "learning_rate": 0.001999301402170401, "loss": 0.2718, "step": 13530 }, { "epoch": 0.09610453675164536, "grad_norm": 0.11181640625, "learning_rate": 0.0019993002867896704, "loss": 0.2752, "step": 13540 }, { "epoch": 0.09617551499149149, "grad_norm": 0.138671875, "learning_rate": 0.0019992991705195884, "loss": 0.2636, "step": 13550 }, { "epoch": 0.0962464932313376, "grad_norm": 0.091796875, "learning_rate": 0.0019992980533601558, "loss": 0.2463, "step": 13560 }, { "epoch": 0.09631747147118372, "grad_norm": 0.146484375, "learning_rate": 0.0019992969353113737, "loss": 0.2577, "step": 13570 }, { "epoch": 0.09638844971102985, "grad_norm": 0.1201171875, "learning_rate": 0.0019992958163732436, "loss": 0.2576, "step": 13580 }, { "epoch": 0.09645942795087596, "grad_norm": 0.0908203125, "learning_rate": 0.0019992946965457664, "loss": 0.2596, "step": 13590 }, { "epoch": 0.09653040619072208, "grad_norm": 0.26171875, "learning_rate": 0.0019992935758289428, "loss": 0.2458, "step": 13600 }, { "epoch": 0.0966013844305682, "grad_norm": 0.12109375, "learning_rate": 0.001999292454222774, "loss": 0.2574, "step": 13610 }, { "epoch": 0.09667236267041432, "grad_norm": 0.11181640625, "learning_rate": 0.001999291331727262, "loss": 0.2643, "step": 13620 }, { "epoch": 0.09674334091026043, "grad_norm": 0.10107421875, "learning_rate": 0.0019992902083424066, "loss": 0.2504, "step": 13630 }, { "epoch": 0.09681431915010656, "grad_norm": 0.07470703125, "learning_rate": 0.00199928908406821, "loss": 0.2658, "step": 13640 }, { "epoch": 0.09688529738995268, "grad_norm": 0.10791015625, "learning_rate": 0.001999287958904672, "loss": 0.259, "step": 13650 }, { "epoch": 0.09695627562979879, "grad_norm": 0.0966796875, "learning_rate": 0.001999286832851795, "loss": 0.253, "step": 13660 }, { "epoch": 0.09702725386964492, "grad_norm": 0.0888671875, "learning_rate": 0.0019992857059095803, "loss": 0.2607, "step": 13670 }, { "epoch": 0.09709823210949103, "grad_norm": 0.07080078125, "learning_rate": 0.001999284578078028, "loss": 0.2581, "step": 13680 }, { "epoch": 0.09716921034933715, "grad_norm": 0.23046875, "learning_rate": 0.0019992834493571397, "loss": 0.2664, "step": 13690 }, { "epoch": 0.09724018858918328, "grad_norm": 0.1279296875, "learning_rate": 0.0019992823197469162, "loss": 0.2661, "step": 13700 }, { "epoch": 0.09731116682902939, "grad_norm": 0.2119140625, "learning_rate": 0.001999281189247359, "loss": 0.2511, "step": 13710 }, { "epoch": 0.0973821450688755, "grad_norm": 0.0986328125, "learning_rate": 0.001999280057858469, "loss": 0.2665, "step": 13720 }, { "epoch": 0.09745312330872163, "grad_norm": 0.08447265625, "learning_rate": 0.001999278925580247, "loss": 0.2476, "step": 13730 }, { "epoch": 0.09752410154856775, "grad_norm": 0.10205078125, "learning_rate": 0.001999277792412695, "loss": 0.2644, "step": 13740 }, { "epoch": 0.09759507978841386, "grad_norm": 0.11181640625, "learning_rate": 0.001999276658355814, "loss": 0.2597, "step": 13750 }, { "epoch": 0.09766605802825999, "grad_norm": 0.1767578125, "learning_rate": 0.001999275523409604, "loss": 0.2555, "step": 13760 }, { "epoch": 0.0977370362681061, "grad_norm": 0.10400390625, "learning_rate": 0.001999274387574067, "loss": 0.2616, "step": 13770 }, { "epoch": 0.09780801450795222, "grad_norm": 0.10400390625, "learning_rate": 0.001999273250849204, "loss": 0.2462, "step": 13780 }, { "epoch": 0.09787899274779835, "grad_norm": 0.08984375, "learning_rate": 0.0019992721132350167, "loss": 0.2645, "step": 13790 }, { "epoch": 0.09794997098764446, "grad_norm": 0.0966796875, "learning_rate": 0.001999270974731505, "loss": 0.2652, "step": 13800 }, { "epoch": 0.09802094922749058, "grad_norm": 0.1484375, "learning_rate": 0.001999269835338671, "loss": 0.2649, "step": 13810 }, { "epoch": 0.0980919274673367, "grad_norm": 0.09375, "learning_rate": 0.001999268695056515, "loss": 0.2644, "step": 13820 }, { "epoch": 0.09816290570718282, "grad_norm": 0.1064453125, "learning_rate": 0.0019992675538850393, "loss": 0.2544, "step": 13830 }, { "epoch": 0.09823388394702894, "grad_norm": 0.0751953125, "learning_rate": 0.001999266411824244, "loss": 0.2638, "step": 13840 }, { "epoch": 0.09830486218687506, "grad_norm": 0.1787109375, "learning_rate": 0.0019992652688741305, "loss": 0.2627, "step": 13850 }, { "epoch": 0.09837584042672118, "grad_norm": 0.0791015625, "learning_rate": 0.0019992641250347, "loss": 0.2683, "step": 13860 }, { "epoch": 0.09844681866656729, "grad_norm": 0.1435546875, "learning_rate": 0.001999262980305954, "loss": 0.2643, "step": 13870 }, { "epoch": 0.09851779690641342, "grad_norm": 0.1328125, "learning_rate": 0.001999261834687893, "loss": 0.2442, "step": 13880 }, { "epoch": 0.09858877514625954, "grad_norm": 0.1025390625, "learning_rate": 0.0019992606881805187, "loss": 0.2442, "step": 13890 }, { "epoch": 0.09865975338610565, "grad_norm": 0.10986328125, "learning_rate": 0.0019992595407838315, "loss": 0.249, "step": 13900 }, { "epoch": 0.09873073162595178, "grad_norm": 0.08740234375, "learning_rate": 0.001999258392497833, "loss": 0.2532, "step": 13910 }, { "epoch": 0.0988017098657979, "grad_norm": 0.130859375, "learning_rate": 0.0019992572433225244, "loss": 0.2636, "step": 13920 }, { "epoch": 0.09887268810564401, "grad_norm": 0.1279296875, "learning_rate": 0.001999256093257907, "loss": 0.278, "step": 13930 }, { "epoch": 0.09894366634549014, "grad_norm": 0.0830078125, "learning_rate": 0.0019992549423039816, "loss": 0.2521, "step": 13940 }, { "epoch": 0.09901464458533625, "grad_norm": 0.1064453125, "learning_rate": 0.0019992537904607493, "loss": 0.2637, "step": 13950 }, { "epoch": 0.09908562282518237, "grad_norm": 0.13671875, "learning_rate": 0.0019992526377282114, "loss": 0.2457, "step": 13960 }, { "epoch": 0.0991566010650285, "grad_norm": 0.0712890625, "learning_rate": 0.0019992514841063688, "loss": 0.2648, "step": 13970 }, { "epoch": 0.09922757930487461, "grad_norm": 0.099609375, "learning_rate": 0.0019992503295952232, "loss": 0.292, "step": 13980 }, { "epoch": 0.09929855754472072, "grad_norm": 0.185546875, "learning_rate": 0.001999249174194775, "loss": 0.271, "step": 13990 }, { "epoch": 0.09936953578456685, "grad_norm": 0.244140625, "learning_rate": 0.001999248017905026, "loss": 0.2648, "step": 14000 }, { "epoch": 0.09936953578456685, "eval_covost2-zh-en_loss": 4.062838077545166, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.8379, "eval_covost2-zh-en_samples_per_second": 3.071, "eval_covost2-zh-en_steps_per_second": 0.192, "step": 14000 }, { "epoch": 0.09936953578456685, "eval_covost2-en-zh_loss": 3.1462626457214355, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.4116, "eval_covost2-en-zh_samples_per_second": 2.989, "eval_covost2-en-zh_steps_per_second": 0.187, "step": 14000 }, { "epoch": 0.09944051402441297, "grad_norm": 0.1416015625, "learning_rate": 0.001999246860725977, "loss": 0.271, "step": 14010 }, { "epoch": 0.09951149226425908, "grad_norm": 0.0673828125, "learning_rate": 0.0019992457026576293, "loss": 0.2454, "step": 14020 }, { "epoch": 0.09958247050410521, "grad_norm": 0.0908203125, "learning_rate": 0.0019992445436999838, "loss": 0.2516, "step": 14030 }, { "epoch": 0.09965344874395132, "grad_norm": 0.1416015625, "learning_rate": 0.0019992433838530417, "loss": 0.2551, "step": 14040 }, { "epoch": 0.09972442698379744, "grad_norm": 0.1396484375, "learning_rate": 0.0019992422231168046, "loss": 0.2579, "step": 14050 }, { "epoch": 0.09979540522364357, "grad_norm": 0.125, "learning_rate": 0.001999241061491273, "loss": 0.2678, "step": 14060 }, { "epoch": 0.09986638346348968, "grad_norm": 0.12255859375, "learning_rate": 0.0019992398989764487, "loss": 0.2574, "step": 14070 }, { "epoch": 0.0999373617033358, "grad_norm": 0.09228515625, "learning_rate": 0.001999238735572332, "loss": 0.244, "step": 14080 }, { "epoch": 0.10000833994318192, "grad_norm": 0.10205078125, "learning_rate": 0.0019992375712789253, "loss": 0.2453, "step": 14090 }, { "epoch": 0.10007931818302804, "grad_norm": 0.080078125, "learning_rate": 0.0019992364060962284, "loss": 0.2481, "step": 14100 }, { "epoch": 0.10015029642287415, "grad_norm": 0.15234375, "learning_rate": 0.001999235240024243, "loss": 0.2606, "step": 14110 }, { "epoch": 0.10022127466272028, "grad_norm": 0.150390625, "learning_rate": 0.001999234073062971, "loss": 0.2537, "step": 14120 }, { "epoch": 0.1002922529025664, "grad_norm": 0.12890625, "learning_rate": 0.001999232905212412, "loss": 0.2524, "step": 14130 }, { "epoch": 0.10036323114241251, "grad_norm": 0.0966796875, "learning_rate": 0.0019992317364725684, "loss": 0.28, "step": 14140 }, { "epoch": 0.10043420938225864, "grad_norm": 0.193359375, "learning_rate": 0.0019992305668434405, "loss": 0.2521, "step": 14150 }, { "epoch": 0.10050518762210475, "grad_norm": 0.08349609375, "learning_rate": 0.001999229396325031, "loss": 0.2792, "step": 14160 }, { "epoch": 0.10057616586195087, "grad_norm": 0.09375, "learning_rate": 0.001999228224917339, "loss": 0.2542, "step": 14170 }, { "epoch": 0.100647144101797, "grad_norm": 0.1630859375, "learning_rate": 0.001999227052620367, "loss": 0.2624, "step": 14180 }, { "epoch": 0.10071812234164311, "grad_norm": 0.1455078125, "learning_rate": 0.0019992258794341157, "loss": 0.265, "step": 14190 }, { "epoch": 0.10078910058148922, "grad_norm": 0.10546875, "learning_rate": 0.0019992247053585866, "loss": 0.2664, "step": 14200 }, { "epoch": 0.10086007882133535, "grad_norm": 0.11328125, "learning_rate": 0.0019992235303937806, "loss": 0.2609, "step": 14210 }, { "epoch": 0.10093105706118147, "grad_norm": 0.0732421875, "learning_rate": 0.001999222354539699, "loss": 0.2583, "step": 14220 }, { "epoch": 0.10100203530102758, "grad_norm": 0.0947265625, "learning_rate": 0.001999221177796342, "loss": 0.2573, "step": 14230 }, { "epoch": 0.10107301354087371, "grad_norm": 0.1064453125, "learning_rate": 0.0019992200001637127, "loss": 0.2599, "step": 14240 }, { "epoch": 0.10114399178071982, "grad_norm": 0.09033203125, "learning_rate": 0.0019992188216418107, "loss": 0.2643, "step": 14250 }, { "epoch": 0.10121497002056594, "grad_norm": 0.0888671875, "learning_rate": 0.0019992176422306378, "loss": 0.2576, "step": 14260 }, { "epoch": 0.10128594826041207, "grad_norm": 0.0751953125, "learning_rate": 0.001999216461930195, "loss": 0.2481, "step": 14270 }, { "epoch": 0.10135692650025818, "grad_norm": 0.09130859375, "learning_rate": 0.001999215280740483, "loss": 0.2527, "step": 14280 }, { "epoch": 0.1014279047401043, "grad_norm": 0.1474609375, "learning_rate": 0.001999214098661504, "loss": 0.2532, "step": 14290 }, { "epoch": 0.10149888297995042, "grad_norm": 0.1181640625, "learning_rate": 0.001999212915693258, "loss": 0.248, "step": 14300 }, { "epoch": 0.10156986121979654, "grad_norm": 0.2099609375, "learning_rate": 0.0019992117318357473, "loss": 0.2556, "step": 14310 }, { "epoch": 0.10164083945964265, "grad_norm": 0.1220703125, "learning_rate": 0.0019992105470889726, "loss": 0.2532, "step": 14320 }, { "epoch": 0.10171181769948878, "grad_norm": 0.1328125, "learning_rate": 0.001999209361452935, "loss": 0.2533, "step": 14330 }, { "epoch": 0.1017827959393349, "grad_norm": 0.1220703125, "learning_rate": 0.0019992081749276355, "loss": 0.2656, "step": 14340 }, { "epoch": 0.10185377417918101, "grad_norm": 0.1171875, "learning_rate": 0.0019992069875130756, "loss": 0.2621, "step": 14350 }, { "epoch": 0.10192475241902714, "grad_norm": 0.1064453125, "learning_rate": 0.001999205799209256, "loss": 0.2694, "step": 14360 }, { "epoch": 0.10199573065887325, "grad_norm": 0.07568359375, "learning_rate": 0.0019992046100161787, "loss": 0.2463, "step": 14370 }, { "epoch": 0.10206670889871937, "grad_norm": 0.10498046875, "learning_rate": 0.001999203419933844, "loss": 0.2653, "step": 14380 }, { "epoch": 0.1021376871385655, "grad_norm": 0.09521484375, "learning_rate": 0.001999202228962254, "loss": 0.2627, "step": 14390 }, { "epoch": 0.10220866537841161, "grad_norm": 0.08203125, "learning_rate": 0.0019992010371014087, "loss": 0.2701, "step": 14400 }, { "epoch": 0.10227964361825773, "grad_norm": 0.11328125, "learning_rate": 0.0019991998443513103, "loss": 0.2593, "step": 14410 }, { "epoch": 0.10235062185810385, "grad_norm": 0.1298828125, "learning_rate": 0.00199919865071196, "loss": 0.2557, "step": 14420 }, { "epoch": 0.10242160009794997, "grad_norm": 0.099609375, "learning_rate": 0.001999197456183358, "loss": 0.2538, "step": 14430 }, { "epoch": 0.10249257833779608, "grad_norm": 0.0986328125, "learning_rate": 0.001999196260765506, "loss": 0.2612, "step": 14440 }, { "epoch": 0.10256355657764221, "grad_norm": 0.146484375, "learning_rate": 0.0019991950644584057, "loss": 0.252, "step": 14450 }, { "epoch": 0.10263453481748833, "grad_norm": 0.1455078125, "learning_rate": 0.0019991938672620575, "loss": 0.272, "step": 14460 }, { "epoch": 0.10270551305733444, "grad_norm": 0.17578125, "learning_rate": 0.001999192669176463, "loss": 0.2566, "step": 14470 }, { "epoch": 0.10277649129718057, "grad_norm": 0.10107421875, "learning_rate": 0.0019991914702016237, "loss": 0.251, "step": 14480 }, { "epoch": 0.10284746953702668, "grad_norm": 0.1318359375, "learning_rate": 0.0019991902703375398, "loss": 0.264, "step": 14490 }, { "epoch": 0.1029184477768728, "grad_norm": 0.1328125, "learning_rate": 0.0019991890695842136, "loss": 0.2582, "step": 14500 }, { "epoch": 0.10298942601671893, "grad_norm": 0.1435546875, "learning_rate": 0.001999187867941645, "loss": 0.2688, "step": 14510 }, { "epoch": 0.10306040425656504, "grad_norm": 0.1044921875, "learning_rate": 0.0019991866654098366, "loss": 0.2675, "step": 14520 }, { "epoch": 0.10313138249641116, "grad_norm": 0.08251953125, "learning_rate": 0.001999185461988789, "loss": 0.2684, "step": 14530 }, { "epoch": 0.10320236073625728, "grad_norm": 0.10205078125, "learning_rate": 0.0019991842576785033, "loss": 0.2455, "step": 14540 }, { "epoch": 0.1032733389761034, "grad_norm": 0.12890625, "learning_rate": 0.0019991830524789802, "loss": 0.2509, "step": 14550 }, { "epoch": 0.10334431721594953, "grad_norm": 0.13671875, "learning_rate": 0.001999181846390222, "loss": 0.2586, "step": 14560 }, { "epoch": 0.10341529545579564, "grad_norm": 0.11279296875, "learning_rate": 0.001999180639412229, "loss": 0.2605, "step": 14570 }, { "epoch": 0.10348627369564176, "grad_norm": 0.1513671875, "learning_rate": 0.0019991794315450027, "loss": 0.2708, "step": 14580 }, { "epoch": 0.10355725193548788, "grad_norm": 0.09912109375, "learning_rate": 0.0019991782227885444, "loss": 0.2668, "step": 14590 }, { "epoch": 0.103628230175334, "grad_norm": 0.1357421875, "learning_rate": 0.0019991770131428553, "loss": 0.2497, "step": 14600 }, { "epoch": 0.10369920841518011, "grad_norm": 0.126953125, "learning_rate": 0.0019991758026079364, "loss": 0.2656, "step": 14610 }, { "epoch": 0.10377018665502624, "grad_norm": 0.1611328125, "learning_rate": 0.001999174591183789, "loss": 0.2828, "step": 14620 }, { "epoch": 0.10384116489487236, "grad_norm": 0.076171875, "learning_rate": 0.001999173378870414, "loss": 0.2446, "step": 14630 }, { "epoch": 0.10391214313471847, "grad_norm": 0.1455078125, "learning_rate": 0.0019991721656678133, "loss": 0.2637, "step": 14640 }, { "epoch": 0.1039831213745646, "grad_norm": 0.1376953125, "learning_rate": 0.0019991709515759875, "loss": 0.2623, "step": 14650 }, { "epoch": 0.10405409961441071, "grad_norm": 0.0849609375, "learning_rate": 0.0019991697365949377, "loss": 0.2431, "step": 14660 }, { "epoch": 0.10412507785425683, "grad_norm": 0.09375, "learning_rate": 0.001999168520724666, "loss": 0.2525, "step": 14670 }, { "epoch": 0.10419605609410296, "grad_norm": 0.134765625, "learning_rate": 0.0019991673039651725, "loss": 0.2519, "step": 14680 }, { "epoch": 0.10426703433394907, "grad_norm": 0.1103515625, "learning_rate": 0.001999166086316459, "loss": 0.2504, "step": 14690 }, { "epoch": 0.10433801257379519, "grad_norm": 0.12109375, "learning_rate": 0.001999164867778527, "loss": 0.2618, "step": 14700 }, { "epoch": 0.10440899081364131, "grad_norm": 0.111328125, "learning_rate": 0.0019991636483513765, "loss": 0.2583, "step": 14710 }, { "epoch": 0.10447996905348743, "grad_norm": 0.0712890625, "learning_rate": 0.00199916242803501, "loss": 0.252, "step": 14720 }, { "epoch": 0.10455094729333354, "grad_norm": 0.12890625, "learning_rate": 0.001999161206829428, "loss": 0.2607, "step": 14730 }, { "epoch": 0.10462192553317967, "grad_norm": 0.1865234375, "learning_rate": 0.0019991599847346323, "loss": 0.2537, "step": 14740 }, { "epoch": 0.10469290377302579, "grad_norm": 0.171875, "learning_rate": 0.0019991587617506235, "loss": 0.2478, "step": 14750 }, { "epoch": 0.1047638820128719, "grad_norm": 0.125, "learning_rate": 0.0019991575378774033, "loss": 0.2599, "step": 14760 }, { "epoch": 0.10483486025271803, "grad_norm": 0.08935546875, "learning_rate": 0.001999156313114972, "loss": 0.255, "step": 14770 }, { "epoch": 0.10490583849256414, "grad_norm": 0.11669921875, "learning_rate": 0.001999155087463332, "loss": 0.2592, "step": 14780 }, { "epoch": 0.10497681673241026, "grad_norm": 0.111328125, "learning_rate": 0.001999153860922484, "loss": 0.2429, "step": 14790 }, { "epoch": 0.10504779497225639, "grad_norm": 0.12451171875, "learning_rate": 0.0019991526334924292, "loss": 0.2603, "step": 14800 }, { "epoch": 0.1051187732121025, "grad_norm": 0.091796875, "learning_rate": 0.0019991514051731682, "loss": 0.254, "step": 14810 }, { "epoch": 0.10518975145194862, "grad_norm": 0.1396484375, "learning_rate": 0.001999150175964703, "loss": 0.2398, "step": 14820 }, { "epoch": 0.10526072969179474, "grad_norm": 0.1396484375, "learning_rate": 0.001999148945867035, "loss": 0.2691, "step": 14830 }, { "epoch": 0.10533170793164086, "grad_norm": 0.1318359375, "learning_rate": 0.001999147714880165, "loss": 0.2477, "step": 14840 }, { "epoch": 0.10540268617148697, "grad_norm": 0.1328125, "learning_rate": 0.0019991464830040944, "loss": 0.2475, "step": 14850 }, { "epoch": 0.1054736644113331, "grad_norm": 0.1005859375, "learning_rate": 0.001999145250238824, "loss": 0.2605, "step": 14860 }, { "epoch": 0.10554464265117922, "grad_norm": 0.0751953125, "learning_rate": 0.0019991440165843554, "loss": 0.2591, "step": 14870 }, { "epoch": 0.10561562089102533, "grad_norm": 0.109375, "learning_rate": 0.0019991427820406893, "loss": 0.2741, "step": 14880 }, { "epoch": 0.10568659913087146, "grad_norm": 0.06640625, "learning_rate": 0.001999141546607828, "loss": 0.2396, "step": 14890 }, { "epoch": 0.10575757737071757, "grad_norm": 0.1337890625, "learning_rate": 0.001999140310285772, "loss": 0.2693, "step": 14900 }, { "epoch": 0.10582855561056369, "grad_norm": 0.10009765625, "learning_rate": 0.001999139073074522, "loss": 0.2629, "step": 14910 }, { "epoch": 0.10589953385040982, "grad_norm": 0.09912109375, "learning_rate": 0.00199913783497408, "loss": 0.2459, "step": 14920 }, { "epoch": 0.10597051209025593, "grad_norm": 0.099609375, "learning_rate": 0.0019991365959844474, "loss": 0.265, "step": 14930 }, { "epoch": 0.10604149033010205, "grad_norm": 0.0927734375, "learning_rate": 0.001999135356105625, "loss": 0.2559, "step": 14940 }, { "epoch": 0.10611246856994817, "grad_norm": 0.1494140625, "learning_rate": 0.001999134115337614, "loss": 0.2777, "step": 14950 }, { "epoch": 0.10618344680979429, "grad_norm": 0.06591796875, "learning_rate": 0.0019991328736804156, "loss": 0.254, "step": 14960 }, { "epoch": 0.1062544250496404, "grad_norm": 0.166015625, "learning_rate": 0.0019991316311340312, "loss": 0.2603, "step": 14970 }, { "epoch": 0.10632540328948653, "grad_norm": 0.10107421875, "learning_rate": 0.0019991303876984623, "loss": 0.2575, "step": 14980 }, { "epoch": 0.10639638152933265, "grad_norm": 0.0927734375, "learning_rate": 0.0019991291433737093, "loss": 0.2655, "step": 14990 }, { "epoch": 0.10646735976917876, "grad_norm": 0.1162109375, "learning_rate": 0.001999127898159774, "loss": 0.2564, "step": 15000 }, { "epoch": 0.10653833800902489, "grad_norm": 0.1259765625, "learning_rate": 0.0019991266520566577, "loss": 0.2673, "step": 15010 }, { "epoch": 0.106609316248871, "grad_norm": 0.09619140625, "learning_rate": 0.0019991254050643614, "loss": 0.2664, "step": 15020 }, { "epoch": 0.10668029448871712, "grad_norm": 0.1181640625, "learning_rate": 0.0019991241571828866, "loss": 0.2564, "step": 15030 }, { "epoch": 0.10675127272856325, "grad_norm": 0.10009765625, "learning_rate": 0.001999122908412234, "loss": 0.2573, "step": 15040 }, { "epoch": 0.10682225096840936, "grad_norm": 0.109375, "learning_rate": 0.001999121658752406, "loss": 0.2634, "step": 15050 }, { "epoch": 0.10689322920825547, "grad_norm": 0.09130859375, "learning_rate": 0.001999120408203402, "loss": 0.2616, "step": 15060 }, { "epoch": 0.1069642074481016, "grad_norm": 0.1982421875, "learning_rate": 0.001999119156765225, "loss": 0.2793, "step": 15070 }, { "epoch": 0.10703518568794772, "grad_norm": 0.0849609375, "learning_rate": 0.001999117904437875, "loss": 0.2469, "step": 15080 }, { "epoch": 0.10710616392779383, "grad_norm": 0.1396484375, "learning_rate": 0.001999116651221354, "loss": 0.2632, "step": 15090 }, { "epoch": 0.10717714216763996, "grad_norm": 0.09130859375, "learning_rate": 0.001999115397115663, "loss": 0.2642, "step": 15100 }, { "epoch": 0.10724812040748608, "grad_norm": 0.09033203125, "learning_rate": 0.0019991141421208036, "loss": 0.2679, "step": 15110 }, { "epoch": 0.10731909864733219, "grad_norm": 0.1669921875, "learning_rate": 0.0019991128862367757, "loss": 0.2453, "step": 15120 }, { "epoch": 0.10739007688717832, "grad_norm": 0.10009765625, "learning_rate": 0.0019991116294635824, "loss": 0.2706, "step": 15130 }, { "epoch": 0.10746105512702443, "grad_norm": 0.1142578125, "learning_rate": 0.0019991103718012236, "loss": 0.2611, "step": 15140 }, { "epoch": 0.10753203336687055, "grad_norm": 0.08251953125, "learning_rate": 0.001999109113249701, "loss": 0.2545, "step": 15150 }, { "epoch": 0.10760301160671668, "grad_norm": 0.10888671875, "learning_rate": 0.001999107853809016, "loss": 0.2701, "step": 15160 }, { "epoch": 0.10767398984656279, "grad_norm": 0.09716796875, "learning_rate": 0.0019991065934791695, "loss": 0.2541, "step": 15170 }, { "epoch": 0.1077449680864089, "grad_norm": 0.10595703125, "learning_rate": 0.001999105332260163, "loss": 0.2787, "step": 15180 }, { "epoch": 0.10781594632625503, "grad_norm": 0.19140625, "learning_rate": 0.001999104070151998, "loss": 0.2685, "step": 15190 }, { "epoch": 0.10788692456610115, "grad_norm": 0.1103515625, "learning_rate": 0.001999102807154675, "loss": 0.2568, "step": 15200 }, { "epoch": 0.10795790280594726, "grad_norm": 0.11962890625, "learning_rate": 0.001999101543268196, "loss": 0.2625, "step": 15210 }, { "epoch": 0.10802888104579339, "grad_norm": 0.0751953125, "learning_rate": 0.0019991002784925617, "loss": 0.2517, "step": 15220 }, { "epoch": 0.1080998592856395, "grad_norm": 0.1669921875, "learning_rate": 0.001999099012827774, "loss": 0.2617, "step": 15230 }, { "epoch": 0.10817083752548562, "grad_norm": 0.1728515625, "learning_rate": 0.001999097746273833, "loss": 0.2589, "step": 15240 }, { "epoch": 0.10824181576533175, "grad_norm": 0.1591796875, "learning_rate": 0.0019990964788307412, "loss": 0.2585, "step": 15250 }, { "epoch": 0.10831279400517786, "grad_norm": 0.1220703125, "learning_rate": 0.001999095210498499, "loss": 0.274, "step": 15260 }, { "epoch": 0.10838377224502398, "grad_norm": 0.154296875, "learning_rate": 0.0019990939412771083, "loss": 0.2752, "step": 15270 }, { "epoch": 0.1084547504848701, "grad_norm": 0.095703125, "learning_rate": 0.00199909267116657, "loss": 0.25, "step": 15280 }, { "epoch": 0.10852572872471622, "grad_norm": 0.134765625, "learning_rate": 0.0019990914001668855, "loss": 0.2552, "step": 15290 }, { "epoch": 0.10859670696456233, "grad_norm": 0.091796875, "learning_rate": 0.0019990901282780555, "loss": 0.2646, "step": 15300 }, { "epoch": 0.10866768520440846, "grad_norm": 0.107421875, "learning_rate": 0.0019990888555000822, "loss": 0.2729, "step": 15310 }, { "epoch": 0.10873866344425458, "grad_norm": 0.177734375, "learning_rate": 0.001999087581832966, "loss": 0.266, "step": 15320 }, { "epoch": 0.10880964168410069, "grad_norm": 0.09423828125, "learning_rate": 0.0019990863072767086, "loss": 0.2475, "step": 15330 }, { "epoch": 0.10888061992394682, "grad_norm": 0.1181640625, "learning_rate": 0.0019990850318313113, "loss": 0.2464, "step": 15340 }, { "epoch": 0.10895159816379293, "grad_norm": 0.109375, "learning_rate": 0.001999083755496775, "loss": 0.2602, "step": 15350 }, { "epoch": 0.10902257640363905, "grad_norm": 0.076171875, "learning_rate": 0.001999082478273102, "loss": 0.2452, "step": 15360 }, { "epoch": 0.10909355464348518, "grad_norm": 0.158203125, "learning_rate": 0.0019990812001602918, "loss": 0.2682, "step": 15370 }, { "epoch": 0.10916453288333129, "grad_norm": 0.08935546875, "learning_rate": 0.001999079921158347, "loss": 0.2653, "step": 15380 }, { "epoch": 0.1092355111231774, "grad_norm": 0.154296875, "learning_rate": 0.0019990786412672685, "loss": 0.2648, "step": 15390 }, { "epoch": 0.10930648936302353, "grad_norm": 0.0693359375, "learning_rate": 0.001999077360487058, "loss": 0.259, "step": 15400 }, { "epoch": 0.10937746760286965, "grad_norm": 0.1201171875, "learning_rate": 0.0019990760788177156, "loss": 0.2689, "step": 15410 }, { "epoch": 0.10944844584271576, "grad_norm": 0.126953125, "learning_rate": 0.001999074796259244, "loss": 0.2565, "step": 15420 }, { "epoch": 0.10951942408256189, "grad_norm": 0.08447265625, "learning_rate": 0.0019990735128116435, "loss": 0.2579, "step": 15430 }, { "epoch": 0.109590402322408, "grad_norm": 0.13671875, "learning_rate": 0.0019990722284749154, "loss": 0.2623, "step": 15440 }, { "epoch": 0.10966138056225412, "grad_norm": 0.1044921875, "learning_rate": 0.0019990709432490617, "loss": 0.2587, "step": 15450 }, { "epoch": 0.10973235880210025, "grad_norm": 0.11083984375, "learning_rate": 0.001999069657134083, "loss": 0.2619, "step": 15460 }, { "epoch": 0.10980333704194636, "grad_norm": 0.09130859375, "learning_rate": 0.00199906837012998, "loss": 0.2759, "step": 15470 }, { "epoch": 0.10987431528179248, "grad_norm": 0.1337890625, "learning_rate": 0.001999067082236756, "loss": 0.2563, "step": 15480 }, { "epoch": 0.1099452935216386, "grad_norm": 0.09619140625, "learning_rate": 0.00199906579345441, "loss": 0.2613, "step": 15490 }, { "epoch": 0.11001627176148472, "grad_norm": 0.1025390625, "learning_rate": 0.001999064503782945, "loss": 0.282, "step": 15500 }, { "epoch": 0.11008725000133084, "grad_norm": 0.1708984375, "learning_rate": 0.001999063213222361, "loss": 0.2653, "step": 15510 }, { "epoch": 0.11015822824117696, "grad_norm": 0.09033203125, "learning_rate": 0.00199906192177266, "loss": 0.2519, "step": 15520 }, { "epoch": 0.11022920648102308, "grad_norm": 0.1416015625, "learning_rate": 0.001999060629433843, "loss": 0.2477, "step": 15530 }, { "epoch": 0.1103001847208692, "grad_norm": 0.263671875, "learning_rate": 0.0019990593362059117, "loss": 0.2527, "step": 15540 }, { "epoch": 0.11037116296071532, "grad_norm": 0.10009765625, "learning_rate": 0.0019990580420888673, "loss": 0.2574, "step": 15550 }, { "epoch": 0.11044214120056144, "grad_norm": 0.154296875, "learning_rate": 0.0019990567470827103, "loss": 0.2551, "step": 15560 }, { "epoch": 0.11051311944040755, "grad_norm": 0.1083984375, "learning_rate": 0.0019990554511874425, "loss": 0.2411, "step": 15570 }, { "epoch": 0.11058409768025368, "grad_norm": 0.1513671875, "learning_rate": 0.0019990541544030655, "loss": 0.2571, "step": 15580 }, { "epoch": 0.1106550759200998, "grad_norm": 0.1259765625, "learning_rate": 0.00199905285672958, "loss": 0.2397, "step": 15590 }, { "epoch": 0.11072605415994591, "grad_norm": 0.09814453125, "learning_rate": 0.001999051558166988, "loss": 0.2697, "step": 15600 }, { "epoch": 0.11079703239979204, "grad_norm": 0.1806640625, "learning_rate": 0.0019990502587152897, "loss": 0.2726, "step": 15610 }, { "epoch": 0.11086801063963815, "grad_norm": 0.10205078125, "learning_rate": 0.0019990489583744876, "loss": 0.2599, "step": 15620 }, { "epoch": 0.11093898887948427, "grad_norm": 0.0771484375, "learning_rate": 0.0019990476571445826, "loss": 0.2533, "step": 15630 }, { "epoch": 0.1110099671193304, "grad_norm": 0.11083984375, "learning_rate": 0.001999046355025575, "loss": 0.2363, "step": 15640 }, { "epoch": 0.11108094535917651, "grad_norm": 0.09326171875, "learning_rate": 0.0019990450520174677, "loss": 0.2516, "step": 15650 }, { "epoch": 0.11115192359902262, "grad_norm": 0.11865234375, "learning_rate": 0.001999043748120261, "loss": 0.2487, "step": 15660 }, { "epoch": 0.11122290183886875, "grad_norm": 0.0830078125, "learning_rate": 0.001999042443333956, "loss": 0.2466, "step": 15670 }, { "epoch": 0.11129388007871487, "grad_norm": 0.0888671875, "learning_rate": 0.0019990411376585547, "loss": 0.2605, "step": 15680 }, { "epoch": 0.11136485831856098, "grad_norm": 0.09228515625, "learning_rate": 0.001999039831094058, "loss": 0.2617, "step": 15690 }, { "epoch": 0.11143583655840711, "grad_norm": 0.123046875, "learning_rate": 0.001999038523640467, "loss": 0.2557, "step": 15700 }, { "epoch": 0.11150681479825322, "grad_norm": 0.115234375, "learning_rate": 0.001999037215297784, "loss": 0.257, "step": 15710 }, { "epoch": 0.11157779303809935, "grad_norm": 0.11083984375, "learning_rate": 0.001999035906066009, "loss": 0.251, "step": 15720 }, { "epoch": 0.11164877127794547, "grad_norm": 0.1572265625, "learning_rate": 0.0019990345959451437, "loss": 0.2577, "step": 15730 }, { "epoch": 0.11171974951779158, "grad_norm": 0.10888671875, "learning_rate": 0.0019990332849351895, "loss": 0.2627, "step": 15740 }, { "epoch": 0.11179072775763771, "grad_norm": 0.09716796875, "learning_rate": 0.0019990319730361483, "loss": 0.2732, "step": 15750 }, { "epoch": 0.11186170599748382, "grad_norm": 0.2109375, "learning_rate": 0.0019990306602480205, "loss": 0.2734, "step": 15760 }, { "epoch": 0.11193268423732994, "grad_norm": 0.091796875, "learning_rate": 0.0019990293465708075, "loss": 0.2479, "step": 15770 }, { "epoch": 0.11200366247717607, "grad_norm": 0.15625, "learning_rate": 0.001999028032004511, "loss": 0.2344, "step": 15780 }, { "epoch": 0.11207464071702218, "grad_norm": 0.123046875, "learning_rate": 0.001999026716549132, "loss": 0.2766, "step": 15790 }, { "epoch": 0.1121456189568683, "grad_norm": 0.173828125, "learning_rate": 0.001999025400204672, "loss": 0.2538, "step": 15800 }, { "epoch": 0.11221659719671442, "grad_norm": 0.0947265625, "learning_rate": 0.0019990240829711324, "loss": 0.2655, "step": 15810 }, { "epoch": 0.11228757543656054, "grad_norm": 0.130859375, "learning_rate": 0.001999022764848514, "loss": 0.2643, "step": 15820 }, { "epoch": 0.11235855367640665, "grad_norm": 0.0908203125, "learning_rate": 0.001999021445836819, "loss": 0.2503, "step": 15830 }, { "epoch": 0.11242953191625278, "grad_norm": 0.1689453125, "learning_rate": 0.001999020125936048, "loss": 0.2613, "step": 15840 }, { "epoch": 0.1125005101560989, "grad_norm": 0.0908203125, "learning_rate": 0.001999018805146202, "loss": 0.2531, "step": 15850 }, { "epoch": 0.11257148839594501, "grad_norm": 0.265625, "learning_rate": 0.0019990174834672823, "loss": 0.2577, "step": 15860 }, { "epoch": 0.11264246663579114, "grad_norm": 0.138671875, "learning_rate": 0.001999016160899292, "loss": 0.2619, "step": 15870 }, { "epoch": 0.11271344487563725, "grad_norm": 0.10791015625, "learning_rate": 0.00199901483744223, "loss": 0.274, "step": 15880 }, { "epoch": 0.11278442311548337, "grad_norm": 0.0849609375, "learning_rate": 0.001999013513096099, "loss": 0.2631, "step": 15890 }, { "epoch": 0.1128554013553295, "grad_norm": 0.171875, "learning_rate": 0.0019990121878609, "loss": 0.2664, "step": 15900 }, { "epoch": 0.11292637959517561, "grad_norm": 0.10546875, "learning_rate": 0.0019990108617366346, "loss": 0.2417, "step": 15910 }, { "epoch": 0.11299735783502173, "grad_norm": 0.1103515625, "learning_rate": 0.0019990095347233033, "loss": 0.2759, "step": 15920 }, { "epoch": 0.11306833607486785, "grad_norm": 0.12890625, "learning_rate": 0.0019990082068209085, "loss": 0.2453, "step": 15930 }, { "epoch": 0.11313931431471397, "grad_norm": 0.146484375, "learning_rate": 0.00199900687802945, "loss": 0.2491, "step": 15940 }, { "epoch": 0.11321029255456008, "grad_norm": 0.1357421875, "learning_rate": 0.001999005548348931, "loss": 0.2536, "step": 15950 }, { "epoch": 0.11328127079440621, "grad_norm": 0.34375, "learning_rate": 0.0019990042177793514, "loss": 0.2566, "step": 15960 }, { "epoch": 0.11335224903425233, "grad_norm": 0.09912109375, "learning_rate": 0.001999002886320713, "loss": 0.2415, "step": 15970 }, { "epoch": 0.11342322727409844, "grad_norm": 0.10107421875, "learning_rate": 0.001999001553973017, "loss": 0.2669, "step": 15980 }, { "epoch": 0.11349420551394457, "grad_norm": 0.2333984375, "learning_rate": 0.001999000220736265, "loss": 0.2591, "step": 15990 }, { "epoch": 0.11356518375379068, "grad_norm": 0.10595703125, "learning_rate": 0.001998998886610458, "loss": 0.2765, "step": 16000 }, { "epoch": 0.11356518375379068, "eval_covost2-zh-en_loss": 3.9471583366394043, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.8752, "eval_covost2-zh-en_samples_per_second": 2.926, "eval_covost2-zh-en_steps_per_second": 0.183, "step": 16000 }, { "epoch": 0.11356518375379068, "eval_covost2-en-zh_loss": 3.1456193923950195, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.6752, "eval_covost2-en-zh_samples_per_second": 3.253, "eval_covost2-en-zh_steps_per_second": 0.203, "step": 16000 }, { "epoch": 0.1136361619936368, "grad_norm": 0.11181640625, "learning_rate": 0.0019989975515955975, "loss": 0.2669, "step": 16010 }, { "epoch": 0.11370714023348293, "grad_norm": 0.0830078125, "learning_rate": 0.001998996215691685, "loss": 0.2592, "step": 16020 }, { "epoch": 0.11377811847332904, "grad_norm": 0.138671875, "learning_rate": 0.001998994878898721, "loss": 0.2333, "step": 16030 }, { "epoch": 0.11384909671317515, "grad_norm": 0.078125, "learning_rate": 0.0019989935412167083, "loss": 0.2599, "step": 16040 }, { "epoch": 0.11392007495302128, "grad_norm": 0.103515625, "learning_rate": 0.0019989922026456466, "loss": 0.238, "step": 16050 }, { "epoch": 0.1139910531928674, "grad_norm": 0.0947265625, "learning_rate": 0.001998990863185538, "loss": 0.2819, "step": 16060 }, { "epoch": 0.11406203143271351, "grad_norm": 0.08447265625, "learning_rate": 0.001998989522836384, "loss": 0.2669, "step": 16070 }, { "epoch": 0.11413300967255964, "grad_norm": 0.1474609375, "learning_rate": 0.0019989881815981854, "loss": 0.2347, "step": 16080 }, { "epoch": 0.11420398791240575, "grad_norm": 0.138671875, "learning_rate": 0.001998986839470944, "loss": 0.2735, "step": 16090 }, { "epoch": 0.11427496615225187, "grad_norm": 0.1328125, "learning_rate": 0.0019989854964546612, "loss": 0.2552, "step": 16100 }, { "epoch": 0.114345944392098, "grad_norm": 0.09375, "learning_rate": 0.001998984152549338, "loss": 0.2509, "step": 16110 }, { "epoch": 0.11441692263194411, "grad_norm": 0.1298828125, "learning_rate": 0.0019989828077549756, "loss": 0.255, "step": 16120 }, { "epoch": 0.11448790087179023, "grad_norm": 0.1435546875, "learning_rate": 0.001998981462071576, "loss": 0.263, "step": 16130 }, { "epoch": 0.11455887911163636, "grad_norm": 0.12109375, "learning_rate": 0.00199898011549914, "loss": 0.2361, "step": 16140 }, { "epoch": 0.11462985735148247, "grad_norm": 0.1416015625, "learning_rate": 0.0019989787680376684, "loss": 0.2775, "step": 16150 }, { "epoch": 0.11470083559132858, "grad_norm": 0.134765625, "learning_rate": 0.0019989774196871634, "loss": 0.265, "step": 16160 }, { "epoch": 0.11477181383117471, "grad_norm": 0.11962890625, "learning_rate": 0.0019989760704476264, "loss": 0.2513, "step": 16170 }, { "epoch": 0.11484279207102083, "grad_norm": 0.08984375, "learning_rate": 0.0019989747203190576, "loss": 0.2623, "step": 16180 }, { "epoch": 0.11491377031086694, "grad_norm": 0.1767578125, "learning_rate": 0.00199897336930146, "loss": 0.2431, "step": 16190 }, { "epoch": 0.11498474855071307, "grad_norm": 0.1865234375, "learning_rate": 0.0019989720173948337, "loss": 0.2536, "step": 16200 }, { "epoch": 0.11505572679055918, "grad_norm": 0.11279296875, "learning_rate": 0.0019989706645991805, "loss": 0.2355, "step": 16210 }, { "epoch": 0.1151267050304053, "grad_norm": 0.1318359375, "learning_rate": 0.001998969310914502, "loss": 0.2482, "step": 16220 }, { "epoch": 0.11519768327025143, "grad_norm": 0.07763671875, "learning_rate": 0.0019989679563407986, "loss": 0.2481, "step": 16230 }, { "epoch": 0.11526866151009754, "grad_norm": 0.1689453125, "learning_rate": 0.0019989666008780726, "loss": 0.272, "step": 16240 }, { "epoch": 0.11533963974994366, "grad_norm": 0.0751953125, "learning_rate": 0.001998965244526325, "loss": 0.2465, "step": 16250 }, { "epoch": 0.11541061798978978, "grad_norm": 0.10693359375, "learning_rate": 0.001998963887285557, "loss": 0.2834, "step": 16260 }, { "epoch": 0.1154815962296359, "grad_norm": 0.1396484375, "learning_rate": 0.00199896252915577, "loss": 0.2353, "step": 16270 }, { "epoch": 0.11555257446948201, "grad_norm": 0.1123046875, "learning_rate": 0.0019989611701369653, "loss": 0.2543, "step": 16280 }, { "epoch": 0.11562355270932814, "grad_norm": 0.12109375, "learning_rate": 0.0019989598102291444, "loss": 0.2744, "step": 16290 }, { "epoch": 0.11569453094917426, "grad_norm": 0.099609375, "learning_rate": 0.001998958449432309, "loss": 0.2813, "step": 16300 }, { "epoch": 0.11576550918902037, "grad_norm": 0.2138671875, "learning_rate": 0.00199895708774646, "loss": 0.2616, "step": 16310 }, { "epoch": 0.1158364874288665, "grad_norm": 0.08447265625, "learning_rate": 0.0019989557251715983, "loss": 0.2547, "step": 16320 }, { "epoch": 0.11590746566871261, "grad_norm": 0.1220703125, "learning_rate": 0.0019989543617077257, "loss": 0.2537, "step": 16330 }, { "epoch": 0.11597844390855873, "grad_norm": 0.13671875, "learning_rate": 0.001998952997354844, "loss": 0.2485, "step": 16340 }, { "epoch": 0.11604942214840486, "grad_norm": 0.10888671875, "learning_rate": 0.001998951632112954, "loss": 0.2477, "step": 16350 }, { "epoch": 0.11612040038825097, "grad_norm": 0.09375, "learning_rate": 0.001998950265982057, "loss": 0.2546, "step": 16360 }, { "epoch": 0.11619137862809709, "grad_norm": 0.08154296875, "learning_rate": 0.0019989488989621547, "loss": 0.2525, "step": 16370 }, { "epoch": 0.11626235686794321, "grad_norm": 0.10888671875, "learning_rate": 0.0019989475310532483, "loss": 0.2489, "step": 16380 }, { "epoch": 0.11633333510778933, "grad_norm": 0.08740234375, "learning_rate": 0.001998946162255339, "loss": 0.2433, "step": 16390 }, { "epoch": 0.11640431334763544, "grad_norm": 0.1298828125, "learning_rate": 0.001998944792568428, "loss": 0.2468, "step": 16400 }, { "epoch": 0.11647529158748157, "grad_norm": 0.1337890625, "learning_rate": 0.0019989434219925173, "loss": 0.2606, "step": 16410 }, { "epoch": 0.11654626982732769, "grad_norm": 0.08251953125, "learning_rate": 0.001998942050527608, "loss": 0.2522, "step": 16420 }, { "epoch": 0.1166172480671738, "grad_norm": 0.181640625, "learning_rate": 0.001998940678173701, "loss": 0.27, "step": 16430 }, { "epoch": 0.11668822630701993, "grad_norm": 0.2041015625, "learning_rate": 0.0019989393049307984, "loss": 0.2685, "step": 16440 }, { "epoch": 0.11675920454686604, "grad_norm": 0.140625, "learning_rate": 0.001998937930798901, "loss": 0.271, "step": 16450 }, { "epoch": 0.11683018278671216, "grad_norm": 0.08935546875, "learning_rate": 0.00199893655577801, "loss": 0.2393, "step": 16460 }, { "epoch": 0.11690116102655829, "grad_norm": 0.09814453125, "learning_rate": 0.001998935179868128, "loss": 0.2463, "step": 16470 }, { "epoch": 0.1169721392664044, "grad_norm": 0.154296875, "learning_rate": 0.001998933803069255, "loss": 0.266, "step": 16480 }, { "epoch": 0.11704311750625052, "grad_norm": 0.1103515625, "learning_rate": 0.001998932425381392, "loss": 0.2567, "step": 16490 }, { "epoch": 0.11711409574609664, "grad_norm": 0.1025390625, "learning_rate": 0.0019989310468045422, "loss": 0.2546, "step": 16500 }, { "epoch": 0.11718507398594276, "grad_norm": 0.1240234375, "learning_rate": 0.0019989296673387055, "loss": 0.2588, "step": 16510 }, { "epoch": 0.11725605222578887, "grad_norm": 0.1103515625, "learning_rate": 0.001998928286983884, "loss": 0.2507, "step": 16520 }, { "epoch": 0.117327030465635, "grad_norm": 0.10546875, "learning_rate": 0.0019989269057400787, "loss": 0.2372, "step": 16530 }, { "epoch": 0.11739800870548112, "grad_norm": 0.09814453125, "learning_rate": 0.0019989255236072907, "loss": 0.2542, "step": 16540 }, { "epoch": 0.11746898694532723, "grad_norm": 0.09619140625, "learning_rate": 0.001998924140585522, "loss": 0.2518, "step": 16550 }, { "epoch": 0.11753996518517336, "grad_norm": 0.09716796875, "learning_rate": 0.0019989227566747734, "loss": 0.2492, "step": 16560 }, { "epoch": 0.11761094342501947, "grad_norm": 0.08056640625, "learning_rate": 0.0019989213718750467, "loss": 0.2427, "step": 16570 }, { "epoch": 0.11768192166486559, "grad_norm": 0.11328125, "learning_rate": 0.001998919986186343, "loss": 0.2374, "step": 16580 }, { "epoch": 0.11775289990471172, "grad_norm": 0.12255859375, "learning_rate": 0.0019989185996086636, "loss": 0.2608, "step": 16590 }, { "epoch": 0.11782387814455783, "grad_norm": 0.12158203125, "learning_rate": 0.0019989172121420104, "loss": 0.2595, "step": 16600 }, { "epoch": 0.11789485638440395, "grad_norm": 0.09619140625, "learning_rate": 0.0019989158237863846, "loss": 0.2931, "step": 16610 }, { "epoch": 0.11796583462425007, "grad_norm": 0.1357421875, "learning_rate": 0.001998914434541787, "loss": 0.2722, "step": 16620 }, { "epoch": 0.11803681286409619, "grad_norm": 0.095703125, "learning_rate": 0.0019989130444082195, "loss": 0.2642, "step": 16630 }, { "epoch": 0.1181077911039423, "grad_norm": 0.0859375, "learning_rate": 0.001998911653385683, "loss": 0.2531, "step": 16640 }, { "epoch": 0.11817876934378843, "grad_norm": 0.08154296875, "learning_rate": 0.0019989102614741796, "loss": 0.2438, "step": 16650 }, { "epoch": 0.11824974758363455, "grad_norm": 0.08251953125, "learning_rate": 0.0019989088686737104, "loss": 0.2509, "step": 16660 }, { "epoch": 0.11832072582348066, "grad_norm": 0.09326171875, "learning_rate": 0.0019989074749842766, "loss": 0.2465, "step": 16670 }, { "epoch": 0.11839170406332679, "grad_norm": 0.11767578125, "learning_rate": 0.0019989060804058794, "loss": 0.2697, "step": 16680 }, { "epoch": 0.1184626823031729, "grad_norm": 0.130859375, "learning_rate": 0.0019989046849385205, "loss": 0.242, "step": 16690 }, { "epoch": 0.11853366054301902, "grad_norm": 0.1279296875, "learning_rate": 0.0019989032885822013, "loss": 0.2647, "step": 16700 }, { "epoch": 0.11860463878286515, "grad_norm": 0.10205078125, "learning_rate": 0.0019989018913369226, "loss": 0.2621, "step": 16710 }, { "epoch": 0.11867561702271126, "grad_norm": 0.0712890625, "learning_rate": 0.001998900493202687, "loss": 0.257, "step": 16720 }, { "epoch": 0.11874659526255738, "grad_norm": 0.12890625, "learning_rate": 0.001998899094179495, "loss": 0.2717, "step": 16730 }, { "epoch": 0.1188175735024035, "grad_norm": 0.09521484375, "learning_rate": 0.001998897694267348, "loss": 0.2554, "step": 16740 }, { "epoch": 0.11888855174224962, "grad_norm": 0.353515625, "learning_rate": 0.0019988962934662474, "loss": 0.2593, "step": 16750 }, { "epoch": 0.11895952998209573, "grad_norm": 0.09375, "learning_rate": 0.0019988948917761947, "loss": 0.2577, "step": 16760 }, { "epoch": 0.11903050822194186, "grad_norm": 0.09619140625, "learning_rate": 0.0019988934891971913, "loss": 0.2639, "step": 16770 }, { "epoch": 0.11910148646178798, "grad_norm": 0.08447265625, "learning_rate": 0.001998892085729239, "loss": 0.2644, "step": 16780 }, { "epoch": 0.11917246470163409, "grad_norm": 0.142578125, "learning_rate": 0.001998890681372338, "loss": 0.2627, "step": 16790 }, { "epoch": 0.11924344294148022, "grad_norm": 0.166015625, "learning_rate": 0.0019988892761264913, "loss": 0.2697, "step": 16800 }, { "epoch": 0.11931442118132633, "grad_norm": 0.08447265625, "learning_rate": 0.001998887869991699, "loss": 0.2483, "step": 16810 }, { "epoch": 0.11938539942117245, "grad_norm": 0.1279296875, "learning_rate": 0.0019988864629679626, "loss": 0.2654, "step": 16820 }, { "epoch": 0.11945637766101858, "grad_norm": 0.1240234375, "learning_rate": 0.0019988850550552843, "loss": 0.2534, "step": 16830 }, { "epoch": 0.11952735590086469, "grad_norm": 0.107421875, "learning_rate": 0.0019988836462536646, "loss": 0.2588, "step": 16840 }, { "epoch": 0.1195983341407108, "grad_norm": 0.0966796875, "learning_rate": 0.001998882236563106, "loss": 0.2755, "step": 16850 }, { "epoch": 0.11966931238055693, "grad_norm": 0.1123046875, "learning_rate": 0.001998880825983609, "loss": 0.2486, "step": 16860 }, { "epoch": 0.11974029062040305, "grad_norm": 0.07080078125, "learning_rate": 0.001998879414515175, "loss": 0.2557, "step": 16870 }, { "epoch": 0.11981126886024918, "grad_norm": 0.1318359375, "learning_rate": 0.0019988780021578056, "loss": 0.2894, "step": 16880 }, { "epoch": 0.11988224710009529, "grad_norm": 0.0986328125, "learning_rate": 0.0019988765889115017, "loss": 0.271, "step": 16890 }, { "epoch": 0.1199532253399414, "grad_norm": 0.1611328125, "learning_rate": 0.001998875174776266, "loss": 0.2596, "step": 16900 }, { "epoch": 0.12002420357978753, "grad_norm": 0.2060546875, "learning_rate": 0.001998873759752099, "loss": 0.2667, "step": 16910 }, { "epoch": 0.12009518181963365, "grad_norm": 0.1376953125, "learning_rate": 0.001998872343839002, "loss": 0.2542, "step": 16920 }, { "epoch": 0.12016616005947976, "grad_norm": 0.14453125, "learning_rate": 0.001998870927036976, "loss": 0.2515, "step": 16930 }, { "epoch": 0.12023713829932589, "grad_norm": 0.197265625, "learning_rate": 0.001998869509346024, "loss": 0.2542, "step": 16940 }, { "epoch": 0.120308116539172, "grad_norm": 0.1513671875, "learning_rate": 0.001998868090766146, "loss": 0.2588, "step": 16950 }, { "epoch": 0.12037909477901812, "grad_norm": 0.0986328125, "learning_rate": 0.0019988666712973434, "loss": 0.2453, "step": 16960 }, { "epoch": 0.12045007301886425, "grad_norm": 0.11767578125, "learning_rate": 0.0019988652509396188, "loss": 0.2566, "step": 16970 }, { "epoch": 0.12052105125871036, "grad_norm": 0.1865234375, "learning_rate": 0.0019988638296929723, "loss": 0.2535, "step": 16980 }, { "epoch": 0.12059202949855648, "grad_norm": 0.1640625, "learning_rate": 0.0019988624075574055, "loss": 0.2866, "step": 16990 }, { "epoch": 0.1206630077384026, "grad_norm": 0.14453125, "learning_rate": 0.0019988609845329208, "loss": 0.264, "step": 17000 }, { "epoch": 0.12073398597824872, "grad_norm": 0.08935546875, "learning_rate": 0.0019988595606195182, "loss": 0.2593, "step": 17010 }, { "epoch": 0.12080496421809483, "grad_norm": 0.0810546875, "learning_rate": 0.0019988581358172004, "loss": 0.2725, "step": 17020 }, { "epoch": 0.12087594245794096, "grad_norm": 0.1435546875, "learning_rate": 0.001998856710125968, "loss": 0.2489, "step": 17030 }, { "epoch": 0.12094692069778708, "grad_norm": 0.09619140625, "learning_rate": 0.0019988552835458226, "loss": 0.2843, "step": 17040 }, { "epoch": 0.12101789893763319, "grad_norm": 0.1708984375, "learning_rate": 0.001998853856076766, "loss": 0.2685, "step": 17050 }, { "epoch": 0.12108887717747932, "grad_norm": 0.12451171875, "learning_rate": 0.001998852427718799, "loss": 0.2645, "step": 17060 }, { "epoch": 0.12115985541732543, "grad_norm": 0.12890625, "learning_rate": 0.001998850998471923, "loss": 0.249, "step": 17070 }, { "epoch": 0.12123083365717155, "grad_norm": 0.0869140625, "learning_rate": 0.0019988495683361403, "loss": 0.2651, "step": 17080 }, { "epoch": 0.12130181189701768, "grad_norm": 0.095703125, "learning_rate": 0.0019988481373114514, "loss": 0.2489, "step": 17090 }, { "epoch": 0.12137279013686379, "grad_norm": 0.10009765625, "learning_rate": 0.001998846705397858, "loss": 0.2577, "step": 17100 }, { "epoch": 0.1214437683767099, "grad_norm": 0.13671875, "learning_rate": 0.0019988452725953616, "loss": 0.2688, "step": 17110 }, { "epoch": 0.12151474661655604, "grad_norm": 0.091796875, "learning_rate": 0.001998843838903964, "loss": 0.2591, "step": 17120 }, { "epoch": 0.12158572485640215, "grad_norm": 0.10546875, "learning_rate": 0.0019988424043236655, "loss": 0.2766, "step": 17130 }, { "epoch": 0.12165670309624826, "grad_norm": 0.1787109375, "learning_rate": 0.001998840968854468, "loss": 0.2586, "step": 17140 }, { "epoch": 0.12172768133609439, "grad_norm": 0.09521484375, "learning_rate": 0.001998839532496374, "loss": 0.2671, "step": 17150 }, { "epoch": 0.12179865957594051, "grad_norm": 0.1337890625, "learning_rate": 0.001998838095249383, "loss": 0.2644, "step": 17160 }, { "epoch": 0.12186963781578662, "grad_norm": 0.08984375, "learning_rate": 0.0019988366571134985, "loss": 0.2576, "step": 17170 }, { "epoch": 0.12194061605563275, "grad_norm": 0.10595703125, "learning_rate": 0.0019988352180887203, "loss": 0.2561, "step": 17180 }, { "epoch": 0.12201159429547886, "grad_norm": 0.1328125, "learning_rate": 0.00199883377817505, "loss": 0.2594, "step": 17190 }, { "epoch": 0.12208257253532498, "grad_norm": 0.1103515625, "learning_rate": 0.0019988323373724905, "loss": 0.2466, "step": 17200 }, { "epoch": 0.12215355077517111, "grad_norm": 0.064453125, "learning_rate": 0.0019988308956810415, "loss": 0.2523, "step": 17210 }, { "epoch": 0.12222452901501722, "grad_norm": 0.1650390625, "learning_rate": 0.0019988294531007055, "loss": 0.2346, "step": 17220 }, { "epoch": 0.12229550725486334, "grad_norm": 0.09033203125, "learning_rate": 0.001998828009631483, "loss": 0.2514, "step": 17230 }, { "epoch": 0.12236648549470946, "grad_norm": 0.10009765625, "learning_rate": 0.0019988265652733763, "loss": 0.248, "step": 17240 }, { "epoch": 0.12243746373455558, "grad_norm": 0.10888671875, "learning_rate": 0.001998825120026386, "loss": 0.2575, "step": 17250 }, { "epoch": 0.1225084419744017, "grad_norm": 0.08837890625, "learning_rate": 0.001998823673890514, "loss": 0.2562, "step": 17260 }, { "epoch": 0.12257942021424782, "grad_norm": 0.1064453125, "learning_rate": 0.001998822226865762, "loss": 0.2614, "step": 17270 }, { "epoch": 0.12265039845409394, "grad_norm": 0.125, "learning_rate": 0.001998820778952131, "loss": 0.2665, "step": 17280 }, { "epoch": 0.12272137669394005, "grad_norm": 0.123046875, "learning_rate": 0.001998819330149623, "loss": 0.2575, "step": 17290 }, { "epoch": 0.12279235493378618, "grad_norm": 0.07568359375, "learning_rate": 0.001998817880458239, "loss": 0.2553, "step": 17300 }, { "epoch": 0.1228633331736323, "grad_norm": 0.087890625, "learning_rate": 0.00199881642987798, "loss": 0.2468, "step": 17310 }, { "epoch": 0.12293431141347841, "grad_norm": 0.09033203125, "learning_rate": 0.001998814978408848, "loss": 0.2702, "step": 17320 }, { "epoch": 0.12300528965332454, "grad_norm": 0.0791015625, "learning_rate": 0.0019988135260508443, "loss": 0.2598, "step": 17330 }, { "epoch": 0.12307626789317065, "grad_norm": 0.1083984375, "learning_rate": 0.0019988120728039703, "loss": 0.2626, "step": 17340 }, { "epoch": 0.12314724613301677, "grad_norm": 0.15234375, "learning_rate": 0.001998810618668228, "loss": 0.2597, "step": 17350 }, { "epoch": 0.1232182243728629, "grad_norm": 0.12158203125, "learning_rate": 0.0019988091636436176, "loss": 0.2528, "step": 17360 }, { "epoch": 0.12328920261270901, "grad_norm": 0.126953125, "learning_rate": 0.001998807707730142, "loss": 0.2578, "step": 17370 }, { "epoch": 0.12336018085255512, "grad_norm": 0.259765625, "learning_rate": 0.001998806250927801, "loss": 0.2473, "step": 17380 }, { "epoch": 0.12343115909240125, "grad_norm": 0.11962890625, "learning_rate": 0.001998804793236598, "loss": 0.2656, "step": 17390 }, { "epoch": 0.12350213733224737, "grad_norm": 0.11669921875, "learning_rate": 0.0019988033346565325, "loss": 0.2568, "step": 17400 }, { "epoch": 0.12357311557209348, "grad_norm": 0.158203125, "learning_rate": 0.0019988018751876074, "loss": 0.2925, "step": 17410 }, { "epoch": 0.12364409381193961, "grad_norm": 0.10498046875, "learning_rate": 0.0019988004148298235, "loss": 0.2342, "step": 17420 }, { "epoch": 0.12371507205178572, "grad_norm": 0.12158203125, "learning_rate": 0.001998798953583182, "loss": 0.2517, "step": 17430 }, { "epoch": 0.12378605029163184, "grad_norm": 0.1357421875, "learning_rate": 0.001998797491447685, "loss": 0.243, "step": 17440 }, { "epoch": 0.12385702853147797, "grad_norm": 0.125, "learning_rate": 0.0019987960284233337, "loss": 0.2707, "step": 17450 }, { "epoch": 0.12392800677132408, "grad_norm": 0.10400390625, "learning_rate": 0.001998794564510129, "loss": 0.2489, "step": 17460 }, { "epoch": 0.1239989850111702, "grad_norm": 0.07470703125, "learning_rate": 0.0019987930997080734, "loss": 0.2417, "step": 17470 }, { "epoch": 0.12406996325101632, "grad_norm": 0.142578125, "learning_rate": 0.001998791634017167, "loss": 0.251, "step": 17480 }, { "epoch": 0.12414094149086244, "grad_norm": 0.11328125, "learning_rate": 0.0019987901674374123, "loss": 0.283, "step": 17490 }, { "epoch": 0.12421191973070855, "grad_norm": 0.2392578125, "learning_rate": 0.001998788699968811, "loss": 0.2589, "step": 17500 }, { "epoch": 0.12428289797055468, "grad_norm": 0.11865234375, "learning_rate": 0.0019987872316113635, "loss": 0.2483, "step": 17510 }, { "epoch": 0.1243538762104008, "grad_norm": 0.083984375, "learning_rate": 0.001998785762365072, "loss": 0.2707, "step": 17520 }, { "epoch": 0.12442485445024691, "grad_norm": 0.109375, "learning_rate": 0.0019987842922299373, "loss": 0.2408, "step": 17530 }, { "epoch": 0.12449583269009304, "grad_norm": 0.09033203125, "learning_rate": 0.0019987828212059615, "loss": 0.2583, "step": 17540 }, { "epoch": 0.12456681092993915, "grad_norm": 0.09912109375, "learning_rate": 0.001998781349293146, "loss": 0.2589, "step": 17550 }, { "epoch": 0.12463778916978527, "grad_norm": 0.1123046875, "learning_rate": 0.001998779876491492, "loss": 0.274, "step": 17560 }, { "epoch": 0.1247087674096314, "grad_norm": 0.115234375, "learning_rate": 0.0019987784028010006, "loss": 0.2484, "step": 17570 }, { "epoch": 0.12477974564947751, "grad_norm": 0.0927734375, "learning_rate": 0.0019987769282216743, "loss": 0.2545, "step": 17580 }, { "epoch": 0.12485072388932363, "grad_norm": 0.111328125, "learning_rate": 0.0019987754527535134, "loss": 0.2628, "step": 17590 }, { "epoch": 0.12492170212916975, "grad_norm": 0.11669921875, "learning_rate": 0.0019987739763965206, "loss": 0.2654, "step": 17600 }, { "epoch": 0.12499268036901587, "grad_norm": 0.10693359375, "learning_rate": 0.001998772499150696, "loss": 0.2372, "step": 17610 }, { "epoch": 0.12506365860886198, "grad_norm": 0.0947265625, "learning_rate": 0.001998771021016042, "loss": 0.2381, "step": 17620 }, { "epoch": 0.1251346368487081, "grad_norm": 0.11962890625, "learning_rate": 0.00199876954199256, "loss": 0.2606, "step": 17630 }, { "epoch": 0.12520561508855424, "grad_norm": 0.10400390625, "learning_rate": 0.001998768062080251, "loss": 0.2606, "step": 17640 }, { "epoch": 0.12527659332840035, "grad_norm": 0.1484375, "learning_rate": 0.0019987665812791163, "loss": 0.2681, "step": 17650 }, { "epoch": 0.12534757156824647, "grad_norm": 0.07958984375, "learning_rate": 0.0019987650995891585, "loss": 0.266, "step": 17660 }, { "epoch": 0.12541854980809258, "grad_norm": 0.08154296875, "learning_rate": 0.001998763617010378, "loss": 0.2651, "step": 17670 }, { "epoch": 0.1254895280479387, "grad_norm": 0.1259765625, "learning_rate": 0.001998762133542777, "loss": 0.2653, "step": 17680 }, { "epoch": 0.1255605062877848, "grad_norm": 0.115234375, "learning_rate": 0.0019987606491863557, "loss": 0.2475, "step": 17690 }, { "epoch": 0.12563148452763095, "grad_norm": 0.08837890625, "learning_rate": 0.0019987591639411173, "loss": 0.2603, "step": 17700 }, { "epoch": 0.12570246276747707, "grad_norm": 0.0986328125, "learning_rate": 0.001998757677807062, "loss": 0.2482, "step": 17710 }, { "epoch": 0.12577344100732318, "grad_norm": 0.1162109375, "learning_rate": 0.0019987561907841922, "loss": 0.2818, "step": 17720 }, { "epoch": 0.1258444192471693, "grad_norm": 0.16015625, "learning_rate": 0.001998754702872508, "loss": 0.2826, "step": 17730 }, { "epoch": 0.1259153974870154, "grad_norm": 0.12890625, "learning_rate": 0.0019987532140720125, "loss": 0.2678, "step": 17740 }, { "epoch": 0.12598637572686153, "grad_norm": 0.09716796875, "learning_rate": 0.001998751724382706, "loss": 0.2649, "step": 17750 }, { "epoch": 0.12605735396670767, "grad_norm": 0.14453125, "learning_rate": 0.0019987502338045904, "loss": 0.2753, "step": 17760 }, { "epoch": 0.12612833220655378, "grad_norm": 0.11376953125, "learning_rate": 0.0019987487423376673, "loss": 0.2606, "step": 17770 }, { "epoch": 0.1261993104463999, "grad_norm": 0.11279296875, "learning_rate": 0.001998747249981938, "loss": 0.2708, "step": 17780 }, { "epoch": 0.126270288686246, "grad_norm": 0.1435546875, "learning_rate": 0.0019987457567374038, "loss": 0.2624, "step": 17790 }, { "epoch": 0.12634126692609213, "grad_norm": 0.076171875, "learning_rate": 0.0019987442626040663, "loss": 0.2551, "step": 17800 }, { "epoch": 0.12641224516593824, "grad_norm": 0.0751953125, "learning_rate": 0.0019987427675819272, "loss": 0.2671, "step": 17810 }, { "epoch": 0.12648322340578438, "grad_norm": 0.083984375, "learning_rate": 0.0019987412716709883, "loss": 0.2614, "step": 17820 }, { "epoch": 0.1265542016456305, "grad_norm": 0.1708984375, "learning_rate": 0.0019987397748712503, "loss": 0.2826, "step": 17830 }, { "epoch": 0.1266251798854766, "grad_norm": 0.0751953125, "learning_rate": 0.0019987382771827146, "loss": 0.2529, "step": 17840 }, { "epoch": 0.12669615812532273, "grad_norm": 0.140625, "learning_rate": 0.001998736778605384, "loss": 0.2595, "step": 17850 }, { "epoch": 0.12676713636516884, "grad_norm": 0.123046875, "learning_rate": 0.001998735279139258, "loss": 0.2681, "step": 17860 }, { "epoch": 0.12683811460501496, "grad_norm": 0.11474609375, "learning_rate": 0.0019987337787843395, "loss": 0.2531, "step": 17870 }, { "epoch": 0.1269090928448611, "grad_norm": 0.12353515625, "learning_rate": 0.0019987322775406294, "loss": 0.2643, "step": 17880 }, { "epoch": 0.1269800710847072, "grad_norm": 0.0849609375, "learning_rate": 0.00199873077540813, "loss": 0.2577, "step": 17890 }, { "epoch": 0.12705104932455333, "grad_norm": 0.10791015625, "learning_rate": 0.0019987292723868417, "loss": 0.254, "step": 17900 }, { "epoch": 0.12712202756439944, "grad_norm": 0.08935546875, "learning_rate": 0.0019987277684767667, "loss": 0.2402, "step": 17910 }, { "epoch": 0.12719300580424556, "grad_norm": 0.07666015625, "learning_rate": 0.001998726263677906, "loss": 0.252, "step": 17920 }, { "epoch": 0.12726398404409167, "grad_norm": 0.091796875, "learning_rate": 0.001998724757990262, "loss": 0.2491, "step": 17930 }, { "epoch": 0.1273349622839378, "grad_norm": 0.0908203125, "learning_rate": 0.001998723251413835, "loss": 0.2635, "step": 17940 }, { "epoch": 0.12740594052378393, "grad_norm": 0.0693359375, "learning_rate": 0.0019987217439486273, "loss": 0.2495, "step": 17950 }, { "epoch": 0.12747691876363004, "grad_norm": 0.201171875, "learning_rate": 0.00199872023559464, "loss": 0.2676, "step": 17960 }, { "epoch": 0.12754789700347616, "grad_norm": 0.1708984375, "learning_rate": 0.001998718726351874, "loss": 0.2694, "step": 17970 }, { "epoch": 0.12761887524332227, "grad_norm": 0.0849609375, "learning_rate": 0.0019987172162203326, "loss": 0.2603, "step": 17980 }, { "epoch": 0.1276898534831684, "grad_norm": 0.11572265625, "learning_rate": 0.0019987157052000154, "loss": 0.258, "step": 17990 }, { "epoch": 0.12776083172301453, "grad_norm": 0.0947265625, "learning_rate": 0.0019987141932909253, "loss": 0.2262, "step": 18000 }, { "epoch": 0.12776083172301453, "eval_covost2-zh-en_loss": 3.901607036590576, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 23.4917, "eval_covost2-zh-en_samples_per_second": 2.724, "eval_covost2-zh-en_steps_per_second": 0.17, "step": 18000 }, { "epoch": 0.12776083172301453, "eval_covost2-en-zh_loss": 3.1818437576293945, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.6064, "eval_covost2-en-zh_samples_per_second": 3.264, "eval_covost2-en-zh_steps_per_second": 0.204, "step": 18000 }, { "epoch": 0.12783180996286064, "grad_norm": 0.0859375, "learning_rate": 0.001998712680493063, "loss": 0.2601, "step": 18010 }, { "epoch": 0.12790278820270676, "grad_norm": 0.11328125, "learning_rate": 0.00199871116680643, "loss": 0.2691, "step": 18020 }, { "epoch": 0.12797376644255287, "grad_norm": 0.1357421875, "learning_rate": 0.0019987096522310283, "loss": 0.2645, "step": 18030 }, { "epoch": 0.128044744682399, "grad_norm": 0.19140625, "learning_rate": 0.0019987081367668588, "loss": 0.2703, "step": 18040 }, { "epoch": 0.1281157229222451, "grad_norm": 0.1376953125, "learning_rate": 0.0019987066204139236, "loss": 0.2444, "step": 18050 }, { "epoch": 0.12818670116209124, "grad_norm": 0.123046875, "learning_rate": 0.0019987051031722233, "loss": 0.2464, "step": 18060 }, { "epoch": 0.12825767940193736, "grad_norm": 0.11767578125, "learning_rate": 0.0019987035850417604, "loss": 0.2498, "step": 18070 }, { "epoch": 0.12832865764178347, "grad_norm": 0.1064453125, "learning_rate": 0.001998702066022536, "loss": 0.2506, "step": 18080 }, { "epoch": 0.1283996358816296, "grad_norm": 0.0947265625, "learning_rate": 0.001998700546114552, "loss": 0.2592, "step": 18090 }, { "epoch": 0.1284706141214757, "grad_norm": 0.10009765625, "learning_rate": 0.0019986990253178087, "loss": 0.2496, "step": 18100 }, { "epoch": 0.12854159236132182, "grad_norm": 0.267578125, "learning_rate": 0.0019986975036323084, "loss": 0.2559, "step": 18110 }, { "epoch": 0.12861257060116796, "grad_norm": 0.150390625, "learning_rate": 0.0019986959810580534, "loss": 0.2455, "step": 18120 }, { "epoch": 0.12868354884101407, "grad_norm": 0.5390625, "learning_rate": 0.0019986944575950435, "loss": 0.262, "step": 18130 }, { "epoch": 0.1287545270808602, "grad_norm": 0.10107421875, "learning_rate": 0.0019986929332432815, "loss": 0.2621, "step": 18140 }, { "epoch": 0.1288255053207063, "grad_norm": 0.1884765625, "learning_rate": 0.0019986914080027686, "loss": 0.2678, "step": 18150 }, { "epoch": 0.12889648356055242, "grad_norm": 0.11328125, "learning_rate": 0.001998689881873506, "loss": 0.2564, "step": 18160 }, { "epoch": 0.12896746180039853, "grad_norm": 0.08984375, "learning_rate": 0.001998688354855496, "loss": 0.2585, "step": 18170 }, { "epoch": 0.12903844004024467, "grad_norm": 0.138671875, "learning_rate": 0.0019986868269487386, "loss": 0.2483, "step": 18180 }, { "epoch": 0.1291094182800908, "grad_norm": 0.10791015625, "learning_rate": 0.001998685298153237, "loss": 0.2524, "step": 18190 }, { "epoch": 0.1291803965199369, "grad_norm": 0.0927734375, "learning_rate": 0.0019986837684689914, "loss": 0.2587, "step": 18200 }, { "epoch": 0.12925137475978302, "grad_norm": 0.07958984375, "learning_rate": 0.0019986822378960044, "loss": 0.2657, "step": 18210 }, { "epoch": 0.12932235299962913, "grad_norm": 0.1328125, "learning_rate": 0.001998680706434277, "loss": 0.2798, "step": 18220 }, { "epoch": 0.12939333123947525, "grad_norm": 0.09423828125, "learning_rate": 0.0019986791740838105, "loss": 0.2477, "step": 18230 }, { "epoch": 0.1294643094793214, "grad_norm": 0.11962890625, "learning_rate": 0.0019986776408446065, "loss": 0.2465, "step": 18240 }, { "epoch": 0.1295352877191675, "grad_norm": 0.126953125, "learning_rate": 0.0019986761067166668, "loss": 0.2535, "step": 18250 }, { "epoch": 0.12960626595901362, "grad_norm": 0.08544921875, "learning_rate": 0.0019986745716999927, "loss": 0.2516, "step": 18260 }, { "epoch": 0.12967724419885973, "grad_norm": 0.099609375, "learning_rate": 0.0019986730357945855, "loss": 0.2553, "step": 18270 }, { "epoch": 0.12974822243870585, "grad_norm": 0.1591796875, "learning_rate": 0.0019986714990004473, "loss": 0.2542, "step": 18280 }, { "epoch": 0.12981920067855196, "grad_norm": 0.0693359375, "learning_rate": 0.001998669961317579, "loss": 0.2517, "step": 18290 }, { "epoch": 0.1298901789183981, "grad_norm": 0.16796875, "learning_rate": 0.0019986684227459826, "loss": 0.2771, "step": 18300 }, { "epoch": 0.12996115715824422, "grad_norm": 0.12109375, "learning_rate": 0.00199866688328566, "loss": 0.2558, "step": 18310 }, { "epoch": 0.13003213539809033, "grad_norm": 0.1474609375, "learning_rate": 0.0019986653429366116, "loss": 0.2536, "step": 18320 }, { "epoch": 0.13010311363793645, "grad_norm": 0.115234375, "learning_rate": 0.00199866380169884, "loss": 0.2577, "step": 18330 }, { "epoch": 0.13017409187778256, "grad_norm": 0.099609375, "learning_rate": 0.0019986622595723454, "loss": 0.269, "step": 18340 }, { "epoch": 0.13024507011762868, "grad_norm": 0.095703125, "learning_rate": 0.0019986607165571307, "loss": 0.2602, "step": 18350 }, { "epoch": 0.13031604835747482, "grad_norm": 0.0810546875, "learning_rate": 0.0019986591726531967, "loss": 0.2434, "step": 18360 }, { "epoch": 0.13038702659732093, "grad_norm": 0.1357421875, "learning_rate": 0.0019986576278605453, "loss": 0.2377, "step": 18370 }, { "epoch": 0.13045800483716705, "grad_norm": 0.0830078125, "learning_rate": 0.0019986560821791777, "loss": 0.2497, "step": 18380 }, { "epoch": 0.13052898307701316, "grad_norm": 0.09619140625, "learning_rate": 0.0019986545356090956, "loss": 0.2688, "step": 18390 }, { "epoch": 0.13059996131685928, "grad_norm": 0.09130859375, "learning_rate": 0.0019986529881503004, "loss": 0.2393, "step": 18400 }, { "epoch": 0.1306709395567054, "grad_norm": 0.08642578125, "learning_rate": 0.0019986514398027937, "loss": 0.2395, "step": 18410 }, { "epoch": 0.13074191779655153, "grad_norm": 0.10205078125, "learning_rate": 0.0019986498905665774, "loss": 0.265, "step": 18420 }, { "epoch": 0.13081289603639765, "grad_norm": 0.09423828125, "learning_rate": 0.001998648340441652, "loss": 0.2578, "step": 18430 }, { "epoch": 0.13088387427624376, "grad_norm": 0.1591796875, "learning_rate": 0.00199864678942802, "loss": 0.2426, "step": 18440 }, { "epoch": 0.13095485251608988, "grad_norm": 0.1533203125, "learning_rate": 0.001998645237525683, "loss": 0.274, "step": 18450 }, { "epoch": 0.131025830755936, "grad_norm": 0.09423828125, "learning_rate": 0.001998643684734642, "loss": 0.2485, "step": 18460 }, { "epoch": 0.1310968089957821, "grad_norm": 0.08203125, "learning_rate": 0.0019986421310548984, "loss": 0.2653, "step": 18470 }, { "epoch": 0.13116778723562825, "grad_norm": 0.224609375, "learning_rate": 0.0019986405764864544, "loss": 0.2419, "step": 18480 }, { "epoch": 0.13123876547547436, "grad_norm": 0.1318359375, "learning_rate": 0.001998639021029311, "loss": 0.2601, "step": 18490 }, { "epoch": 0.13130974371532048, "grad_norm": 0.09521484375, "learning_rate": 0.0019986374646834702, "loss": 0.255, "step": 18500 }, { "epoch": 0.1313807219551666, "grad_norm": 0.1962890625, "learning_rate": 0.001998635907448933, "loss": 0.2647, "step": 18510 }, { "epoch": 0.1314517001950127, "grad_norm": 0.08056640625, "learning_rate": 0.0019986343493257016, "loss": 0.2654, "step": 18520 }, { "epoch": 0.13152267843485882, "grad_norm": 0.10107421875, "learning_rate": 0.001998632790313777, "loss": 0.2659, "step": 18530 }, { "epoch": 0.13159365667470496, "grad_norm": 0.15234375, "learning_rate": 0.0019986312304131606, "loss": 0.2639, "step": 18540 }, { "epoch": 0.13166463491455108, "grad_norm": 0.095703125, "learning_rate": 0.0019986296696238542, "loss": 0.2476, "step": 18550 }, { "epoch": 0.1317356131543972, "grad_norm": 0.1884765625, "learning_rate": 0.00199862810794586, "loss": 0.2609, "step": 18560 }, { "epoch": 0.1318065913942433, "grad_norm": 0.1240234375, "learning_rate": 0.001998626545379178, "loss": 0.2478, "step": 18570 }, { "epoch": 0.13187756963408942, "grad_norm": 0.1220703125, "learning_rate": 0.0019986249819238114, "loss": 0.2703, "step": 18580 }, { "epoch": 0.13194854787393553, "grad_norm": 0.091796875, "learning_rate": 0.0019986234175797604, "loss": 0.2434, "step": 18590 }, { "epoch": 0.13201952611378168, "grad_norm": 0.07275390625, "learning_rate": 0.0019986218523470275, "loss": 0.2496, "step": 18600 }, { "epoch": 0.1320905043536278, "grad_norm": 0.10791015625, "learning_rate": 0.0019986202862256144, "loss": 0.259, "step": 18610 }, { "epoch": 0.1321614825934739, "grad_norm": 0.12890625, "learning_rate": 0.0019986187192155215, "loss": 0.2645, "step": 18620 }, { "epoch": 0.13223246083332002, "grad_norm": 0.12060546875, "learning_rate": 0.001998617151316751, "loss": 0.257, "step": 18630 }, { "epoch": 0.13230343907316613, "grad_norm": 0.10107421875, "learning_rate": 0.0019986155825293048, "loss": 0.2646, "step": 18640 }, { "epoch": 0.13237441731301228, "grad_norm": 0.1494140625, "learning_rate": 0.001998614012853184, "loss": 0.2353, "step": 18650 }, { "epoch": 0.1324453955528584, "grad_norm": 0.166015625, "learning_rate": 0.00199861244228839, "loss": 0.2697, "step": 18660 }, { "epoch": 0.1325163737927045, "grad_norm": 0.09765625, "learning_rate": 0.001998610870834925, "loss": 0.2532, "step": 18670 }, { "epoch": 0.13258735203255062, "grad_norm": 0.1611328125, "learning_rate": 0.00199860929849279, "loss": 0.258, "step": 18680 }, { "epoch": 0.13265833027239674, "grad_norm": 0.14453125, "learning_rate": 0.0019986077252619867, "loss": 0.2575, "step": 18690 }, { "epoch": 0.13272930851224285, "grad_norm": 0.091796875, "learning_rate": 0.001998606151142517, "loss": 0.2631, "step": 18700 }, { "epoch": 0.132800286752089, "grad_norm": 0.166015625, "learning_rate": 0.0019986045761343813, "loss": 0.2576, "step": 18710 }, { "epoch": 0.1328712649919351, "grad_norm": 0.08056640625, "learning_rate": 0.001998603000237583, "loss": 0.2642, "step": 18720 }, { "epoch": 0.13294224323178122, "grad_norm": 0.10693359375, "learning_rate": 0.001998601423452122, "loss": 0.2359, "step": 18730 }, { "epoch": 0.13301322147162734, "grad_norm": 0.12353515625, "learning_rate": 0.001998599845778001, "loss": 0.2653, "step": 18740 }, { "epoch": 0.13308419971147345, "grad_norm": 0.103515625, "learning_rate": 0.0019985982672152203, "loss": 0.2667, "step": 18750 }, { "epoch": 0.13315517795131956, "grad_norm": 0.09912109375, "learning_rate": 0.001998596687763783, "loss": 0.2543, "step": 18760 }, { "epoch": 0.1332261561911657, "grad_norm": 0.244140625, "learning_rate": 0.0019985951074236894, "loss": 0.2684, "step": 18770 }, { "epoch": 0.13329713443101182, "grad_norm": 0.08740234375, "learning_rate": 0.001998593526194942, "loss": 0.2583, "step": 18780 }, { "epoch": 0.13336811267085794, "grad_norm": 0.07568359375, "learning_rate": 0.0019985919440775415, "loss": 0.2643, "step": 18790 }, { "epoch": 0.13343909091070405, "grad_norm": 0.0966796875, "learning_rate": 0.00199859036107149, "loss": 0.255, "step": 18800 }, { "epoch": 0.13351006915055016, "grad_norm": 0.125, "learning_rate": 0.0019985887771767894, "loss": 0.2512, "step": 18810 }, { "epoch": 0.13358104739039628, "grad_norm": 0.11181640625, "learning_rate": 0.00199858719239344, "loss": 0.2594, "step": 18820 }, { "epoch": 0.13365202563024242, "grad_norm": 0.111328125, "learning_rate": 0.0019985856067214446, "loss": 0.268, "step": 18830 }, { "epoch": 0.13372300387008854, "grad_norm": 0.103515625, "learning_rate": 0.0019985840201608047, "loss": 0.2576, "step": 18840 }, { "epoch": 0.13379398210993465, "grad_norm": 0.109375, "learning_rate": 0.0019985824327115207, "loss": 0.2555, "step": 18850 }, { "epoch": 0.13386496034978076, "grad_norm": 0.09521484375, "learning_rate": 0.0019985808443735956, "loss": 0.2602, "step": 18860 }, { "epoch": 0.13393593858962688, "grad_norm": 0.09619140625, "learning_rate": 0.0019985792551470306, "loss": 0.275, "step": 18870 }, { "epoch": 0.134006916829473, "grad_norm": 0.259765625, "learning_rate": 0.001998577665031827, "loss": 0.2546, "step": 18880 }, { "epoch": 0.13407789506931914, "grad_norm": 0.255859375, "learning_rate": 0.0019985760740279857, "loss": 0.2466, "step": 18890 }, { "epoch": 0.13414887330916525, "grad_norm": 0.12109375, "learning_rate": 0.0019985744821355094, "loss": 0.2586, "step": 18900 }, { "epoch": 0.13421985154901137, "grad_norm": 0.11181640625, "learning_rate": 0.0019985728893543993, "loss": 0.2553, "step": 18910 }, { "epoch": 0.13429082978885748, "grad_norm": 0.061279296875, "learning_rate": 0.0019985712956846566, "loss": 0.2642, "step": 18920 }, { "epoch": 0.1343618080287036, "grad_norm": 0.11328125, "learning_rate": 0.0019985697011262837, "loss": 0.2646, "step": 18930 }, { "epoch": 0.1344327862685497, "grad_norm": 0.09033203125, "learning_rate": 0.0019985681056792813, "loss": 0.2747, "step": 18940 }, { "epoch": 0.13450376450839585, "grad_norm": 0.099609375, "learning_rate": 0.0019985665093436517, "loss": 0.2463, "step": 18950 }, { "epoch": 0.13457474274824197, "grad_norm": 0.0830078125, "learning_rate": 0.001998564912119396, "loss": 0.2633, "step": 18960 }, { "epoch": 0.13464572098808808, "grad_norm": 0.1181640625, "learning_rate": 0.0019985633140065157, "loss": 0.2688, "step": 18970 }, { "epoch": 0.1347166992279342, "grad_norm": 0.091796875, "learning_rate": 0.001998561715005013, "loss": 0.2697, "step": 18980 }, { "epoch": 0.1347876774677803, "grad_norm": 0.10791015625, "learning_rate": 0.0019985601151148885, "loss": 0.2641, "step": 18990 }, { "epoch": 0.13485865570762642, "grad_norm": 0.11376953125, "learning_rate": 0.001998558514336145, "loss": 0.2729, "step": 19000 }, { "epoch": 0.13492963394747257, "grad_norm": 0.126953125, "learning_rate": 0.001998556912668783, "loss": 0.2517, "step": 19010 }, { "epoch": 0.13500061218731868, "grad_norm": 0.1376953125, "learning_rate": 0.0019985553101128047, "loss": 0.2583, "step": 19020 }, { "epoch": 0.1350715904271648, "grad_norm": 0.10302734375, "learning_rate": 0.0019985537066682115, "loss": 0.2619, "step": 19030 }, { "epoch": 0.1351425686670109, "grad_norm": 0.18359375, "learning_rate": 0.001998552102335005, "loss": 0.2654, "step": 19040 }, { "epoch": 0.13521354690685702, "grad_norm": 0.1611328125, "learning_rate": 0.0019985504971131865, "loss": 0.2518, "step": 19050 }, { "epoch": 0.13528452514670314, "grad_norm": 0.08935546875, "learning_rate": 0.0019985488910027583, "loss": 0.2728, "step": 19060 }, { "epoch": 0.13535550338654928, "grad_norm": 0.11181640625, "learning_rate": 0.0019985472840037214, "loss": 0.2399, "step": 19070 }, { "epoch": 0.1354264816263954, "grad_norm": 0.10693359375, "learning_rate": 0.0019985456761160776, "loss": 0.2619, "step": 19080 }, { "epoch": 0.1354974598662415, "grad_norm": 0.0830078125, "learning_rate": 0.0019985440673398287, "loss": 0.2508, "step": 19090 }, { "epoch": 0.13556843810608762, "grad_norm": 0.1220703125, "learning_rate": 0.0019985424576749755, "loss": 0.2503, "step": 19100 }, { "epoch": 0.13563941634593374, "grad_norm": 0.10009765625, "learning_rate": 0.00199854084712152, "loss": 0.2382, "step": 19110 }, { "epoch": 0.13571039458577985, "grad_norm": 0.142578125, "learning_rate": 0.001998539235679465, "loss": 0.2501, "step": 19120 }, { "epoch": 0.135781372825626, "grad_norm": 0.09765625, "learning_rate": 0.00199853762334881, "loss": 0.2691, "step": 19130 }, { "epoch": 0.1358523510654721, "grad_norm": 0.171875, "learning_rate": 0.001998536010129558, "loss": 0.2538, "step": 19140 }, { "epoch": 0.13592332930531822, "grad_norm": 0.11865234375, "learning_rate": 0.00199853439602171, "loss": 0.2683, "step": 19150 }, { "epoch": 0.13599430754516434, "grad_norm": 0.07421875, "learning_rate": 0.0019985327810252682, "loss": 0.2524, "step": 19160 }, { "epoch": 0.13606528578501045, "grad_norm": 0.09423828125, "learning_rate": 0.001998531165140233, "loss": 0.2486, "step": 19170 }, { "epoch": 0.13613626402485657, "grad_norm": 0.1474609375, "learning_rate": 0.0019985295483666075, "loss": 0.25, "step": 19180 }, { "epoch": 0.1362072422647027, "grad_norm": 0.1572265625, "learning_rate": 0.0019985279307043924, "loss": 0.2463, "step": 19190 }, { "epoch": 0.13627822050454882, "grad_norm": 0.08447265625, "learning_rate": 0.0019985263121535895, "loss": 0.2534, "step": 19200 }, { "epoch": 0.13634919874439494, "grad_norm": 0.1552734375, "learning_rate": 0.0019985246927142005, "loss": 0.2349, "step": 19210 }, { "epoch": 0.13642017698424105, "grad_norm": 0.1201171875, "learning_rate": 0.0019985230723862267, "loss": 0.255, "step": 19220 }, { "epoch": 0.13649115522408717, "grad_norm": 0.1572265625, "learning_rate": 0.00199852145116967, "loss": 0.258, "step": 19230 }, { "epoch": 0.13656213346393328, "grad_norm": 0.142578125, "learning_rate": 0.0019985198290645317, "loss": 0.2592, "step": 19240 }, { "epoch": 0.13663311170377943, "grad_norm": 0.087890625, "learning_rate": 0.001998518206070814, "loss": 0.2423, "step": 19250 }, { "epoch": 0.13670408994362554, "grad_norm": 0.0947265625, "learning_rate": 0.0019985165821885175, "loss": 0.2425, "step": 19260 }, { "epoch": 0.13677506818347165, "grad_norm": 0.08544921875, "learning_rate": 0.001998514957417645, "loss": 0.2602, "step": 19270 }, { "epoch": 0.13684604642331777, "grad_norm": 0.09228515625, "learning_rate": 0.0019985133317581972, "loss": 0.2297, "step": 19280 }, { "epoch": 0.13691702466316388, "grad_norm": 0.11083984375, "learning_rate": 0.0019985117052101764, "loss": 0.2429, "step": 19290 }, { "epoch": 0.13698800290301, "grad_norm": 0.1025390625, "learning_rate": 0.0019985100777735834, "loss": 0.259, "step": 19300 }, { "epoch": 0.13705898114285614, "grad_norm": 0.08642578125, "learning_rate": 0.0019985084494484203, "loss": 0.2456, "step": 19310 }, { "epoch": 0.13712995938270225, "grad_norm": 0.1142578125, "learning_rate": 0.001998506820234689, "loss": 0.2342, "step": 19320 }, { "epoch": 0.13720093762254837, "grad_norm": 0.0908203125, "learning_rate": 0.0019985051901323905, "loss": 0.2671, "step": 19330 }, { "epoch": 0.13727191586239448, "grad_norm": 0.0986328125, "learning_rate": 0.0019985035591415265, "loss": 0.2496, "step": 19340 }, { "epoch": 0.1373428941022406, "grad_norm": 0.0615234375, "learning_rate": 0.001998501927262099, "loss": 0.241, "step": 19350 }, { "epoch": 0.1374138723420867, "grad_norm": 0.1572265625, "learning_rate": 0.0019985002944941095, "loss": 0.2599, "step": 19360 }, { "epoch": 0.13748485058193285, "grad_norm": 0.07421875, "learning_rate": 0.0019984986608375596, "loss": 0.2432, "step": 19370 }, { "epoch": 0.13755582882177897, "grad_norm": 0.0830078125, "learning_rate": 0.0019984970262924505, "loss": 0.2563, "step": 19380 }, { "epoch": 0.13762680706162508, "grad_norm": 0.119140625, "learning_rate": 0.0019984953908587844, "loss": 0.2377, "step": 19390 }, { "epoch": 0.1376977853014712, "grad_norm": 0.1064453125, "learning_rate": 0.0019984937545365626, "loss": 0.251, "step": 19400 }, { "epoch": 0.1377687635413173, "grad_norm": 0.12255859375, "learning_rate": 0.001998492117325787, "loss": 0.2703, "step": 19410 }, { "epoch": 0.13783974178116343, "grad_norm": 0.140625, "learning_rate": 0.0019984904792264588, "loss": 0.2635, "step": 19420 }, { "epoch": 0.13791072002100957, "grad_norm": 0.140625, "learning_rate": 0.0019984888402385798, "loss": 0.2728, "step": 19430 }, { "epoch": 0.13798169826085568, "grad_norm": 0.10693359375, "learning_rate": 0.0019984872003621515, "loss": 0.2442, "step": 19440 }, { "epoch": 0.1380526765007018, "grad_norm": 0.1123046875, "learning_rate": 0.001998485559597176, "loss": 0.2476, "step": 19450 }, { "epoch": 0.1381236547405479, "grad_norm": 0.08056640625, "learning_rate": 0.0019984839179436544, "loss": 0.2653, "step": 19460 }, { "epoch": 0.13819463298039403, "grad_norm": 0.1064453125, "learning_rate": 0.0019984822754015885, "loss": 0.2574, "step": 19470 }, { "epoch": 0.13826561122024014, "grad_norm": 0.07080078125, "learning_rate": 0.0019984806319709803, "loss": 0.2579, "step": 19480 }, { "epoch": 0.13833658946008628, "grad_norm": 0.1904296875, "learning_rate": 0.0019984789876518303, "loss": 0.2589, "step": 19490 }, { "epoch": 0.1384075676999324, "grad_norm": 0.1474609375, "learning_rate": 0.0019984773424441414, "loss": 0.2603, "step": 19500 }, { "epoch": 0.1384785459397785, "grad_norm": 0.11474609375, "learning_rate": 0.0019984756963479147, "loss": 0.2666, "step": 19510 }, { "epoch": 0.13854952417962463, "grad_norm": 0.103515625, "learning_rate": 0.0019984740493631517, "loss": 0.2514, "step": 19520 }, { "epoch": 0.13862050241947074, "grad_norm": 0.1337890625, "learning_rate": 0.0019984724014898547, "loss": 0.2577, "step": 19530 }, { "epoch": 0.13869148065931686, "grad_norm": 0.09033203125, "learning_rate": 0.001998470752728024, "loss": 0.2674, "step": 19540 }, { "epoch": 0.138762458899163, "grad_norm": 0.09423828125, "learning_rate": 0.0019984691030776624, "loss": 0.2612, "step": 19550 }, { "epoch": 0.1388334371390091, "grad_norm": 0.08837890625, "learning_rate": 0.0019984674525387715, "loss": 0.2523, "step": 19560 }, { "epoch": 0.13890441537885523, "grad_norm": 0.240234375, "learning_rate": 0.0019984658011113522, "loss": 0.2803, "step": 19570 }, { "epoch": 0.13897539361870134, "grad_norm": 0.1142578125, "learning_rate": 0.0019984641487954067, "loss": 0.2762, "step": 19580 }, { "epoch": 0.13904637185854746, "grad_norm": 0.1552734375, "learning_rate": 0.0019984624955909362, "loss": 0.2424, "step": 19590 }, { "epoch": 0.13911735009839357, "grad_norm": 0.1318359375, "learning_rate": 0.001998460841497943, "loss": 0.2705, "step": 19600 }, { "epoch": 0.13918832833823971, "grad_norm": 0.1416015625, "learning_rate": 0.0019984591865164283, "loss": 0.2497, "step": 19610 }, { "epoch": 0.13925930657808583, "grad_norm": 0.1474609375, "learning_rate": 0.0019984575306463935, "loss": 0.2441, "step": 19620 }, { "epoch": 0.13933028481793194, "grad_norm": 0.1484375, "learning_rate": 0.0019984558738878407, "loss": 0.2602, "step": 19630 }, { "epoch": 0.13940126305777806, "grad_norm": 0.19140625, "learning_rate": 0.001998454216240771, "loss": 0.2511, "step": 19640 }, { "epoch": 0.13947224129762417, "grad_norm": 0.080078125, "learning_rate": 0.0019984525577051867, "loss": 0.2666, "step": 19650 }, { "epoch": 0.1395432195374703, "grad_norm": 0.10302734375, "learning_rate": 0.0019984508982810894, "loss": 0.2469, "step": 19660 }, { "epoch": 0.13961419777731643, "grad_norm": 0.0693359375, "learning_rate": 0.00199844923796848, "loss": 0.246, "step": 19670 }, { "epoch": 0.13968517601716254, "grad_norm": 0.1748046875, "learning_rate": 0.001998447576767361, "loss": 0.2705, "step": 19680 }, { "epoch": 0.13975615425700866, "grad_norm": 0.09814453125, "learning_rate": 0.0019984459146777335, "loss": 0.2528, "step": 19690 }, { "epoch": 0.13982713249685477, "grad_norm": 0.09716796875, "learning_rate": 0.001998444251699599, "loss": 0.2627, "step": 19700 }, { "epoch": 0.1398981107367009, "grad_norm": 0.13671875, "learning_rate": 0.00199844258783296, "loss": 0.27, "step": 19710 }, { "epoch": 0.139969088976547, "grad_norm": 0.09521484375, "learning_rate": 0.001998440923077817, "loss": 0.2563, "step": 19720 }, { "epoch": 0.14004006721639314, "grad_norm": 0.1328125, "learning_rate": 0.0019984392574341724, "loss": 0.2429, "step": 19730 }, { "epoch": 0.14011104545623926, "grad_norm": 0.119140625, "learning_rate": 0.001998437590902028, "loss": 0.2483, "step": 19740 }, { "epoch": 0.14018202369608537, "grad_norm": 0.10205078125, "learning_rate": 0.001998435923481385, "loss": 0.2577, "step": 19750 }, { "epoch": 0.1402530019359315, "grad_norm": 0.111328125, "learning_rate": 0.001998434255172245, "loss": 0.2456, "step": 19760 }, { "epoch": 0.1403239801757776, "grad_norm": 0.158203125, "learning_rate": 0.00199843258597461, "loss": 0.2543, "step": 19770 }, { "epoch": 0.14039495841562374, "grad_norm": 0.10888671875, "learning_rate": 0.0019984309158884816, "loss": 0.2419, "step": 19780 }, { "epoch": 0.14046593665546986, "grad_norm": 0.16796875, "learning_rate": 0.001998429244913861, "loss": 0.248, "step": 19790 }, { "epoch": 0.14053691489531597, "grad_norm": 0.10888671875, "learning_rate": 0.0019984275730507506, "loss": 0.2412, "step": 19800 }, { "epoch": 0.1406078931351621, "grad_norm": 0.08154296875, "learning_rate": 0.001998425900299151, "loss": 0.2575, "step": 19810 }, { "epoch": 0.1406788713750082, "grad_norm": 0.2578125, "learning_rate": 0.001998424226659065, "loss": 0.248, "step": 19820 }, { "epoch": 0.14074984961485432, "grad_norm": 0.1025390625, "learning_rate": 0.0019984225521304934, "loss": 0.2477, "step": 19830 }, { "epoch": 0.14082082785470046, "grad_norm": 0.10791015625, "learning_rate": 0.0019984208767134387, "loss": 0.2569, "step": 19840 }, { "epoch": 0.14089180609454657, "grad_norm": 0.0791015625, "learning_rate": 0.0019984192004079016, "loss": 0.2613, "step": 19850 }, { "epoch": 0.1409627843343927, "grad_norm": 0.1298828125, "learning_rate": 0.0019984175232138847, "loss": 0.2491, "step": 19860 }, { "epoch": 0.1410337625742388, "grad_norm": 0.140625, "learning_rate": 0.001998415845131389, "loss": 0.2504, "step": 19870 }, { "epoch": 0.14110474081408492, "grad_norm": 0.09228515625, "learning_rate": 0.0019984141661604157, "loss": 0.2556, "step": 19880 }, { "epoch": 0.14117571905393103, "grad_norm": 0.11279296875, "learning_rate": 0.001998412486300968, "loss": 0.2432, "step": 19890 }, { "epoch": 0.14124669729377717, "grad_norm": 0.078125, "learning_rate": 0.0019984108055530465, "loss": 0.2489, "step": 19900 }, { "epoch": 0.1413176755336233, "grad_norm": 0.10205078125, "learning_rate": 0.0019984091239166526, "loss": 0.2632, "step": 19910 }, { "epoch": 0.1413886537734694, "grad_norm": 0.173828125, "learning_rate": 0.0019984074413917884, "loss": 0.2752, "step": 19920 }, { "epoch": 0.14145963201331552, "grad_norm": 0.09521484375, "learning_rate": 0.001998405757978456, "loss": 0.2602, "step": 19930 }, { "epoch": 0.14153061025316163, "grad_norm": 0.10888671875, "learning_rate": 0.0019984040736766564, "loss": 0.2484, "step": 19940 }, { "epoch": 0.14160158849300775, "grad_norm": 0.123046875, "learning_rate": 0.0019984023884863913, "loss": 0.2576, "step": 19950 }, { "epoch": 0.1416725667328539, "grad_norm": 0.11572265625, "learning_rate": 0.0019984007024076624, "loss": 0.2494, "step": 19960 }, { "epoch": 0.1417435449727, "grad_norm": 0.0732421875, "learning_rate": 0.001998399015440472, "loss": 0.2659, "step": 19970 }, { "epoch": 0.14181452321254612, "grad_norm": 0.173828125, "learning_rate": 0.001998397327584821, "loss": 0.2557, "step": 19980 }, { "epoch": 0.14188550145239223, "grad_norm": 0.078125, "learning_rate": 0.0019983956388407113, "loss": 0.2662, "step": 19990 }, { "epoch": 0.14195647969223835, "grad_norm": 0.2578125, "learning_rate": 0.001998393949208145, "loss": 0.2524, "step": 20000 }, { "epoch": 0.14195647969223835, "eval_covost2-zh-en_loss": 3.9750304222106934, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.3886, "eval_covost2-zh-en_samples_per_second": 2.992, "eval_covost2-zh-en_steps_per_second": 0.187, "step": 20000 }, { "epoch": 0.14195647969223835, "eval_covost2-en-zh_loss": 3.1860837936401367, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.9189, "eval_covost2-en-zh_samples_per_second": 3.059, "eval_covost2-en-zh_steps_per_second": 0.191, "step": 20000 }, { "epoch": 0.14202745793208446, "grad_norm": 0.11474609375, "learning_rate": 0.001998392258687123, "loss": 0.2619, "step": 20010 }, { "epoch": 0.1420984361719306, "grad_norm": 0.09716796875, "learning_rate": 0.0019983905672776477, "loss": 0.2383, "step": 20020 }, { "epoch": 0.14216941441177672, "grad_norm": 0.12255859375, "learning_rate": 0.00199838887497972, "loss": 0.2695, "step": 20030 }, { "epoch": 0.14224039265162283, "grad_norm": 0.080078125, "learning_rate": 0.0019983871817933424, "loss": 0.2712, "step": 20040 }, { "epoch": 0.14231137089146895, "grad_norm": 0.07275390625, "learning_rate": 0.001998385487718516, "loss": 0.2759, "step": 20050 }, { "epoch": 0.14238234913131506, "grad_norm": 0.103515625, "learning_rate": 0.0019983837927552424, "loss": 0.2461, "step": 20060 }, { "epoch": 0.14245332737116118, "grad_norm": 0.1005859375, "learning_rate": 0.0019983820969035238, "loss": 0.2679, "step": 20070 }, { "epoch": 0.14252430561100732, "grad_norm": 0.080078125, "learning_rate": 0.0019983804001633618, "loss": 0.2431, "step": 20080 }, { "epoch": 0.14259528385085343, "grad_norm": 0.1669921875, "learning_rate": 0.0019983787025347577, "loss": 0.2558, "step": 20090 }, { "epoch": 0.14266626209069955, "grad_norm": 0.09423828125, "learning_rate": 0.001998377004017713, "loss": 0.269, "step": 20100 }, { "epoch": 0.14273724033054566, "grad_norm": 0.376953125, "learning_rate": 0.0019983753046122306, "loss": 0.2451, "step": 20110 }, { "epoch": 0.14280821857039178, "grad_norm": 0.154296875, "learning_rate": 0.0019983736043183106, "loss": 0.2698, "step": 20120 }, { "epoch": 0.1428791968102379, "grad_norm": 0.23046875, "learning_rate": 0.001998371903135956, "loss": 0.2572, "step": 20130 }, { "epoch": 0.14295017505008403, "grad_norm": 0.11328125, "learning_rate": 0.001998370201065167, "loss": 0.2574, "step": 20140 }, { "epoch": 0.14302115328993015, "grad_norm": 0.09130859375, "learning_rate": 0.001998368498105947, "loss": 0.2495, "step": 20150 }, { "epoch": 0.14309213152977626, "grad_norm": 0.064453125, "learning_rate": 0.0019983667942582964, "loss": 0.2557, "step": 20160 }, { "epoch": 0.14316310976962238, "grad_norm": 0.150390625, "learning_rate": 0.001998365089522218, "loss": 0.2595, "step": 20170 }, { "epoch": 0.1432340880094685, "grad_norm": 0.20703125, "learning_rate": 0.001998363383897712, "loss": 0.2556, "step": 20180 }, { "epoch": 0.1433050662493146, "grad_norm": 0.1796875, "learning_rate": 0.0019983616773847812, "loss": 0.2605, "step": 20190 }, { "epoch": 0.14337604448916075, "grad_norm": 0.1865234375, "learning_rate": 0.0019983599699834272, "loss": 0.2576, "step": 20200 }, { "epoch": 0.14344702272900686, "grad_norm": 0.1162109375, "learning_rate": 0.001998358261693651, "loss": 0.2516, "step": 20210 }, { "epoch": 0.14351800096885298, "grad_norm": 0.07958984375, "learning_rate": 0.0019983565525154554, "loss": 0.2415, "step": 20220 }, { "epoch": 0.1435889792086991, "grad_norm": 0.11181640625, "learning_rate": 0.0019983548424488415, "loss": 0.2545, "step": 20230 }, { "epoch": 0.1436599574485452, "grad_norm": 0.2470703125, "learning_rate": 0.0019983531314938102, "loss": 0.2519, "step": 20240 }, { "epoch": 0.14373093568839132, "grad_norm": 0.09326171875, "learning_rate": 0.001998351419650365, "loss": 0.2534, "step": 20250 }, { "epoch": 0.14380191392823746, "grad_norm": 0.10888671875, "learning_rate": 0.001998349706918506, "loss": 0.2629, "step": 20260 }, { "epoch": 0.14387289216808358, "grad_norm": 0.12353515625, "learning_rate": 0.0019983479932982355, "loss": 0.235, "step": 20270 }, { "epoch": 0.1439438704079297, "grad_norm": 0.130859375, "learning_rate": 0.0019983462787895546, "loss": 0.2592, "step": 20280 }, { "epoch": 0.1440148486477758, "grad_norm": 0.06982421875, "learning_rate": 0.0019983445633924663, "loss": 0.2705, "step": 20290 }, { "epoch": 0.14408582688762192, "grad_norm": 0.1640625, "learning_rate": 0.001998342847106971, "loss": 0.2858, "step": 20300 }, { "epoch": 0.14415680512746804, "grad_norm": 0.0791015625, "learning_rate": 0.0019983411299330715, "loss": 0.2739, "step": 20310 }, { "epoch": 0.14422778336731418, "grad_norm": 0.08203125, "learning_rate": 0.0019983394118707684, "loss": 0.2456, "step": 20320 }, { "epoch": 0.1442987616071603, "grad_norm": 0.08935546875, "learning_rate": 0.001998337692920064, "loss": 0.258, "step": 20330 }, { "epoch": 0.1443697398470064, "grad_norm": 0.0986328125, "learning_rate": 0.00199833597308096, "loss": 0.2565, "step": 20340 }, { "epoch": 0.14444071808685252, "grad_norm": 0.1474609375, "learning_rate": 0.001998334252353458, "loss": 0.2437, "step": 20350 }, { "epoch": 0.14451169632669864, "grad_norm": 0.11474609375, "learning_rate": 0.00199833253073756, "loss": 0.2405, "step": 20360 }, { "epoch": 0.14458267456654475, "grad_norm": 0.138671875, "learning_rate": 0.001998330808233267, "loss": 0.2561, "step": 20370 }, { "epoch": 0.1446536528063909, "grad_norm": 0.11572265625, "learning_rate": 0.0019983290848405817, "loss": 0.2671, "step": 20380 }, { "epoch": 0.144724631046237, "grad_norm": 0.09765625, "learning_rate": 0.001998327360559505, "loss": 0.25, "step": 20390 }, { "epoch": 0.14479560928608312, "grad_norm": 0.1552734375, "learning_rate": 0.0019983256353900383, "loss": 0.2401, "step": 20400 }, { "epoch": 0.14486658752592924, "grad_norm": 0.0791015625, "learning_rate": 0.0019983239093321844, "loss": 0.2367, "step": 20410 }, { "epoch": 0.14493756576577535, "grad_norm": 0.07861328125, "learning_rate": 0.0019983221823859447, "loss": 0.2452, "step": 20420 }, { "epoch": 0.14500854400562146, "grad_norm": 0.150390625, "learning_rate": 0.0019983204545513202, "loss": 0.253, "step": 20430 }, { "epoch": 0.1450795222454676, "grad_norm": 0.1083984375, "learning_rate": 0.0019983187258283135, "loss": 0.2546, "step": 20440 }, { "epoch": 0.14515050048531372, "grad_norm": 0.08203125, "learning_rate": 0.0019983169962169253, "loss": 0.2472, "step": 20450 }, { "epoch": 0.14522147872515984, "grad_norm": 0.130859375, "learning_rate": 0.001998315265717158, "loss": 0.2474, "step": 20460 }, { "epoch": 0.14529245696500595, "grad_norm": 0.09130859375, "learning_rate": 0.001998313534329013, "loss": 0.2754, "step": 20470 }, { "epoch": 0.14536343520485207, "grad_norm": 0.126953125, "learning_rate": 0.001998311802052493, "loss": 0.2588, "step": 20480 }, { "epoch": 0.14543441344469818, "grad_norm": 0.1552734375, "learning_rate": 0.0019983100688875984, "loss": 0.2662, "step": 20490 }, { "epoch": 0.14550539168454432, "grad_norm": 0.09423828125, "learning_rate": 0.0019983083348343316, "loss": 0.2405, "step": 20500 }, { "epoch": 0.14557636992439044, "grad_norm": 0.12451171875, "learning_rate": 0.001998306599892694, "loss": 0.2566, "step": 20510 }, { "epoch": 0.14564734816423655, "grad_norm": 0.111328125, "learning_rate": 0.001998304864062688, "loss": 0.2544, "step": 20520 }, { "epoch": 0.14571832640408267, "grad_norm": 0.1328125, "learning_rate": 0.001998303127344314, "loss": 0.2466, "step": 20530 }, { "epoch": 0.14578930464392878, "grad_norm": 0.12353515625, "learning_rate": 0.001998301389737575, "loss": 0.2496, "step": 20540 }, { "epoch": 0.1458602828837749, "grad_norm": 0.0830078125, "learning_rate": 0.001998299651242472, "loss": 0.2552, "step": 20550 }, { "epoch": 0.14593126112362104, "grad_norm": 0.0888671875, "learning_rate": 0.001998297911859007, "loss": 0.2232, "step": 20560 }, { "epoch": 0.14600223936346715, "grad_norm": 0.0771484375, "learning_rate": 0.0019982961715871816, "loss": 0.2462, "step": 20570 }, { "epoch": 0.14607321760331327, "grad_norm": 0.1279296875, "learning_rate": 0.001998294430426998, "loss": 0.2564, "step": 20580 }, { "epoch": 0.14614419584315938, "grad_norm": 0.2119140625, "learning_rate": 0.001998292688378457, "loss": 0.252, "step": 20590 }, { "epoch": 0.1462151740830055, "grad_norm": 0.068359375, "learning_rate": 0.0019982909454415608, "loss": 0.2489, "step": 20600 }, { "epoch": 0.1462861523228516, "grad_norm": 0.08447265625, "learning_rate": 0.0019982892016163116, "loss": 0.258, "step": 20610 }, { "epoch": 0.14635713056269775, "grad_norm": 0.1083984375, "learning_rate": 0.0019982874569027106, "loss": 0.2296, "step": 20620 }, { "epoch": 0.14642810880254387, "grad_norm": 0.154296875, "learning_rate": 0.0019982857113007594, "loss": 0.2593, "step": 20630 }, { "epoch": 0.14649908704238998, "grad_norm": 0.1328125, "learning_rate": 0.0019982839648104603, "loss": 0.251, "step": 20640 }, { "epoch": 0.1465700652822361, "grad_norm": 0.10400390625, "learning_rate": 0.001998282217431814, "loss": 0.2432, "step": 20650 }, { "epoch": 0.1466410435220822, "grad_norm": 0.09814453125, "learning_rate": 0.001998280469164823, "loss": 0.2477, "step": 20660 }, { "epoch": 0.14671202176192832, "grad_norm": 0.1103515625, "learning_rate": 0.0019982787200094896, "loss": 0.2402, "step": 20670 }, { "epoch": 0.14678300000177447, "grad_norm": 0.1611328125, "learning_rate": 0.0019982769699658145, "loss": 0.2669, "step": 20680 }, { "epoch": 0.14685397824162058, "grad_norm": 0.09521484375, "learning_rate": 0.0019982752190337997, "loss": 0.2547, "step": 20690 }, { "epoch": 0.1469249564814667, "grad_norm": 0.11328125, "learning_rate": 0.0019982734672134472, "loss": 0.279, "step": 20700 }, { "epoch": 0.1469959347213128, "grad_norm": 0.115234375, "learning_rate": 0.001998271714504758, "loss": 0.2583, "step": 20710 }, { "epoch": 0.14706691296115892, "grad_norm": 0.1826171875, "learning_rate": 0.001998269960907735, "loss": 0.2765, "step": 20720 }, { "epoch": 0.14713789120100504, "grad_norm": 0.189453125, "learning_rate": 0.001998268206422379, "loss": 0.2534, "step": 20730 }, { "epoch": 0.14720886944085118, "grad_norm": 0.08837890625, "learning_rate": 0.0019982664510486917, "loss": 0.2495, "step": 20740 }, { "epoch": 0.1472798476806973, "grad_norm": 0.201171875, "learning_rate": 0.0019982646947866757, "loss": 0.2495, "step": 20750 }, { "epoch": 0.1473508259205434, "grad_norm": 0.083984375, "learning_rate": 0.001998262937636332, "loss": 0.266, "step": 20760 }, { "epoch": 0.14742180416038952, "grad_norm": 0.119140625, "learning_rate": 0.0019982611795976626, "loss": 0.2775, "step": 20770 }, { "epoch": 0.14749278240023564, "grad_norm": 0.0888671875, "learning_rate": 0.0019982594206706694, "loss": 0.2615, "step": 20780 }, { "epoch": 0.14756376064008175, "grad_norm": 0.064453125, "learning_rate": 0.0019982576608553538, "loss": 0.2466, "step": 20790 }, { "epoch": 0.1476347388799279, "grad_norm": 0.12060546875, "learning_rate": 0.0019982559001517175, "loss": 0.2534, "step": 20800 }, { "epoch": 0.147705717119774, "grad_norm": 0.1318359375, "learning_rate": 0.0019982541385597623, "loss": 0.2549, "step": 20810 }, { "epoch": 0.14777669535962012, "grad_norm": 0.08251953125, "learning_rate": 0.0019982523760794903, "loss": 0.2684, "step": 20820 }, { "epoch": 0.14784767359946624, "grad_norm": 0.10693359375, "learning_rate": 0.001998250612710903, "loss": 0.2572, "step": 20830 }, { "epoch": 0.14791865183931235, "grad_norm": 0.10791015625, "learning_rate": 0.001998248848454002, "loss": 0.251, "step": 20840 }, { "epoch": 0.14798963007915847, "grad_norm": 0.09814453125, "learning_rate": 0.001998247083308789, "loss": 0.2585, "step": 20850 }, { "epoch": 0.1480606083190046, "grad_norm": 0.09326171875, "learning_rate": 0.0019982453172752664, "loss": 0.2485, "step": 20860 }, { "epoch": 0.14813158655885073, "grad_norm": 0.07861328125, "learning_rate": 0.0019982435503534354, "loss": 0.2547, "step": 20870 }, { "epoch": 0.14820256479869684, "grad_norm": 0.2001953125, "learning_rate": 0.001998241782543298, "loss": 0.2544, "step": 20880 }, { "epoch": 0.14827354303854295, "grad_norm": 0.0947265625, "learning_rate": 0.0019982400138448555, "loss": 0.253, "step": 20890 }, { "epoch": 0.14834452127838907, "grad_norm": 0.125, "learning_rate": 0.0019982382442581097, "loss": 0.2774, "step": 20900 }, { "epoch": 0.14841549951823518, "grad_norm": 0.10400390625, "learning_rate": 0.001998236473783063, "loss": 0.2531, "step": 20910 }, { "epoch": 0.14848647775808133, "grad_norm": 0.1220703125, "learning_rate": 0.0019982347024197164, "loss": 0.2531, "step": 20920 }, { "epoch": 0.14855745599792744, "grad_norm": 0.1171875, "learning_rate": 0.001998232930168072, "loss": 0.2631, "step": 20930 }, { "epoch": 0.14862843423777355, "grad_norm": 0.1376953125, "learning_rate": 0.001998231157028132, "loss": 0.2663, "step": 20940 }, { "epoch": 0.14869941247761967, "grad_norm": 0.09619140625, "learning_rate": 0.001998229382999897, "loss": 0.2603, "step": 20950 }, { "epoch": 0.14877039071746578, "grad_norm": 0.08203125, "learning_rate": 0.0019982276080833697, "loss": 0.25, "step": 20960 }, { "epoch": 0.14884136895731193, "grad_norm": 0.1357421875, "learning_rate": 0.001998225832278552, "loss": 0.2612, "step": 20970 }, { "epoch": 0.14891234719715804, "grad_norm": 0.11767578125, "learning_rate": 0.0019982240555854445, "loss": 0.242, "step": 20980 }, { "epoch": 0.14898332543700415, "grad_norm": 0.11083984375, "learning_rate": 0.0019982222780040504, "loss": 0.2506, "step": 20990 }, { "epoch": 0.14905430367685027, "grad_norm": 0.138671875, "learning_rate": 0.00199822049953437, "loss": 0.2608, "step": 21000 }, { "epoch": 0.14912528191669638, "grad_norm": 0.193359375, "learning_rate": 0.0019982187201764063, "loss": 0.2515, "step": 21010 }, { "epoch": 0.1491962601565425, "grad_norm": 0.1220703125, "learning_rate": 0.0019982169399301603, "loss": 0.2434, "step": 21020 }, { "epoch": 0.14926723839638864, "grad_norm": 0.11474609375, "learning_rate": 0.0019982151587956344, "loss": 0.2632, "step": 21030 }, { "epoch": 0.14933821663623476, "grad_norm": 0.1787109375, "learning_rate": 0.00199821337677283, "loss": 0.2522, "step": 21040 }, { "epoch": 0.14940919487608087, "grad_norm": 0.10205078125, "learning_rate": 0.001998211593861749, "loss": 0.2377, "step": 21050 }, { "epoch": 0.14948017311592698, "grad_norm": 0.1904296875, "learning_rate": 0.0019982098100623925, "loss": 0.2514, "step": 21060 }, { "epoch": 0.1495511513557731, "grad_norm": 0.095703125, "learning_rate": 0.001998208025374763, "loss": 0.2664, "step": 21070 }, { "epoch": 0.1496221295956192, "grad_norm": 0.16015625, "learning_rate": 0.0019982062397988624, "loss": 0.2806, "step": 21080 }, { "epoch": 0.14969310783546536, "grad_norm": 0.142578125, "learning_rate": 0.001998204453334692, "loss": 0.2433, "step": 21090 }, { "epoch": 0.14976408607531147, "grad_norm": 0.1572265625, "learning_rate": 0.001998202665982253, "loss": 0.2476, "step": 21100 }, { "epoch": 0.14983506431515758, "grad_norm": 0.10302734375, "learning_rate": 0.0019982008777415486, "loss": 0.2505, "step": 21110 }, { "epoch": 0.1499060425550037, "grad_norm": 0.09716796875, "learning_rate": 0.0019981990886125795, "loss": 0.2442, "step": 21120 }, { "epoch": 0.1499770207948498, "grad_norm": 0.1103515625, "learning_rate": 0.0019981972985953483, "loss": 0.2432, "step": 21130 }, { "epoch": 0.15004799903469593, "grad_norm": 0.11474609375, "learning_rate": 0.001998195507689856, "loss": 0.2221, "step": 21140 }, { "epoch": 0.15011897727454207, "grad_norm": 0.095703125, "learning_rate": 0.0019981937158961042, "loss": 0.2519, "step": 21150 }, { "epoch": 0.15018995551438818, "grad_norm": 0.1005859375, "learning_rate": 0.0019981919232140953, "loss": 0.2561, "step": 21160 }, { "epoch": 0.1502609337542343, "grad_norm": 0.134765625, "learning_rate": 0.001998190129643831, "loss": 0.2516, "step": 21170 }, { "epoch": 0.1503319119940804, "grad_norm": 0.1552734375, "learning_rate": 0.0019981883351853132, "loss": 0.2397, "step": 21180 }, { "epoch": 0.15040289023392653, "grad_norm": 0.09521484375, "learning_rate": 0.001998186539838543, "loss": 0.2543, "step": 21190 }, { "epoch": 0.15047386847377264, "grad_norm": 0.330078125, "learning_rate": 0.001998184743603523, "loss": 0.2563, "step": 21200 }, { "epoch": 0.15054484671361879, "grad_norm": 0.08056640625, "learning_rate": 0.0019981829464802542, "loss": 0.2413, "step": 21210 }, { "epoch": 0.1506158249534649, "grad_norm": 0.11279296875, "learning_rate": 0.001998181148468739, "loss": 0.2513, "step": 21220 }, { "epoch": 0.15068680319331101, "grad_norm": 0.0859375, "learning_rate": 0.001998179349568979, "loss": 0.2438, "step": 21230 }, { "epoch": 0.15075778143315713, "grad_norm": 0.09765625, "learning_rate": 0.001998177549780976, "loss": 0.2456, "step": 21240 }, { "epoch": 0.15082875967300324, "grad_norm": 0.125, "learning_rate": 0.0019981757491047314, "loss": 0.2339, "step": 21250 }, { "epoch": 0.15089973791284936, "grad_norm": 0.142578125, "learning_rate": 0.001998173947540247, "loss": 0.2521, "step": 21260 }, { "epoch": 0.1509707161526955, "grad_norm": 0.1630859375, "learning_rate": 0.0019981721450875256, "loss": 0.2394, "step": 21270 }, { "epoch": 0.15104169439254161, "grad_norm": 0.11669921875, "learning_rate": 0.001998170341746568, "loss": 0.2525, "step": 21280 }, { "epoch": 0.15111267263238773, "grad_norm": 0.10791015625, "learning_rate": 0.001998168537517376, "loss": 0.2721, "step": 21290 }, { "epoch": 0.15118365087223384, "grad_norm": 0.10302734375, "learning_rate": 0.0019981667323999515, "loss": 0.2656, "step": 21300 }, { "epoch": 0.15125462911207996, "grad_norm": 0.103515625, "learning_rate": 0.0019981649263942965, "loss": 0.2471, "step": 21310 }, { "epoch": 0.15132560735192607, "grad_norm": 0.1123046875, "learning_rate": 0.001998163119500413, "loss": 0.257, "step": 21320 }, { "epoch": 0.15139658559177221, "grad_norm": 0.08935546875, "learning_rate": 0.0019981613117183022, "loss": 0.2533, "step": 21330 }, { "epoch": 0.15146756383161833, "grad_norm": 0.05517578125, "learning_rate": 0.001998159503047966, "loss": 0.2535, "step": 21340 }, { "epoch": 0.15153854207146444, "grad_norm": 0.1259765625, "learning_rate": 0.0019981576934894065, "loss": 0.242, "step": 21350 }, { "epoch": 0.15160952031131056, "grad_norm": 0.1767578125, "learning_rate": 0.0019981558830426257, "loss": 0.2731, "step": 21360 }, { "epoch": 0.15168049855115667, "grad_norm": 0.0947265625, "learning_rate": 0.0019981540717076245, "loss": 0.2528, "step": 21370 }, { "epoch": 0.1517514767910028, "grad_norm": 0.2578125, "learning_rate": 0.0019981522594844053, "loss": 0.2617, "step": 21380 }, { "epoch": 0.15182245503084893, "grad_norm": 0.1220703125, "learning_rate": 0.00199815044637297, "loss": 0.2348, "step": 21390 }, { "epoch": 0.15189343327069504, "grad_norm": 0.15625, "learning_rate": 0.0019981486323733203, "loss": 0.2671, "step": 21400 }, { "epoch": 0.15196441151054116, "grad_norm": 0.1494140625, "learning_rate": 0.0019981468174854575, "loss": 0.2692, "step": 21410 }, { "epoch": 0.15203538975038727, "grad_norm": 0.1474609375, "learning_rate": 0.001998145001709384, "loss": 0.2469, "step": 21420 }, { "epoch": 0.1521063679902334, "grad_norm": 0.103515625, "learning_rate": 0.0019981431850451016, "loss": 0.2662, "step": 21430 }, { "epoch": 0.1521773462300795, "grad_norm": 0.09619140625, "learning_rate": 0.0019981413674926116, "loss": 0.2508, "step": 21440 }, { "epoch": 0.15224832446992564, "grad_norm": 0.1337890625, "learning_rate": 0.0019981395490519162, "loss": 0.2715, "step": 21450 }, { "epoch": 0.15231930270977176, "grad_norm": 0.08837890625, "learning_rate": 0.001998137729723017, "loss": 0.2636, "step": 21460 }, { "epoch": 0.15239028094961787, "grad_norm": 0.12451171875, "learning_rate": 0.0019981359095059157, "loss": 0.2547, "step": 21470 }, { "epoch": 0.152461259189464, "grad_norm": 0.1103515625, "learning_rate": 0.001998134088400615, "loss": 0.2627, "step": 21480 }, { "epoch": 0.1525322374293101, "grad_norm": 0.12060546875, "learning_rate": 0.001998132266407115, "loss": 0.2505, "step": 21490 }, { "epoch": 0.15260321566915622, "grad_norm": 0.263671875, "learning_rate": 0.001998130443525419, "loss": 0.2506, "step": 21500 }, { "epoch": 0.15267419390900236, "grad_norm": 0.09814453125, "learning_rate": 0.001998128619755528, "loss": 0.2663, "step": 21510 }, { "epoch": 0.15274517214884847, "grad_norm": 0.0859375, "learning_rate": 0.0019981267950974443, "loss": 0.2417, "step": 21520 }, { "epoch": 0.1528161503886946, "grad_norm": 0.31640625, "learning_rate": 0.0019981249695511694, "loss": 0.2523, "step": 21530 }, { "epoch": 0.1528871286285407, "grad_norm": 0.07763671875, "learning_rate": 0.0019981231431167056, "loss": 0.2539, "step": 21540 }, { "epoch": 0.15295810686838682, "grad_norm": 0.1357421875, "learning_rate": 0.001998121315794054, "loss": 0.2521, "step": 21550 }, { "epoch": 0.15302908510823293, "grad_norm": 0.1259765625, "learning_rate": 0.0019981194875832164, "loss": 0.2546, "step": 21560 }, { "epoch": 0.15310006334807907, "grad_norm": 0.10302734375, "learning_rate": 0.0019981176584841953, "loss": 0.2434, "step": 21570 }, { "epoch": 0.1531710415879252, "grad_norm": 0.146484375, "learning_rate": 0.001998115828496992, "loss": 0.2525, "step": 21580 }, { "epoch": 0.1532420198277713, "grad_norm": 0.146484375, "learning_rate": 0.0019981139976216083, "loss": 0.2517, "step": 21590 }, { "epoch": 0.15331299806761742, "grad_norm": 0.10009765625, "learning_rate": 0.0019981121658580467, "loss": 0.2513, "step": 21600 }, { "epoch": 0.15338397630746353, "grad_norm": 0.109375, "learning_rate": 0.0019981103332063076, "loss": 0.2523, "step": 21610 }, { "epoch": 0.15345495454730965, "grad_norm": 0.1357421875, "learning_rate": 0.0019981084996663944, "loss": 0.2676, "step": 21620 }, { "epoch": 0.1535259327871558, "grad_norm": 0.212890625, "learning_rate": 0.001998106665238308, "loss": 0.2382, "step": 21630 }, { "epoch": 0.1535969110270019, "grad_norm": 0.11376953125, "learning_rate": 0.0019981048299220505, "loss": 0.2595, "step": 21640 }, { "epoch": 0.15366788926684802, "grad_norm": 0.1259765625, "learning_rate": 0.001998102993717623, "loss": 0.261, "step": 21650 }, { "epoch": 0.15373886750669413, "grad_norm": 0.1708984375, "learning_rate": 0.0019981011566250284, "loss": 0.2401, "step": 21660 }, { "epoch": 0.15380984574654025, "grad_norm": 0.220703125, "learning_rate": 0.0019980993186442682, "loss": 0.2584, "step": 21670 }, { "epoch": 0.15388082398638636, "grad_norm": 0.115234375, "learning_rate": 0.001998097479775344, "loss": 0.2623, "step": 21680 }, { "epoch": 0.1539518022262325, "grad_norm": 0.10498046875, "learning_rate": 0.0019980956400182572, "loss": 0.2591, "step": 21690 }, { "epoch": 0.15402278046607862, "grad_norm": 0.0927734375, "learning_rate": 0.0019980937993730108, "loss": 0.2409, "step": 21700 }, { "epoch": 0.15409375870592473, "grad_norm": 0.154296875, "learning_rate": 0.001998091957839605, "loss": 0.2637, "step": 21710 }, { "epoch": 0.15416473694577085, "grad_norm": 0.08984375, "learning_rate": 0.0019980901154180433, "loss": 0.2888, "step": 21720 }, { "epoch": 0.15423571518561696, "grad_norm": 0.08154296875, "learning_rate": 0.0019980882721083266, "loss": 0.2731, "step": 21730 }, { "epoch": 0.15430669342546308, "grad_norm": 0.099609375, "learning_rate": 0.0019980864279104567, "loss": 0.246, "step": 21740 }, { "epoch": 0.15437767166530922, "grad_norm": 0.1875, "learning_rate": 0.0019980845828244356, "loss": 0.259, "step": 21750 }, { "epoch": 0.15444864990515533, "grad_norm": 0.10009765625, "learning_rate": 0.0019980827368502652, "loss": 0.2548, "step": 21760 }, { "epoch": 0.15451962814500145, "grad_norm": 0.119140625, "learning_rate": 0.0019980808899879477, "loss": 0.2504, "step": 21770 }, { "epoch": 0.15459060638484756, "grad_norm": 0.09375, "learning_rate": 0.001998079042237484, "loss": 0.2564, "step": 21780 }, { "epoch": 0.15466158462469368, "grad_norm": 0.1435546875, "learning_rate": 0.0019980771935988765, "loss": 0.2574, "step": 21790 }, { "epoch": 0.1547325628645398, "grad_norm": 0.11376953125, "learning_rate": 0.001998075344072127, "loss": 0.2723, "step": 21800 }, { "epoch": 0.15480354110438593, "grad_norm": 0.140625, "learning_rate": 0.0019980734936572374, "loss": 0.235, "step": 21810 }, { "epoch": 0.15487451934423205, "grad_norm": 0.0869140625, "learning_rate": 0.001998071642354209, "loss": 0.2719, "step": 21820 }, { "epoch": 0.15494549758407816, "grad_norm": 0.1904296875, "learning_rate": 0.0019980697901630443, "loss": 0.2748, "step": 21830 }, { "epoch": 0.15501647582392428, "grad_norm": 0.0947265625, "learning_rate": 0.001998067937083745, "loss": 0.2558, "step": 21840 }, { "epoch": 0.1550874540637704, "grad_norm": 0.1455078125, "learning_rate": 0.001998066083116313, "loss": 0.2628, "step": 21850 }, { "epoch": 0.1551584323036165, "grad_norm": 0.09765625, "learning_rate": 0.001998064228260749, "loss": 0.2428, "step": 21860 }, { "epoch": 0.15522941054346265, "grad_norm": 0.1318359375, "learning_rate": 0.0019980623725170564, "loss": 0.2448, "step": 21870 }, { "epoch": 0.15530038878330876, "grad_norm": 0.0888671875, "learning_rate": 0.0019980605158852367, "loss": 0.2472, "step": 21880 }, { "epoch": 0.15537136702315488, "grad_norm": 0.111328125, "learning_rate": 0.0019980586583652907, "loss": 0.2462, "step": 21890 }, { "epoch": 0.155442345263001, "grad_norm": 0.1142578125, "learning_rate": 0.0019980567999572213, "loss": 0.2469, "step": 21900 }, { "epoch": 0.1555133235028471, "grad_norm": 0.08203125, "learning_rate": 0.0019980549406610304, "loss": 0.2678, "step": 21910 }, { "epoch": 0.15558430174269322, "grad_norm": 0.1318359375, "learning_rate": 0.001998053080476719, "loss": 0.2711, "step": 21920 }, { "epoch": 0.15565527998253936, "grad_norm": 0.095703125, "learning_rate": 0.0019980512194042897, "loss": 0.2472, "step": 21930 }, { "epoch": 0.15572625822238548, "grad_norm": 0.23046875, "learning_rate": 0.0019980493574437434, "loss": 0.2694, "step": 21940 }, { "epoch": 0.1557972364622316, "grad_norm": 0.12060546875, "learning_rate": 0.0019980474945950834, "loss": 0.2718, "step": 21950 }, { "epoch": 0.1558682147020777, "grad_norm": 0.1201171875, "learning_rate": 0.00199804563085831, "loss": 0.2483, "step": 21960 }, { "epoch": 0.15593919294192382, "grad_norm": 0.1376953125, "learning_rate": 0.0019980437662334262, "loss": 0.2499, "step": 21970 }, { "epoch": 0.15601017118176994, "grad_norm": 0.1435546875, "learning_rate": 0.001998041900720433, "loss": 0.2609, "step": 21980 }, { "epoch": 0.15608114942161608, "grad_norm": 0.1044921875, "learning_rate": 0.001998040034319333, "loss": 0.261, "step": 21990 }, { "epoch": 0.1561521276614622, "grad_norm": 0.12451171875, "learning_rate": 0.0019980381670301277, "loss": 0.2863, "step": 22000 }, { "epoch": 0.1561521276614622, "eval_covost2-zh-en_loss": 3.9690017700195312, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.9151, "eval_covost2-zh-en_samples_per_second": 3.06, "eval_covost2-zh-en_steps_per_second": 0.191, "step": 22000 }, { "epoch": 0.1561521276614622, "eval_covost2-en-zh_loss": 3.091444492340088, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.3556, "eval_covost2-en-zh_samples_per_second": 3.307, "eval_covost2-en-zh_steps_per_second": 0.207, "step": 22000 }, { "epoch": 0.1562231059013083, "grad_norm": 0.09326171875, "learning_rate": 0.0019980362988528187, "loss": 0.2457, "step": 22010 }, { "epoch": 0.15629408414115442, "grad_norm": 0.140625, "learning_rate": 0.0019980344297874084, "loss": 0.2457, "step": 22020 }, { "epoch": 0.15636506238100054, "grad_norm": 0.11669921875, "learning_rate": 0.001998032559833898, "loss": 0.2542, "step": 22030 }, { "epoch": 0.15643604062084665, "grad_norm": 0.1083984375, "learning_rate": 0.00199803068899229, "loss": 0.2342, "step": 22040 }, { "epoch": 0.1565070188606928, "grad_norm": 0.177734375, "learning_rate": 0.0019980288172625855, "loss": 0.2649, "step": 22050 }, { "epoch": 0.1565779971005389, "grad_norm": 0.0849609375, "learning_rate": 0.0019980269446447873, "loss": 0.248, "step": 22060 }, { "epoch": 0.15664897534038502, "grad_norm": 0.0791015625, "learning_rate": 0.0019980250711388965, "loss": 0.2553, "step": 22070 }, { "epoch": 0.15671995358023114, "grad_norm": 0.09375, "learning_rate": 0.001998023196744915, "loss": 0.248, "step": 22080 }, { "epoch": 0.15679093182007725, "grad_norm": 0.07958984375, "learning_rate": 0.001998021321462845, "loss": 0.247, "step": 22090 }, { "epoch": 0.15686191005992337, "grad_norm": 0.0947265625, "learning_rate": 0.001998019445292688, "loss": 0.2429, "step": 22100 }, { "epoch": 0.1569328882997695, "grad_norm": 0.294921875, "learning_rate": 0.0019980175682344464, "loss": 0.2771, "step": 22110 }, { "epoch": 0.15700386653961562, "grad_norm": 0.1298828125, "learning_rate": 0.0019980156902881215, "loss": 0.243, "step": 22120 }, { "epoch": 0.15707484477946174, "grad_norm": 0.150390625, "learning_rate": 0.0019980138114537157, "loss": 0.2632, "step": 22130 }, { "epoch": 0.15714582301930785, "grad_norm": 0.1865234375, "learning_rate": 0.00199801193173123, "loss": 0.2504, "step": 22140 }, { "epoch": 0.15721680125915397, "grad_norm": 0.10009765625, "learning_rate": 0.0019980100511206673, "loss": 0.2622, "step": 22150 }, { "epoch": 0.1572877794990001, "grad_norm": 0.10400390625, "learning_rate": 0.0019980081696220286, "loss": 0.2582, "step": 22160 }, { "epoch": 0.15735875773884622, "grad_norm": 0.107421875, "learning_rate": 0.0019980062872353163, "loss": 0.2432, "step": 22170 }, { "epoch": 0.15742973597869234, "grad_norm": 0.1630859375, "learning_rate": 0.001998004403960532, "loss": 0.2818, "step": 22180 }, { "epoch": 0.15750071421853845, "grad_norm": 0.1337890625, "learning_rate": 0.0019980025197976776, "loss": 0.2557, "step": 22190 }, { "epoch": 0.15757169245838457, "grad_norm": 0.123046875, "learning_rate": 0.001998000634746755, "loss": 0.2424, "step": 22200 }, { "epoch": 0.15764267069823068, "grad_norm": 0.1474609375, "learning_rate": 0.0019979987488077658, "loss": 0.2549, "step": 22210 }, { "epoch": 0.15771364893807682, "grad_norm": 0.10595703125, "learning_rate": 0.001997996861980712, "loss": 0.2597, "step": 22220 }, { "epoch": 0.15778462717792294, "grad_norm": 0.1689453125, "learning_rate": 0.001997994974265596, "loss": 0.2491, "step": 22230 }, { "epoch": 0.15785560541776905, "grad_norm": 0.1279296875, "learning_rate": 0.0019979930856624194, "loss": 0.274, "step": 22240 }, { "epoch": 0.15792658365761517, "grad_norm": 0.11572265625, "learning_rate": 0.0019979911961711836, "loss": 0.2532, "step": 22250 }, { "epoch": 0.15799756189746128, "grad_norm": 0.1669921875, "learning_rate": 0.001997989305791891, "loss": 0.2612, "step": 22260 }, { "epoch": 0.1580685401373074, "grad_norm": 0.15234375, "learning_rate": 0.001997987414524543, "loss": 0.2569, "step": 22270 }, { "epoch": 0.15813951837715354, "grad_norm": 0.087890625, "learning_rate": 0.0019979855223691417, "loss": 0.2513, "step": 22280 }, { "epoch": 0.15821049661699965, "grad_norm": 0.19140625, "learning_rate": 0.0019979836293256893, "loss": 0.2374, "step": 22290 }, { "epoch": 0.15828147485684577, "grad_norm": 0.2099609375, "learning_rate": 0.001997981735394187, "loss": 0.2834, "step": 22300 }, { "epoch": 0.15835245309669188, "grad_norm": 0.08544921875, "learning_rate": 0.0019979798405746372, "loss": 0.2462, "step": 22310 }, { "epoch": 0.158423431336538, "grad_norm": 0.09375, "learning_rate": 0.001997977944867042, "loss": 0.2517, "step": 22320 }, { "epoch": 0.1584944095763841, "grad_norm": 0.1259765625, "learning_rate": 0.0019979760482714023, "loss": 0.2366, "step": 22330 }, { "epoch": 0.15856538781623025, "grad_norm": 0.09619140625, "learning_rate": 0.0019979741507877213, "loss": 0.271, "step": 22340 }, { "epoch": 0.15863636605607637, "grad_norm": 0.10791015625, "learning_rate": 0.0019979722524159993, "loss": 0.2624, "step": 22350 }, { "epoch": 0.15870734429592248, "grad_norm": 0.10498046875, "learning_rate": 0.0019979703531562394, "loss": 0.2483, "step": 22360 }, { "epoch": 0.1587783225357686, "grad_norm": 0.126953125, "learning_rate": 0.0019979684530084434, "loss": 0.26, "step": 22370 }, { "epoch": 0.1588493007756147, "grad_norm": 0.08544921875, "learning_rate": 0.001997966551972612, "loss": 0.2651, "step": 22380 }, { "epoch": 0.15892027901546082, "grad_norm": 0.1015625, "learning_rate": 0.0019979646500487493, "loss": 0.2402, "step": 22390 }, { "epoch": 0.15899125725530697, "grad_norm": 0.0703125, "learning_rate": 0.0019979627472368547, "loss": 0.2595, "step": 22400 }, { "epoch": 0.15906223549515308, "grad_norm": 0.08935546875, "learning_rate": 0.0019979608435369317, "loss": 0.2398, "step": 22410 }, { "epoch": 0.1591332137349992, "grad_norm": 0.1337890625, "learning_rate": 0.0019979589389489817, "loss": 0.2568, "step": 22420 }, { "epoch": 0.1592041919748453, "grad_norm": 0.08056640625, "learning_rate": 0.0019979570334730063, "loss": 0.2489, "step": 22430 }, { "epoch": 0.15927517021469142, "grad_norm": 0.2021484375, "learning_rate": 0.001997955127109008, "loss": 0.2469, "step": 22440 }, { "epoch": 0.15934614845453754, "grad_norm": 0.1630859375, "learning_rate": 0.0019979532198569883, "loss": 0.2693, "step": 22450 }, { "epoch": 0.15941712669438368, "grad_norm": 0.17578125, "learning_rate": 0.001997951311716949, "loss": 0.249, "step": 22460 }, { "epoch": 0.1594881049342298, "grad_norm": 0.1044921875, "learning_rate": 0.0019979494026888924, "loss": 0.254, "step": 22470 }, { "epoch": 0.1595590831740759, "grad_norm": 0.1396484375, "learning_rate": 0.00199794749277282, "loss": 0.2487, "step": 22480 }, { "epoch": 0.15963006141392203, "grad_norm": 0.1943359375, "learning_rate": 0.0019979455819687337, "loss": 0.2566, "step": 22490 }, { "epoch": 0.15970103965376814, "grad_norm": 0.07373046875, "learning_rate": 0.0019979436702766353, "loss": 0.2561, "step": 22500 }, { "epoch": 0.15977201789361425, "grad_norm": 0.0966796875, "learning_rate": 0.0019979417576965275, "loss": 0.2635, "step": 22510 }, { "epoch": 0.1598429961334604, "grad_norm": 0.09423828125, "learning_rate": 0.001997939844228411, "loss": 0.2242, "step": 22520 }, { "epoch": 0.1599139743733065, "grad_norm": 0.0986328125, "learning_rate": 0.0019979379298722885, "loss": 0.2456, "step": 22530 }, { "epoch": 0.15998495261315263, "grad_norm": 0.0712890625, "learning_rate": 0.0019979360146281616, "loss": 0.2579, "step": 22540 }, { "epoch": 0.16005593085299874, "grad_norm": 0.08984375, "learning_rate": 0.0019979340984960323, "loss": 0.2427, "step": 22550 }, { "epoch": 0.16012690909284485, "grad_norm": 0.12451171875, "learning_rate": 0.0019979321814759025, "loss": 0.2742, "step": 22560 }, { "epoch": 0.16019788733269097, "grad_norm": 0.1416015625, "learning_rate": 0.0019979302635677737, "loss": 0.2407, "step": 22570 }, { "epoch": 0.1602688655725371, "grad_norm": 0.22265625, "learning_rate": 0.0019979283447716487, "loss": 0.2606, "step": 22580 }, { "epoch": 0.16033984381238323, "grad_norm": 0.1337890625, "learning_rate": 0.0019979264250875282, "loss": 0.2552, "step": 22590 }, { "epoch": 0.16041082205222934, "grad_norm": 0.0966796875, "learning_rate": 0.0019979245045154154, "loss": 0.2466, "step": 22600 }, { "epoch": 0.16048180029207545, "grad_norm": 0.1455078125, "learning_rate": 0.001997922583055311, "loss": 0.2782, "step": 22610 }, { "epoch": 0.16055277853192157, "grad_norm": 0.11181640625, "learning_rate": 0.0019979206607072184, "loss": 0.2927, "step": 22620 }, { "epoch": 0.16062375677176768, "grad_norm": 0.1318359375, "learning_rate": 0.0019979187374711375, "loss": 0.2463, "step": 22630 }, { "epoch": 0.16069473501161383, "grad_norm": 0.10205078125, "learning_rate": 0.0019979168133470717, "loss": 0.2393, "step": 22640 }, { "epoch": 0.16076571325145994, "grad_norm": 0.08447265625, "learning_rate": 0.0019979148883350222, "loss": 0.2541, "step": 22650 }, { "epoch": 0.16083669149130606, "grad_norm": 0.150390625, "learning_rate": 0.0019979129624349916, "loss": 0.2664, "step": 22660 }, { "epoch": 0.16090766973115217, "grad_norm": 0.08544921875, "learning_rate": 0.001997911035646981, "loss": 0.2604, "step": 22670 }, { "epoch": 0.16097864797099828, "grad_norm": 0.08447265625, "learning_rate": 0.0019979091079709924, "loss": 0.2455, "step": 22680 }, { "epoch": 0.1610496262108444, "grad_norm": 0.1123046875, "learning_rate": 0.0019979071794070285, "loss": 0.2667, "step": 22690 }, { "epoch": 0.16112060445069054, "grad_norm": 0.11376953125, "learning_rate": 0.0019979052499550904, "loss": 0.2554, "step": 22700 }, { "epoch": 0.16119158269053666, "grad_norm": 0.078125, "learning_rate": 0.0019979033196151803, "loss": 0.2462, "step": 22710 }, { "epoch": 0.16126256093038277, "grad_norm": 0.05859375, "learning_rate": 0.0019979013883873005, "loss": 0.2446, "step": 22720 }, { "epoch": 0.16133353917022888, "grad_norm": 0.138671875, "learning_rate": 0.001997899456271452, "loss": 0.2634, "step": 22730 }, { "epoch": 0.161404517410075, "grad_norm": 0.1181640625, "learning_rate": 0.0019978975232676373, "loss": 0.2553, "step": 22740 }, { "epoch": 0.1614754956499211, "grad_norm": 0.1904296875, "learning_rate": 0.0019978955893758584, "loss": 0.2496, "step": 22750 }, { "epoch": 0.16154647388976726, "grad_norm": 0.1181640625, "learning_rate": 0.0019978936545961166, "loss": 0.2652, "step": 22760 }, { "epoch": 0.16161745212961337, "grad_norm": 0.20703125, "learning_rate": 0.0019978917189284145, "loss": 0.2392, "step": 22770 }, { "epoch": 0.16168843036945948, "grad_norm": 0.11474609375, "learning_rate": 0.001997889782372754, "loss": 0.2497, "step": 22780 }, { "epoch": 0.1617594086093056, "grad_norm": 0.1494140625, "learning_rate": 0.001997887844929137, "loss": 0.2709, "step": 22790 }, { "epoch": 0.16183038684915171, "grad_norm": 0.087890625, "learning_rate": 0.0019978859065975644, "loss": 0.253, "step": 22800 }, { "epoch": 0.16190136508899783, "grad_norm": 0.09619140625, "learning_rate": 0.0019978839673780394, "loss": 0.2501, "step": 22810 }, { "epoch": 0.16197234332884397, "grad_norm": 0.10693359375, "learning_rate": 0.0019978820272705637, "loss": 0.261, "step": 22820 }, { "epoch": 0.16204332156869009, "grad_norm": 0.1533203125, "learning_rate": 0.001997880086275138, "loss": 0.2474, "step": 22830 }, { "epoch": 0.1621142998085362, "grad_norm": 0.1298828125, "learning_rate": 0.001997878144391766, "loss": 0.2541, "step": 22840 }, { "epoch": 0.16218527804838231, "grad_norm": 0.1064453125, "learning_rate": 0.0019978762016204486, "loss": 0.2549, "step": 22850 }, { "epoch": 0.16225625628822843, "grad_norm": 0.1318359375, "learning_rate": 0.0019978742579611877, "loss": 0.2527, "step": 22860 }, { "epoch": 0.16232723452807454, "grad_norm": 0.1552734375, "learning_rate": 0.0019978723134139857, "loss": 0.2805, "step": 22870 }, { "epoch": 0.16239821276792069, "grad_norm": 0.169921875, "learning_rate": 0.0019978703679788446, "loss": 0.2731, "step": 22880 }, { "epoch": 0.1624691910077668, "grad_norm": 0.138671875, "learning_rate": 0.0019978684216557653, "loss": 0.2531, "step": 22890 }, { "epoch": 0.16254016924761291, "grad_norm": 0.07763671875, "learning_rate": 0.001997866474444751, "loss": 0.2433, "step": 22900 }, { "epoch": 0.16261114748745903, "grad_norm": 0.1416015625, "learning_rate": 0.0019978645263458022, "loss": 0.256, "step": 22910 }, { "epoch": 0.16268212572730514, "grad_norm": 0.1044921875, "learning_rate": 0.0019978625773589224, "loss": 0.2547, "step": 22920 }, { "epoch": 0.16275310396715126, "grad_norm": 0.1552734375, "learning_rate": 0.001997860627484112, "loss": 0.2613, "step": 22930 }, { "epoch": 0.1628240822069974, "grad_norm": 0.08740234375, "learning_rate": 0.0019978586767213746, "loss": 0.2608, "step": 22940 }, { "epoch": 0.16289506044684351, "grad_norm": 0.11181640625, "learning_rate": 0.001997856725070711, "loss": 0.2561, "step": 22950 }, { "epoch": 0.16296603868668963, "grad_norm": 0.10498046875, "learning_rate": 0.001997854772532123, "loss": 0.2379, "step": 22960 }, { "epoch": 0.16303701692653574, "grad_norm": 0.11474609375, "learning_rate": 0.0019978528191056132, "loss": 0.2416, "step": 22970 }, { "epoch": 0.16310799516638186, "grad_norm": 0.09375, "learning_rate": 0.0019978508647911833, "loss": 0.254, "step": 22980 }, { "epoch": 0.16317897340622797, "grad_norm": 0.1015625, "learning_rate": 0.0019978489095888352, "loss": 0.2459, "step": 22990 }, { "epoch": 0.16324995164607412, "grad_norm": 0.09423828125, "learning_rate": 0.00199784695349857, "loss": 0.2553, "step": 23000 }, { "epoch": 0.16332092988592023, "grad_norm": 0.224609375, "learning_rate": 0.0019978449965203917, "loss": 0.2511, "step": 23010 }, { "epoch": 0.16339190812576634, "grad_norm": 0.150390625, "learning_rate": 0.0019978430386543003, "loss": 0.2472, "step": 23020 }, { "epoch": 0.16346288636561246, "grad_norm": 0.142578125, "learning_rate": 0.0019978410799002984, "loss": 0.2509, "step": 23030 }, { "epoch": 0.16353386460545857, "grad_norm": 0.28125, "learning_rate": 0.001997839120258388, "loss": 0.2585, "step": 23040 }, { "epoch": 0.1636048428453047, "grad_norm": 0.08349609375, "learning_rate": 0.001997837159728571, "loss": 0.2487, "step": 23050 }, { "epoch": 0.16367582108515083, "grad_norm": 0.09228515625, "learning_rate": 0.001997835198310849, "loss": 0.2419, "step": 23060 }, { "epoch": 0.16374679932499694, "grad_norm": 0.10791015625, "learning_rate": 0.0019978332360052246, "loss": 0.2647, "step": 23070 }, { "epoch": 0.16381777756484306, "grad_norm": 0.1455078125, "learning_rate": 0.0019978312728116993, "loss": 0.2532, "step": 23080 }, { "epoch": 0.16388875580468917, "grad_norm": 0.166015625, "learning_rate": 0.0019978293087302753, "loss": 0.2604, "step": 23090 }, { "epoch": 0.1639597340445353, "grad_norm": 0.0986328125, "learning_rate": 0.0019978273437609544, "loss": 0.241, "step": 23100 }, { "epoch": 0.1640307122843814, "grad_norm": 0.1083984375, "learning_rate": 0.0019978253779037386, "loss": 0.2546, "step": 23110 }, { "epoch": 0.16410169052422754, "grad_norm": 0.0810546875, "learning_rate": 0.0019978234111586294, "loss": 0.2483, "step": 23120 }, { "epoch": 0.16417266876407366, "grad_norm": 0.10009765625, "learning_rate": 0.0019978214435256293, "loss": 0.2573, "step": 23130 }, { "epoch": 0.16424364700391977, "grad_norm": 0.11279296875, "learning_rate": 0.00199781947500474, "loss": 0.2341, "step": 23140 }, { "epoch": 0.1643146252437659, "grad_norm": 0.10400390625, "learning_rate": 0.0019978175055959634, "loss": 0.2562, "step": 23150 }, { "epoch": 0.164385603483612, "grad_norm": 0.099609375, "learning_rate": 0.001997815535299302, "loss": 0.2716, "step": 23160 }, { "epoch": 0.16445658172345812, "grad_norm": 0.0732421875, "learning_rate": 0.001997813564114757, "loss": 0.2605, "step": 23170 }, { "epoch": 0.16452755996330426, "grad_norm": 0.10791015625, "learning_rate": 0.001997811592042331, "loss": 0.2531, "step": 23180 }, { "epoch": 0.16459853820315037, "grad_norm": 0.087890625, "learning_rate": 0.001997809619082025, "loss": 0.2527, "step": 23190 }, { "epoch": 0.1646695164429965, "grad_norm": 0.0927734375, "learning_rate": 0.001997807645233842, "loss": 0.2278, "step": 23200 }, { "epoch": 0.1647404946828426, "grad_norm": 0.12890625, "learning_rate": 0.0019978056704977832, "loss": 0.2593, "step": 23210 }, { "epoch": 0.16481147292268872, "grad_norm": 0.11669921875, "learning_rate": 0.001997803694873851, "loss": 0.2463, "step": 23220 }, { "epoch": 0.16488245116253483, "grad_norm": 0.09033203125, "learning_rate": 0.0019978017183620474, "loss": 0.2552, "step": 23230 }, { "epoch": 0.16495342940238097, "grad_norm": 0.1796875, "learning_rate": 0.0019977997409623737, "loss": 0.2503, "step": 23240 }, { "epoch": 0.1650244076422271, "grad_norm": 0.083984375, "learning_rate": 0.001997797762674833, "loss": 0.2641, "step": 23250 }, { "epoch": 0.1650953858820732, "grad_norm": 0.09033203125, "learning_rate": 0.001997795783499426, "loss": 0.2564, "step": 23260 }, { "epoch": 0.16516636412191932, "grad_norm": 0.08935546875, "learning_rate": 0.0019977938034361556, "loss": 0.2539, "step": 23270 }, { "epoch": 0.16523734236176543, "grad_norm": 0.11328125, "learning_rate": 0.0019977918224850233, "loss": 0.2546, "step": 23280 }, { "epoch": 0.16530832060161157, "grad_norm": 0.10205078125, "learning_rate": 0.001997789840646031, "loss": 0.2516, "step": 23290 }, { "epoch": 0.1653792988414577, "grad_norm": 0.130859375, "learning_rate": 0.001997787857919181, "loss": 0.2779, "step": 23300 }, { "epoch": 0.1654502770813038, "grad_norm": 0.091796875, "learning_rate": 0.001997785874304475, "loss": 0.2536, "step": 23310 }, { "epoch": 0.16552125532114992, "grad_norm": 0.05712890625, "learning_rate": 0.001997783889801915, "loss": 0.2332, "step": 23320 }, { "epoch": 0.16559223356099603, "grad_norm": 0.1396484375, "learning_rate": 0.001997781904411503, "loss": 0.2506, "step": 23330 }, { "epoch": 0.16566321180084215, "grad_norm": 0.16796875, "learning_rate": 0.001997779918133241, "loss": 0.2637, "step": 23340 }, { "epoch": 0.1657341900406883, "grad_norm": 0.10986328125, "learning_rate": 0.0019977779309671307, "loss": 0.2526, "step": 23350 }, { "epoch": 0.1658051682805344, "grad_norm": 0.1337890625, "learning_rate": 0.001997775942913175, "loss": 0.2417, "step": 23360 }, { "epoch": 0.16587614652038052, "grad_norm": 0.09130859375, "learning_rate": 0.0019977739539713745, "loss": 0.2622, "step": 23370 }, { "epoch": 0.16594712476022663, "grad_norm": 0.095703125, "learning_rate": 0.001997771964141732, "loss": 0.2523, "step": 23380 }, { "epoch": 0.16601810300007275, "grad_norm": 0.125, "learning_rate": 0.001997769973424249, "loss": 0.2549, "step": 23390 }, { "epoch": 0.16608908123991886, "grad_norm": 0.11279296875, "learning_rate": 0.0019977679818189284, "loss": 0.2443, "step": 23400 }, { "epoch": 0.166160059479765, "grad_norm": 0.0888671875, "learning_rate": 0.0019977659893257713, "loss": 0.2487, "step": 23410 }, { "epoch": 0.16623103771961112, "grad_norm": 0.0986328125, "learning_rate": 0.0019977639959447794, "loss": 0.2428, "step": 23420 }, { "epoch": 0.16630201595945723, "grad_norm": 0.115234375, "learning_rate": 0.001997762001675956, "loss": 0.2313, "step": 23430 }, { "epoch": 0.16637299419930335, "grad_norm": 0.0966796875, "learning_rate": 0.0019977600065193015, "loss": 0.2517, "step": 23440 }, { "epoch": 0.16644397243914946, "grad_norm": 0.1513671875, "learning_rate": 0.001997758010474819, "loss": 0.2253, "step": 23450 }, { "epoch": 0.16651495067899558, "grad_norm": 0.1044921875, "learning_rate": 0.0019977560135425103, "loss": 0.2515, "step": 23460 }, { "epoch": 0.16658592891884172, "grad_norm": 0.0810546875, "learning_rate": 0.001997754015722377, "loss": 0.2371, "step": 23470 }, { "epoch": 0.16665690715868783, "grad_norm": 0.1064453125, "learning_rate": 0.001997752017014421, "loss": 0.2676, "step": 23480 }, { "epoch": 0.16672788539853395, "grad_norm": 0.1298828125, "learning_rate": 0.0019977500174186446, "loss": 0.2309, "step": 23490 }, { "epoch": 0.16679886363838006, "grad_norm": 0.078125, "learning_rate": 0.00199774801693505, "loss": 0.2471, "step": 23500 }, { "epoch": 0.16686984187822618, "grad_norm": 0.1767578125, "learning_rate": 0.0019977460155636384, "loss": 0.256, "step": 23510 }, { "epoch": 0.1669408201180723, "grad_norm": 0.1298828125, "learning_rate": 0.0019977440133044123, "loss": 0.2566, "step": 23520 }, { "epoch": 0.16701179835791843, "grad_norm": 0.0693359375, "learning_rate": 0.0019977420101573743, "loss": 0.2766, "step": 23530 }, { "epoch": 0.16708277659776455, "grad_norm": 0.10986328125, "learning_rate": 0.001997740006122525, "loss": 0.2589, "step": 23540 }, { "epoch": 0.16715375483761066, "grad_norm": 0.1103515625, "learning_rate": 0.0019977380011998672, "loss": 0.2552, "step": 23550 }, { "epoch": 0.16722473307745678, "grad_norm": 0.0859375, "learning_rate": 0.0019977359953894033, "loss": 0.266, "step": 23560 }, { "epoch": 0.1672957113173029, "grad_norm": 0.0986328125, "learning_rate": 0.001997733988691134, "loss": 0.2531, "step": 23570 }, { "epoch": 0.167366689557149, "grad_norm": 0.1123046875, "learning_rate": 0.0019977319811050626, "loss": 0.2691, "step": 23580 }, { "epoch": 0.16743766779699515, "grad_norm": 0.111328125, "learning_rate": 0.0019977299726311906, "loss": 0.2355, "step": 23590 }, { "epoch": 0.16750864603684126, "grad_norm": 0.12451171875, "learning_rate": 0.0019977279632695196, "loss": 0.2459, "step": 23600 }, { "epoch": 0.16757962427668738, "grad_norm": 0.0947265625, "learning_rate": 0.001997725953020052, "loss": 0.2463, "step": 23610 }, { "epoch": 0.1676506025165335, "grad_norm": 0.2109375, "learning_rate": 0.0019977239418827895, "loss": 0.2581, "step": 23620 }, { "epoch": 0.1677215807563796, "grad_norm": 0.310546875, "learning_rate": 0.0019977219298577347, "loss": 0.2767, "step": 23630 }, { "epoch": 0.16779255899622572, "grad_norm": 0.0751953125, "learning_rate": 0.001997719916944889, "loss": 0.2685, "step": 23640 }, { "epoch": 0.16786353723607186, "grad_norm": 0.2734375, "learning_rate": 0.0019977179031442547, "loss": 0.2405, "step": 23650 }, { "epoch": 0.16793451547591798, "grad_norm": 0.1337890625, "learning_rate": 0.0019977158884558334, "loss": 0.2464, "step": 23660 }, { "epoch": 0.1680054937157641, "grad_norm": 0.0810546875, "learning_rate": 0.0019977138728796274, "loss": 0.2404, "step": 23670 }, { "epoch": 0.1680764719556102, "grad_norm": 0.08349609375, "learning_rate": 0.001997711856415639, "loss": 0.265, "step": 23680 }, { "epoch": 0.16814745019545632, "grad_norm": 0.107421875, "learning_rate": 0.0019977098390638695, "loss": 0.2541, "step": 23690 }, { "epoch": 0.16821842843530244, "grad_norm": 0.1220703125, "learning_rate": 0.0019977078208243213, "loss": 0.2619, "step": 23700 }, { "epoch": 0.16828940667514858, "grad_norm": 0.0986328125, "learning_rate": 0.0019977058016969963, "loss": 0.2637, "step": 23710 }, { "epoch": 0.1683603849149947, "grad_norm": 0.08837890625, "learning_rate": 0.0019977037816818963, "loss": 0.2648, "step": 23720 }, { "epoch": 0.1684313631548408, "grad_norm": 0.12890625, "learning_rate": 0.0019977017607790237, "loss": 0.2402, "step": 23730 }, { "epoch": 0.16850234139468692, "grad_norm": 0.09521484375, "learning_rate": 0.0019976997389883804, "loss": 0.2385, "step": 23740 }, { "epoch": 0.16857331963453304, "grad_norm": 0.126953125, "learning_rate": 0.0019976977163099685, "loss": 0.2634, "step": 23750 }, { "epoch": 0.16864429787437915, "grad_norm": 0.146484375, "learning_rate": 0.0019976956927437893, "loss": 0.2543, "step": 23760 }, { "epoch": 0.1687152761142253, "grad_norm": 0.1201171875, "learning_rate": 0.001997693668289846, "loss": 0.2419, "step": 23770 }, { "epoch": 0.1687862543540714, "grad_norm": 0.10546875, "learning_rate": 0.0019976916429481394, "loss": 0.259, "step": 23780 }, { "epoch": 0.16885723259391752, "grad_norm": 0.07763671875, "learning_rate": 0.001997689616718672, "loss": 0.2351, "step": 23790 }, { "epoch": 0.16892821083376364, "grad_norm": 0.10302734375, "learning_rate": 0.001997687589601446, "loss": 0.2439, "step": 23800 }, { "epoch": 0.16899918907360975, "grad_norm": 0.1552734375, "learning_rate": 0.0019976855615964633, "loss": 0.2613, "step": 23810 }, { "epoch": 0.16907016731345587, "grad_norm": 0.15625, "learning_rate": 0.001997683532703726, "loss": 0.2701, "step": 23820 }, { "epoch": 0.169141145553302, "grad_norm": 0.15625, "learning_rate": 0.0019976815029232355, "loss": 0.2518, "step": 23830 }, { "epoch": 0.16921212379314812, "grad_norm": 0.10302734375, "learning_rate": 0.0019976794722549947, "loss": 0.2585, "step": 23840 }, { "epoch": 0.16928310203299424, "grad_norm": 0.07763671875, "learning_rate": 0.001997677440699005, "loss": 0.2537, "step": 23850 }, { "epoch": 0.16935408027284035, "grad_norm": 0.1064453125, "learning_rate": 0.0019976754082552685, "loss": 0.24, "step": 23860 }, { "epoch": 0.16942505851268647, "grad_norm": 0.10888671875, "learning_rate": 0.001997673374923787, "loss": 0.2495, "step": 23870 }, { "epoch": 0.16949603675253258, "grad_norm": 0.1552734375, "learning_rate": 0.0019976713407045634, "loss": 0.2576, "step": 23880 }, { "epoch": 0.16956701499237872, "grad_norm": 0.099609375, "learning_rate": 0.0019976693055975985, "loss": 0.2377, "step": 23890 }, { "epoch": 0.16963799323222484, "grad_norm": 0.11181640625, "learning_rate": 0.0019976672696028954, "loss": 0.2433, "step": 23900 }, { "epoch": 0.16970897147207095, "grad_norm": 0.1279296875, "learning_rate": 0.001997665232720456, "loss": 0.2587, "step": 23910 }, { "epoch": 0.16977994971191707, "grad_norm": 0.1044921875, "learning_rate": 0.001997663194950281, "loss": 0.2474, "step": 23920 }, { "epoch": 0.16985092795176318, "grad_norm": 0.32421875, "learning_rate": 0.0019976611562923735, "loss": 0.2678, "step": 23930 }, { "epoch": 0.1699219061916093, "grad_norm": 0.162109375, "learning_rate": 0.0019976591167467357, "loss": 0.2458, "step": 23940 }, { "epoch": 0.16999288443145544, "grad_norm": 0.0859375, "learning_rate": 0.0019976570763133696, "loss": 0.2719, "step": 23950 }, { "epoch": 0.17006386267130155, "grad_norm": 0.10400390625, "learning_rate": 0.0019976550349922765, "loss": 0.2445, "step": 23960 }, { "epoch": 0.17013484091114767, "grad_norm": 0.099609375, "learning_rate": 0.0019976529927834587, "loss": 0.251, "step": 23970 }, { "epoch": 0.17020581915099378, "grad_norm": 0.11279296875, "learning_rate": 0.0019976509496869186, "loss": 0.2412, "step": 23980 }, { "epoch": 0.1702767973908399, "grad_norm": 0.138671875, "learning_rate": 0.0019976489057026577, "loss": 0.2546, "step": 23990 }, { "epoch": 0.170347775630686, "grad_norm": 0.08544921875, "learning_rate": 0.0019976468608306785, "loss": 0.2571, "step": 24000 }, { "epoch": 0.170347775630686, "eval_covost2-zh-en_loss": 3.947054862976074, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 23.3664, "eval_covost2-zh-en_samples_per_second": 2.739, "eval_covost2-zh-en_steps_per_second": 0.171, "step": 24000 }, { "epoch": 0.170347775630686, "eval_covost2-en-zh_loss": 3.1605939865112305, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.7171, "eval_covost2-en-zh_samples_per_second": 3.089, "eval_covost2-en-zh_steps_per_second": 0.193, "step": 24000 }, { "epoch": 0.17041875387053215, "grad_norm": 0.146484375, "learning_rate": 0.0019976448150709827, "loss": 0.2557, "step": 24010 }, { "epoch": 0.17048973211037827, "grad_norm": 0.07861328125, "learning_rate": 0.0019976427684235726, "loss": 0.2251, "step": 24020 }, { "epoch": 0.17056071035022438, "grad_norm": 0.09375, "learning_rate": 0.0019976407208884502, "loss": 0.2582, "step": 24030 }, { "epoch": 0.1706316885900705, "grad_norm": 0.134765625, "learning_rate": 0.001997638672465617, "loss": 0.2392, "step": 24040 }, { "epoch": 0.1707026668299166, "grad_norm": 0.11962890625, "learning_rate": 0.001997636623155076, "loss": 0.2403, "step": 24050 }, { "epoch": 0.17077364506976273, "grad_norm": 0.0888671875, "learning_rate": 0.001997634572956828, "loss": 0.2641, "step": 24060 }, { "epoch": 0.17084462330960887, "grad_norm": 0.09814453125, "learning_rate": 0.0019976325218708756, "loss": 0.2532, "step": 24070 }, { "epoch": 0.17091560154945498, "grad_norm": 0.1181640625, "learning_rate": 0.0019976304698972216, "loss": 0.2445, "step": 24080 }, { "epoch": 0.1709865797893011, "grad_norm": 0.09130859375, "learning_rate": 0.0019976284170358668, "loss": 0.2447, "step": 24090 }, { "epoch": 0.1710575580291472, "grad_norm": 0.10986328125, "learning_rate": 0.001997626363286814, "loss": 0.2486, "step": 24100 }, { "epoch": 0.17112853626899333, "grad_norm": 0.11669921875, "learning_rate": 0.0019976243086500646, "loss": 0.2392, "step": 24110 }, { "epoch": 0.17119951450883944, "grad_norm": 0.111328125, "learning_rate": 0.0019976222531256213, "loss": 0.2727, "step": 24120 }, { "epoch": 0.17127049274868558, "grad_norm": 0.134765625, "learning_rate": 0.001997620196713486, "loss": 0.2475, "step": 24130 }, { "epoch": 0.1713414709885317, "grad_norm": 0.1142578125, "learning_rate": 0.0019976181394136604, "loss": 0.2624, "step": 24140 }, { "epoch": 0.1714124492283778, "grad_norm": 0.0966796875, "learning_rate": 0.0019976160812261467, "loss": 0.2685, "step": 24150 }, { "epoch": 0.17148342746822393, "grad_norm": 0.076171875, "learning_rate": 0.0019976140221509473, "loss": 0.2507, "step": 24160 }, { "epoch": 0.17155440570807004, "grad_norm": 0.12890625, "learning_rate": 0.0019976119621880634, "loss": 0.244, "step": 24170 }, { "epoch": 0.17162538394791615, "grad_norm": 0.1259765625, "learning_rate": 0.0019976099013374973, "loss": 0.2552, "step": 24180 }, { "epoch": 0.1716963621877623, "grad_norm": 0.1162109375, "learning_rate": 0.001997607839599252, "loss": 0.2573, "step": 24190 }, { "epoch": 0.1717673404276084, "grad_norm": 0.12109375, "learning_rate": 0.0019976057769733285, "loss": 0.2327, "step": 24200 }, { "epoch": 0.17183831866745453, "grad_norm": 0.10546875, "learning_rate": 0.001997603713459729, "loss": 0.2646, "step": 24210 }, { "epoch": 0.17190929690730064, "grad_norm": 0.087890625, "learning_rate": 0.0019976016490584557, "loss": 0.2535, "step": 24220 }, { "epoch": 0.17198027514714676, "grad_norm": 0.091796875, "learning_rate": 0.001997599583769511, "loss": 0.2604, "step": 24230 }, { "epoch": 0.17205125338699287, "grad_norm": 0.12451171875, "learning_rate": 0.001997597517592896, "loss": 0.2512, "step": 24240 }, { "epoch": 0.172122231626839, "grad_norm": 0.10986328125, "learning_rate": 0.0019975954505286136, "loss": 0.2422, "step": 24250 }, { "epoch": 0.17219320986668513, "grad_norm": 0.216796875, "learning_rate": 0.001997593382576666, "loss": 0.2496, "step": 24260 }, { "epoch": 0.17226418810653124, "grad_norm": 0.1513671875, "learning_rate": 0.001997591313737054, "loss": 0.2541, "step": 24270 }, { "epoch": 0.17233516634637736, "grad_norm": 0.123046875, "learning_rate": 0.001997589244009781, "loss": 0.2558, "step": 24280 }, { "epoch": 0.17240614458622347, "grad_norm": 0.12060546875, "learning_rate": 0.0019975871733948483, "loss": 0.2537, "step": 24290 }, { "epoch": 0.17247712282606958, "grad_norm": 0.125, "learning_rate": 0.001997585101892258, "loss": 0.2818, "step": 24300 }, { "epoch": 0.17254810106591573, "grad_norm": 0.11474609375, "learning_rate": 0.001997583029502012, "loss": 0.2358, "step": 24310 }, { "epoch": 0.17261907930576184, "grad_norm": 0.10546875, "learning_rate": 0.001997580956224113, "loss": 0.2535, "step": 24320 }, { "epoch": 0.17269005754560796, "grad_norm": 0.14453125, "learning_rate": 0.001997578882058563, "loss": 0.2493, "step": 24330 }, { "epoch": 0.17276103578545407, "grad_norm": 0.10009765625, "learning_rate": 0.0019975768070053636, "loss": 0.2399, "step": 24340 }, { "epoch": 0.17283201402530018, "grad_norm": 0.07470703125, "learning_rate": 0.0019975747310645173, "loss": 0.2692, "step": 24350 }, { "epoch": 0.1729029922651463, "grad_norm": 0.1328125, "learning_rate": 0.001997572654236025, "loss": 0.2487, "step": 24360 }, { "epoch": 0.17297397050499244, "grad_norm": 0.28515625, "learning_rate": 0.00199757057651989, "loss": 0.2512, "step": 24370 }, { "epoch": 0.17304494874483856, "grad_norm": 0.115234375, "learning_rate": 0.001997568497916114, "loss": 0.2438, "step": 24380 }, { "epoch": 0.17311592698468467, "grad_norm": 0.166015625, "learning_rate": 0.001997566418424699, "loss": 0.2651, "step": 24390 }, { "epoch": 0.17318690522453078, "grad_norm": 0.1064453125, "learning_rate": 0.0019975643380456473, "loss": 0.2518, "step": 24400 }, { "epoch": 0.1732578834643769, "grad_norm": 0.0908203125, "learning_rate": 0.0019975622567789605, "loss": 0.2582, "step": 24410 }, { "epoch": 0.17332886170422301, "grad_norm": 0.07666015625, "learning_rate": 0.001997560174624641, "loss": 0.2406, "step": 24420 }, { "epoch": 0.17339983994406916, "grad_norm": 0.330078125, "learning_rate": 0.00199755809158269, "loss": 0.2562, "step": 24430 }, { "epoch": 0.17347081818391527, "grad_norm": 0.07177734375, "learning_rate": 0.0019975560076531114, "loss": 0.2437, "step": 24440 }, { "epoch": 0.17354179642376139, "grad_norm": 0.150390625, "learning_rate": 0.0019975539228359054, "loss": 0.2556, "step": 24450 }, { "epoch": 0.1736127746636075, "grad_norm": 0.1494140625, "learning_rate": 0.001997551837131075, "loss": 0.2536, "step": 24460 }, { "epoch": 0.17368375290345361, "grad_norm": 0.103515625, "learning_rate": 0.0019975497505386225, "loss": 0.2407, "step": 24470 }, { "epoch": 0.17375473114329976, "grad_norm": 0.12890625, "learning_rate": 0.001997547663058549, "loss": 0.2624, "step": 24480 }, { "epoch": 0.17382570938314587, "grad_norm": 0.107421875, "learning_rate": 0.0019975455746908577, "loss": 0.2301, "step": 24490 }, { "epoch": 0.17389668762299199, "grad_norm": 0.0966796875, "learning_rate": 0.0019975434854355495, "loss": 0.248, "step": 24500 }, { "epoch": 0.1739676658628381, "grad_norm": 0.095703125, "learning_rate": 0.0019975413952926274, "loss": 0.2536, "step": 24510 }, { "epoch": 0.17403864410268421, "grad_norm": 0.12255859375, "learning_rate": 0.001997539304262093, "loss": 0.2636, "step": 24520 }, { "epoch": 0.17410962234253033, "grad_norm": 0.09326171875, "learning_rate": 0.001997537212343948, "loss": 0.2413, "step": 24530 }, { "epoch": 0.17418060058237647, "grad_norm": 0.140625, "learning_rate": 0.0019975351195381957, "loss": 0.2426, "step": 24540 }, { "epoch": 0.17425157882222259, "grad_norm": 0.193359375, "learning_rate": 0.0019975330258448372, "loss": 0.2556, "step": 24550 }, { "epoch": 0.1743225570620687, "grad_norm": 0.171875, "learning_rate": 0.0019975309312638746, "loss": 0.2515, "step": 24560 }, { "epoch": 0.17439353530191481, "grad_norm": 0.1923828125, "learning_rate": 0.0019975288357953103, "loss": 0.2561, "step": 24570 }, { "epoch": 0.17446451354176093, "grad_norm": 0.2158203125, "learning_rate": 0.001997526739439146, "loss": 0.2523, "step": 24580 }, { "epoch": 0.17453549178160704, "grad_norm": 0.0859375, "learning_rate": 0.0019975246421953846, "loss": 0.2721, "step": 24590 }, { "epoch": 0.1746064700214532, "grad_norm": 0.107421875, "learning_rate": 0.001997522544064027, "loss": 0.2434, "step": 24600 }, { "epoch": 0.1746774482612993, "grad_norm": 0.12109375, "learning_rate": 0.0019975204450450763, "loss": 0.2581, "step": 24610 }, { "epoch": 0.17474842650114542, "grad_norm": 0.228515625, "learning_rate": 0.001997518345138534, "loss": 0.2659, "step": 24620 }, { "epoch": 0.17481940474099153, "grad_norm": 0.0908203125, "learning_rate": 0.0019975162443444023, "loss": 0.2439, "step": 24630 }, { "epoch": 0.17489038298083764, "grad_norm": 0.09375, "learning_rate": 0.001997514142662683, "loss": 0.2755, "step": 24640 }, { "epoch": 0.17496136122068376, "grad_norm": 0.11767578125, "learning_rate": 0.0019975120400933786, "loss": 0.2418, "step": 24650 }, { "epoch": 0.1750323394605299, "grad_norm": 0.095703125, "learning_rate": 0.0019975099366364908, "loss": 0.26, "step": 24660 }, { "epoch": 0.17510331770037602, "grad_norm": 0.1337890625, "learning_rate": 0.0019975078322920226, "loss": 0.2446, "step": 24670 }, { "epoch": 0.17517429594022213, "grad_norm": 0.111328125, "learning_rate": 0.001997505727059975, "loss": 0.2572, "step": 24680 }, { "epoch": 0.17524527418006824, "grad_norm": 0.3359375, "learning_rate": 0.0019975036209403502, "loss": 0.2558, "step": 24690 }, { "epoch": 0.17531625241991436, "grad_norm": 0.111328125, "learning_rate": 0.0019975015139331513, "loss": 0.2761, "step": 24700 }, { "epoch": 0.17538723065976047, "grad_norm": 0.0771484375, "learning_rate": 0.001997499406038379, "loss": 0.2697, "step": 24710 }, { "epoch": 0.17545820889960662, "grad_norm": 0.0830078125, "learning_rate": 0.001997497297256036, "loss": 0.2532, "step": 24720 }, { "epoch": 0.17552918713945273, "grad_norm": 0.091796875, "learning_rate": 0.001997495187586125, "loss": 0.2637, "step": 24730 }, { "epoch": 0.17560016537929884, "grad_norm": 0.0849609375, "learning_rate": 0.001997493077028647, "loss": 0.2466, "step": 24740 }, { "epoch": 0.17567114361914496, "grad_norm": 0.11279296875, "learning_rate": 0.001997490965583605, "loss": 0.26, "step": 24750 }, { "epoch": 0.17574212185899107, "grad_norm": 0.125, "learning_rate": 0.0019974888532510006, "loss": 0.2694, "step": 24760 }, { "epoch": 0.1758131000988372, "grad_norm": 0.1103515625, "learning_rate": 0.001997486740030836, "loss": 0.2342, "step": 24770 }, { "epoch": 0.17588407833868333, "grad_norm": 0.1298828125, "learning_rate": 0.001997484625923113, "loss": 0.2483, "step": 24780 }, { "epoch": 0.17595505657852945, "grad_norm": 0.1318359375, "learning_rate": 0.001997482510927834, "loss": 0.238, "step": 24790 }, { "epoch": 0.17602603481837556, "grad_norm": 0.1279296875, "learning_rate": 0.0019974803950450013, "loss": 0.2391, "step": 24800 }, { "epoch": 0.17609701305822167, "grad_norm": 0.1357421875, "learning_rate": 0.001997478278274616, "loss": 0.2307, "step": 24810 }, { "epoch": 0.1761679912980678, "grad_norm": 0.1259765625, "learning_rate": 0.0019974761606166816, "loss": 0.2455, "step": 24820 }, { "epoch": 0.1762389695379139, "grad_norm": 0.10400390625, "learning_rate": 0.0019974740420711998, "loss": 0.2336, "step": 24830 }, { "epoch": 0.17630994777776005, "grad_norm": 0.126953125, "learning_rate": 0.001997471922638172, "loss": 0.2505, "step": 24840 }, { "epoch": 0.17638092601760616, "grad_norm": 0.11669921875, "learning_rate": 0.0019974698023176004, "loss": 0.238, "step": 24850 }, { "epoch": 0.17645190425745227, "grad_norm": 0.150390625, "learning_rate": 0.001997467681109488, "loss": 0.2468, "step": 24860 }, { "epoch": 0.1765228824972984, "grad_norm": 0.09033203125, "learning_rate": 0.0019974655590138356, "loss": 0.2648, "step": 24870 }, { "epoch": 0.1765938607371445, "grad_norm": 0.08154296875, "learning_rate": 0.001997463436030647, "loss": 0.2274, "step": 24880 }, { "epoch": 0.17666483897699062, "grad_norm": 0.06884765625, "learning_rate": 0.0019974613121599228, "loss": 0.2471, "step": 24890 }, { "epoch": 0.17673581721683676, "grad_norm": 0.1484375, "learning_rate": 0.001997459187401666, "loss": 0.2524, "step": 24900 }, { "epoch": 0.17680679545668287, "grad_norm": 0.130859375, "learning_rate": 0.0019974570617558775, "loss": 0.2516, "step": 24910 }, { "epoch": 0.176877773696529, "grad_norm": 0.0966796875, "learning_rate": 0.0019974549352225605, "loss": 0.2616, "step": 24920 }, { "epoch": 0.1769487519363751, "grad_norm": 0.12890625, "learning_rate": 0.001997452807801717, "loss": 0.2489, "step": 24930 }, { "epoch": 0.17701973017622122, "grad_norm": 0.0771484375, "learning_rate": 0.0019974506794933487, "loss": 0.2565, "step": 24940 }, { "epoch": 0.17709070841606733, "grad_norm": 0.126953125, "learning_rate": 0.0019974485502974584, "loss": 0.2565, "step": 24950 }, { "epoch": 0.17716168665591348, "grad_norm": 0.11328125, "learning_rate": 0.001997446420214047, "loss": 0.2449, "step": 24960 }, { "epoch": 0.1772326648957596, "grad_norm": 0.1044921875, "learning_rate": 0.001997444289243118, "loss": 0.2355, "step": 24970 }, { "epoch": 0.1773036431356057, "grad_norm": 0.1669921875, "learning_rate": 0.0019974421573846724, "loss": 0.2674, "step": 24980 }, { "epoch": 0.17737462137545182, "grad_norm": 0.076171875, "learning_rate": 0.001997440024638713, "loss": 0.2469, "step": 24990 }, { "epoch": 0.17744559961529793, "grad_norm": 0.111328125, "learning_rate": 0.0019974378910052417, "loss": 0.2366, "step": 25000 }, { "epoch": 0.17751657785514405, "grad_norm": 0.099609375, "learning_rate": 0.0019974357564842603, "loss": 0.2438, "step": 25010 }, { "epoch": 0.1775875560949902, "grad_norm": 0.091796875, "learning_rate": 0.0019974336210757714, "loss": 0.2434, "step": 25020 }, { "epoch": 0.1776585343348363, "grad_norm": 0.123046875, "learning_rate": 0.001997431484779777, "loss": 0.2481, "step": 25030 }, { "epoch": 0.17772951257468242, "grad_norm": 0.1279296875, "learning_rate": 0.0019974293475962792, "loss": 0.2529, "step": 25040 }, { "epoch": 0.17780049081452853, "grad_norm": 0.1396484375, "learning_rate": 0.0019974272095252795, "loss": 0.2596, "step": 25050 }, { "epoch": 0.17787146905437465, "grad_norm": 0.177734375, "learning_rate": 0.0019974250705667813, "loss": 0.2762, "step": 25060 }, { "epoch": 0.17794244729422076, "grad_norm": 0.1328125, "learning_rate": 0.001997422930720785, "loss": 0.2455, "step": 25070 }, { "epoch": 0.1780134255340669, "grad_norm": 0.1533203125, "learning_rate": 0.001997420789987294, "loss": 0.2581, "step": 25080 }, { "epoch": 0.17808440377391302, "grad_norm": 0.10888671875, "learning_rate": 0.0019974186483663107, "loss": 0.2614, "step": 25090 }, { "epoch": 0.17815538201375913, "grad_norm": 0.08935546875, "learning_rate": 0.0019974165058578358, "loss": 0.2596, "step": 25100 }, { "epoch": 0.17822636025360525, "grad_norm": 0.10693359375, "learning_rate": 0.001997414362461873, "loss": 0.2525, "step": 25110 }, { "epoch": 0.17829733849345136, "grad_norm": 0.11572265625, "learning_rate": 0.001997412218178423, "loss": 0.2669, "step": 25120 }, { "epoch": 0.17836831673329748, "grad_norm": 0.162109375, "learning_rate": 0.0019974100730074888, "loss": 0.2483, "step": 25130 }, { "epoch": 0.17843929497314362, "grad_norm": 0.0849609375, "learning_rate": 0.0019974079269490723, "loss": 0.2506, "step": 25140 }, { "epoch": 0.17851027321298973, "grad_norm": 0.1240234375, "learning_rate": 0.0019974057800031756, "loss": 0.2523, "step": 25150 }, { "epoch": 0.17858125145283585, "grad_norm": 0.11328125, "learning_rate": 0.001997403632169801, "loss": 0.2426, "step": 25160 }, { "epoch": 0.17865222969268196, "grad_norm": 0.08349609375, "learning_rate": 0.00199740148344895, "loss": 0.2569, "step": 25170 }, { "epoch": 0.17872320793252808, "grad_norm": 0.11328125, "learning_rate": 0.0019973993338406252, "loss": 0.2668, "step": 25180 }, { "epoch": 0.1787941861723742, "grad_norm": 0.111328125, "learning_rate": 0.001997397183344829, "loss": 0.2434, "step": 25190 }, { "epoch": 0.17886516441222033, "grad_norm": 0.06494140625, "learning_rate": 0.001997395031961563, "loss": 0.2778, "step": 25200 }, { "epoch": 0.17893614265206645, "grad_norm": 0.109375, "learning_rate": 0.00199739287969083, "loss": 0.2561, "step": 25210 }, { "epoch": 0.17900712089191256, "grad_norm": 0.1044921875, "learning_rate": 0.0019973907265326313, "loss": 0.2657, "step": 25220 }, { "epoch": 0.17907809913175868, "grad_norm": 0.1337890625, "learning_rate": 0.0019973885724869694, "loss": 0.2619, "step": 25230 }, { "epoch": 0.1791490773716048, "grad_norm": 0.07470703125, "learning_rate": 0.0019973864175538464, "loss": 0.2323, "step": 25240 }, { "epoch": 0.1792200556114509, "grad_norm": 0.11962890625, "learning_rate": 0.001997384261733265, "loss": 0.2546, "step": 25250 }, { "epoch": 0.17929103385129705, "grad_norm": 0.1162109375, "learning_rate": 0.001997382105025226, "loss": 0.2558, "step": 25260 }, { "epoch": 0.17936201209114316, "grad_norm": 0.08154296875, "learning_rate": 0.001997379947429733, "loss": 0.2511, "step": 25270 }, { "epoch": 0.17943299033098928, "grad_norm": 0.1494140625, "learning_rate": 0.001997377788946787, "loss": 0.2331, "step": 25280 }, { "epoch": 0.1795039685708354, "grad_norm": 0.0859375, "learning_rate": 0.001997375629576391, "loss": 0.2531, "step": 25290 }, { "epoch": 0.1795749468106815, "grad_norm": 0.09228515625, "learning_rate": 0.0019973734693185465, "loss": 0.2354, "step": 25300 }, { "epoch": 0.17964592505052762, "grad_norm": 0.10009765625, "learning_rate": 0.001997371308173256, "loss": 0.2446, "step": 25310 }, { "epoch": 0.17971690329037376, "grad_norm": 0.10205078125, "learning_rate": 0.0019973691461405217, "loss": 0.2595, "step": 25320 }, { "epoch": 0.17978788153021988, "grad_norm": 0.09521484375, "learning_rate": 0.001997366983220345, "loss": 0.2404, "step": 25330 }, { "epoch": 0.179858859770066, "grad_norm": 0.1337890625, "learning_rate": 0.0019973648194127292, "loss": 0.2457, "step": 25340 }, { "epoch": 0.1799298380099121, "grad_norm": 0.08154296875, "learning_rate": 0.0019973626547176754, "loss": 0.2508, "step": 25350 }, { "epoch": 0.18000081624975822, "grad_norm": 0.12353515625, "learning_rate": 0.0019973604891351865, "loss": 0.2539, "step": 25360 }, { "epoch": 0.18007179448960434, "grad_norm": 0.11962890625, "learning_rate": 0.001997358322665264, "loss": 0.2484, "step": 25370 }, { "epoch": 0.18014277272945048, "grad_norm": 0.09423828125, "learning_rate": 0.0019973561553079107, "loss": 0.2495, "step": 25380 }, { "epoch": 0.1802137509692966, "grad_norm": 0.23828125, "learning_rate": 0.001997353987063128, "loss": 0.2481, "step": 25390 }, { "epoch": 0.1802847292091427, "grad_norm": 0.078125, "learning_rate": 0.001997351817930919, "loss": 0.2727, "step": 25400 }, { "epoch": 0.18035570744898882, "grad_norm": 0.10986328125, "learning_rate": 0.0019973496479112848, "loss": 0.2511, "step": 25410 }, { "epoch": 0.18042668568883494, "grad_norm": 0.12451171875, "learning_rate": 0.0019973474770042282, "loss": 0.2642, "step": 25420 }, { "epoch": 0.18049766392868105, "grad_norm": 0.09228515625, "learning_rate": 0.0019973453052097513, "loss": 0.2474, "step": 25430 }, { "epoch": 0.1805686421685272, "grad_norm": 0.1044921875, "learning_rate": 0.0019973431325278563, "loss": 0.2351, "step": 25440 }, { "epoch": 0.1806396204083733, "grad_norm": 0.1904296875, "learning_rate": 0.0019973409589585443, "loss": 0.2496, "step": 25450 }, { "epoch": 0.18071059864821942, "grad_norm": 0.146484375, "learning_rate": 0.0019973387845018194, "loss": 0.2658, "step": 25460 }, { "epoch": 0.18078157688806554, "grad_norm": 0.134765625, "learning_rate": 0.001997336609157682, "loss": 0.2535, "step": 25470 }, { "epoch": 0.18085255512791165, "grad_norm": 0.09912109375, "learning_rate": 0.001997334432926135, "loss": 0.2632, "step": 25480 }, { "epoch": 0.18092353336775777, "grad_norm": 0.1396484375, "learning_rate": 0.0019973322558071807, "loss": 0.2524, "step": 25490 }, { "epoch": 0.1809945116076039, "grad_norm": 0.1015625, "learning_rate": 0.001997330077800821, "loss": 0.2469, "step": 25500 }, { "epoch": 0.18106548984745002, "grad_norm": 0.08251953125, "learning_rate": 0.0019973278989070583, "loss": 0.2443, "step": 25510 }, { "epoch": 0.18113646808729614, "grad_norm": 0.08447265625, "learning_rate": 0.001997325719125894, "loss": 0.2483, "step": 25520 }, { "epoch": 0.18120744632714225, "grad_norm": 0.205078125, "learning_rate": 0.0019973235384573313, "loss": 0.2502, "step": 25530 }, { "epoch": 0.18127842456698837, "grad_norm": 0.11083984375, "learning_rate": 0.0019973213569013716, "loss": 0.2592, "step": 25540 }, { "epoch": 0.18134940280683448, "grad_norm": 0.1025390625, "learning_rate": 0.0019973191744580175, "loss": 0.27, "step": 25550 }, { "epoch": 0.18142038104668062, "grad_norm": 0.10107421875, "learning_rate": 0.0019973169911272712, "loss": 0.2437, "step": 25560 }, { "epoch": 0.18149135928652674, "grad_norm": 0.11279296875, "learning_rate": 0.001997314806909134, "loss": 0.2677, "step": 25570 }, { "epoch": 0.18156233752637285, "grad_norm": 0.0810546875, "learning_rate": 0.0019973126218036093, "loss": 0.2448, "step": 25580 }, { "epoch": 0.18163331576621897, "grad_norm": 0.10888671875, "learning_rate": 0.0019973104358106983, "loss": 0.2409, "step": 25590 }, { "epoch": 0.18170429400606508, "grad_norm": 0.10986328125, "learning_rate": 0.0019973082489304034, "loss": 0.2373, "step": 25600 }, { "epoch": 0.18177527224591122, "grad_norm": 0.0888671875, "learning_rate": 0.001997306061162727, "loss": 0.2493, "step": 25610 }, { "epoch": 0.18184625048575734, "grad_norm": 0.1337890625, "learning_rate": 0.0019973038725076713, "loss": 0.266, "step": 25620 }, { "epoch": 0.18191722872560345, "grad_norm": 0.08349609375, "learning_rate": 0.0019973016829652386, "loss": 0.2364, "step": 25630 }, { "epoch": 0.18198820696544957, "grad_norm": 0.134765625, "learning_rate": 0.00199729949253543, "loss": 0.257, "step": 25640 }, { "epoch": 0.18205918520529568, "grad_norm": 0.15234375, "learning_rate": 0.001997297301218249, "loss": 0.2533, "step": 25650 }, { "epoch": 0.1821301634451418, "grad_norm": 0.400390625, "learning_rate": 0.001997295109013697, "loss": 0.2561, "step": 25660 }, { "epoch": 0.18220114168498794, "grad_norm": 0.1982421875, "learning_rate": 0.0019972929159217763, "loss": 0.2437, "step": 25670 }, { "epoch": 0.18227211992483405, "grad_norm": 0.2470703125, "learning_rate": 0.0019972907219424894, "loss": 0.269, "step": 25680 }, { "epoch": 0.18234309816468017, "grad_norm": 0.08935546875, "learning_rate": 0.001997288527075838, "loss": 0.2601, "step": 25690 }, { "epoch": 0.18241407640452628, "grad_norm": 0.0927734375, "learning_rate": 0.0019972863313218246, "loss": 0.2521, "step": 25700 }, { "epoch": 0.1824850546443724, "grad_norm": 0.10986328125, "learning_rate": 0.001997284134680451, "loss": 0.2573, "step": 25710 }, { "epoch": 0.1825560328842185, "grad_norm": 0.064453125, "learning_rate": 0.00199728193715172, "loss": 0.252, "step": 25720 }, { "epoch": 0.18262701112406465, "grad_norm": 0.1357421875, "learning_rate": 0.0019972797387356332, "loss": 0.2515, "step": 25730 }, { "epoch": 0.18269798936391077, "grad_norm": 0.1171875, "learning_rate": 0.0019972775394321934, "loss": 0.2597, "step": 25740 }, { "epoch": 0.18276896760375688, "grad_norm": 0.12060546875, "learning_rate": 0.0019972753392414016, "loss": 0.2538, "step": 25750 }, { "epoch": 0.182839945843603, "grad_norm": 0.166015625, "learning_rate": 0.001997273138163261, "loss": 0.2637, "step": 25760 }, { "epoch": 0.1829109240834491, "grad_norm": 0.08056640625, "learning_rate": 0.0019972709361977736, "loss": 0.2732, "step": 25770 }, { "epoch": 0.18298190232329523, "grad_norm": 0.0703125, "learning_rate": 0.001997268733344941, "loss": 0.2705, "step": 25780 }, { "epoch": 0.18305288056314137, "grad_norm": 0.2197265625, "learning_rate": 0.001997266529604767, "loss": 0.2437, "step": 25790 }, { "epoch": 0.18312385880298748, "grad_norm": 0.07080078125, "learning_rate": 0.0019972643249772516, "loss": 0.2431, "step": 25800 }, { "epoch": 0.1831948370428336, "grad_norm": 0.09619140625, "learning_rate": 0.0019972621194623984, "loss": 0.243, "step": 25810 }, { "epoch": 0.1832658152826797, "grad_norm": 0.1181640625, "learning_rate": 0.0019972599130602094, "loss": 0.2645, "step": 25820 }, { "epoch": 0.18333679352252583, "grad_norm": 0.10107421875, "learning_rate": 0.0019972577057706864, "loss": 0.2383, "step": 25830 }, { "epoch": 0.18340777176237194, "grad_norm": 0.11376953125, "learning_rate": 0.0019972554975938315, "loss": 0.2601, "step": 25840 }, { "epoch": 0.18347875000221808, "grad_norm": 0.1357421875, "learning_rate": 0.0019972532885296477, "loss": 0.2471, "step": 25850 }, { "epoch": 0.1835497282420642, "grad_norm": 0.091796875, "learning_rate": 0.001997251078578136, "loss": 0.2522, "step": 25860 }, { "epoch": 0.1836207064819103, "grad_norm": 0.0986328125, "learning_rate": 0.0019972488677392998, "loss": 0.2455, "step": 25870 }, { "epoch": 0.18369168472175643, "grad_norm": 0.11279296875, "learning_rate": 0.0019972466560131403, "loss": 0.2382, "step": 25880 }, { "epoch": 0.18376266296160254, "grad_norm": 0.11279296875, "learning_rate": 0.0019972444433996603, "loss": 0.2563, "step": 25890 }, { "epoch": 0.18383364120144866, "grad_norm": 0.1015625, "learning_rate": 0.0019972422298988614, "loss": 0.2532, "step": 25900 }, { "epoch": 0.1839046194412948, "grad_norm": 0.09619140625, "learning_rate": 0.0019972400155107467, "loss": 0.2591, "step": 25910 }, { "epoch": 0.1839755976811409, "grad_norm": 0.1279296875, "learning_rate": 0.0019972378002353175, "loss": 0.2572, "step": 25920 }, { "epoch": 0.18404657592098703, "grad_norm": 0.1650390625, "learning_rate": 0.0019972355840725763, "loss": 0.2458, "step": 25930 }, { "epoch": 0.18411755416083314, "grad_norm": 0.1611328125, "learning_rate": 0.0019972333670225254, "loss": 0.2541, "step": 25940 }, { "epoch": 0.18418853240067926, "grad_norm": 0.099609375, "learning_rate": 0.0019972311490851673, "loss": 0.2343, "step": 25950 }, { "epoch": 0.18425951064052537, "grad_norm": 0.134765625, "learning_rate": 0.001997228930260504, "loss": 0.2528, "step": 25960 }, { "epoch": 0.1843304888803715, "grad_norm": 0.125, "learning_rate": 0.0019972267105485366, "loss": 0.2462, "step": 25970 }, { "epoch": 0.18440146712021763, "grad_norm": 0.09814453125, "learning_rate": 0.001997224489949269, "loss": 0.2555, "step": 25980 }, { "epoch": 0.18447244536006374, "grad_norm": 0.130859375, "learning_rate": 0.001997222268462702, "loss": 0.2733, "step": 25990 }, { "epoch": 0.18454342359990986, "grad_norm": 0.1328125, "learning_rate": 0.001997220046088839, "loss": 0.2545, "step": 26000 }, { "epoch": 0.18454342359990986, "eval_covost2-zh-en_loss": 3.892001152038574, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.52, "eval_covost2-zh-en_samples_per_second": 3.119, "eval_covost2-zh-en_steps_per_second": 0.195, "step": 26000 }, { "epoch": 0.18454342359990986, "eval_covost2-en-zh_loss": 3.149587392807007, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.9239, "eval_covost2-en-zh_samples_per_second": 2.919, "eval_covost2-en-zh_steps_per_second": 0.182, "step": 26000 }, { "epoch": 0.18461440183975597, "grad_norm": 0.10302734375, "learning_rate": 0.0019972178228276813, "loss": 0.2517, "step": 26010 }, { "epoch": 0.18468538007960209, "grad_norm": 0.1435546875, "learning_rate": 0.0019972155986792313, "loss": 0.2586, "step": 26020 }, { "epoch": 0.18475635831944823, "grad_norm": 0.08203125, "learning_rate": 0.0019972133736434914, "loss": 0.2461, "step": 26030 }, { "epoch": 0.18482733655929434, "grad_norm": 0.1279296875, "learning_rate": 0.001997211147720464, "loss": 0.2615, "step": 26040 }, { "epoch": 0.18489831479914046, "grad_norm": 0.0966796875, "learning_rate": 0.0019972089209101505, "loss": 0.2511, "step": 26050 }, { "epoch": 0.18496929303898657, "grad_norm": 0.1259765625, "learning_rate": 0.001997206693212554, "loss": 0.2467, "step": 26060 }, { "epoch": 0.18504027127883269, "grad_norm": 0.10498046875, "learning_rate": 0.001997204464627676, "loss": 0.2434, "step": 26070 }, { "epoch": 0.1851112495186788, "grad_norm": 0.11669921875, "learning_rate": 0.001997202235155519, "loss": 0.2446, "step": 26080 }, { "epoch": 0.18518222775852494, "grad_norm": 0.095703125, "learning_rate": 0.001997200004796086, "loss": 0.2506, "step": 26090 }, { "epoch": 0.18525320599837106, "grad_norm": 0.12451171875, "learning_rate": 0.001997197773549378, "loss": 0.2428, "step": 26100 }, { "epoch": 0.18532418423821717, "grad_norm": 0.09814453125, "learning_rate": 0.001997195541415397, "loss": 0.2517, "step": 26110 }, { "epoch": 0.18539516247806329, "grad_norm": 0.09033203125, "learning_rate": 0.0019971933083941466, "loss": 0.2483, "step": 26120 }, { "epoch": 0.1854661407179094, "grad_norm": 0.10791015625, "learning_rate": 0.0019971910744856275, "loss": 0.2473, "step": 26130 }, { "epoch": 0.18553711895775551, "grad_norm": 0.08349609375, "learning_rate": 0.0019971888396898433, "loss": 0.2568, "step": 26140 }, { "epoch": 0.18560809719760166, "grad_norm": 0.1357421875, "learning_rate": 0.0019971866040067953, "loss": 0.2514, "step": 26150 }, { "epoch": 0.18567907543744777, "grad_norm": 0.1103515625, "learning_rate": 0.0019971843674364866, "loss": 0.2541, "step": 26160 }, { "epoch": 0.18575005367729389, "grad_norm": 0.0947265625, "learning_rate": 0.001997182129978918, "loss": 0.2594, "step": 26170 }, { "epoch": 0.18582103191714, "grad_norm": 0.1298828125, "learning_rate": 0.0019971798916340925, "loss": 0.2581, "step": 26180 }, { "epoch": 0.18589201015698611, "grad_norm": 0.091796875, "learning_rate": 0.0019971776524020128, "loss": 0.2674, "step": 26190 }, { "epoch": 0.18596298839683223, "grad_norm": 0.1611328125, "learning_rate": 0.0019971754122826806, "loss": 0.2519, "step": 26200 }, { "epoch": 0.18603396663667837, "grad_norm": 0.09326171875, "learning_rate": 0.001997173171276098, "loss": 0.2519, "step": 26210 }, { "epoch": 0.1861049448765245, "grad_norm": 0.07177734375, "learning_rate": 0.0019971709293822673, "loss": 0.2344, "step": 26220 }, { "epoch": 0.1861759231163706, "grad_norm": 0.08935546875, "learning_rate": 0.0019971686866011905, "loss": 0.2758, "step": 26230 }, { "epoch": 0.18624690135621672, "grad_norm": 0.08837890625, "learning_rate": 0.001997166442932871, "loss": 0.2349, "step": 26240 }, { "epoch": 0.18631787959606283, "grad_norm": 0.1572265625, "learning_rate": 0.001997164198377309, "loss": 0.2472, "step": 26250 }, { "epoch": 0.18638885783590894, "grad_norm": 0.09326171875, "learning_rate": 0.0019971619529345085, "loss": 0.2525, "step": 26260 }, { "epoch": 0.1864598360757551, "grad_norm": 0.07861328125, "learning_rate": 0.0019971597066044708, "loss": 0.2554, "step": 26270 }, { "epoch": 0.1865308143156012, "grad_norm": 0.103515625, "learning_rate": 0.0019971574593871983, "loss": 0.2323, "step": 26280 }, { "epoch": 0.18660179255544732, "grad_norm": 0.099609375, "learning_rate": 0.0019971552112826936, "loss": 0.2587, "step": 26290 }, { "epoch": 0.18667277079529343, "grad_norm": 0.12158203125, "learning_rate": 0.0019971529622909586, "loss": 0.2635, "step": 26300 }, { "epoch": 0.18674374903513954, "grad_norm": 0.091796875, "learning_rate": 0.0019971507124119958, "loss": 0.2374, "step": 26310 }, { "epoch": 0.18681472727498566, "grad_norm": 0.1181640625, "learning_rate": 0.0019971484616458065, "loss": 0.2546, "step": 26320 }, { "epoch": 0.1868857055148318, "grad_norm": 0.11962890625, "learning_rate": 0.001997146209992394, "loss": 0.2433, "step": 26330 }, { "epoch": 0.18695668375467792, "grad_norm": 0.09326171875, "learning_rate": 0.00199714395745176, "loss": 0.2523, "step": 26340 }, { "epoch": 0.18702766199452403, "grad_norm": 0.08984375, "learning_rate": 0.001997141704023907, "loss": 0.2416, "step": 26350 }, { "epoch": 0.18709864023437014, "grad_norm": 0.08935546875, "learning_rate": 0.001997139449708837, "loss": 0.2483, "step": 26360 }, { "epoch": 0.18716961847421626, "grad_norm": 0.0830078125, "learning_rate": 0.0019971371945065523, "loss": 0.2462, "step": 26370 }, { "epoch": 0.18724059671406237, "grad_norm": 0.083984375, "learning_rate": 0.0019971349384170552, "loss": 0.249, "step": 26380 }, { "epoch": 0.18731157495390852, "grad_norm": 0.09619140625, "learning_rate": 0.0019971326814403477, "loss": 0.2614, "step": 26390 }, { "epoch": 0.18738255319375463, "grad_norm": 0.1337890625, "learning_rate": 0.0019971304235764323, "loss": 0.2529, "step": 26400 }, { "epoch": 0.18745353143360075, "grad_norm": 0.111328125, "learning_rate": 0.0019971281648253113, "loss": 0.2369, "step": 26410 }, { "epoch": 0.18752450967344686, "grad_norm": 0.11474609375, "learning_rate": 0.0019971259051869868, "loss": 0.2439, "step": 26420 }, { "epoch": 0.18759548791329297, "grad_norm": 0.150390625, "learning_rate": 0.001997123644661461, "loss": 0.2394, "step": 26430 }, { "epoch": 0.1876664661531391, "grad_norm": 0.0927734375, "learning_rate": 0.001997121383248736, "loss": 0.2523, "step": 26440 }, { "epoch": 0.18773744439298523, "grad_norm": 0.1494140625, "learning_rate": 0.0019971191209488142, "loss": 0.2372, "step": 26450 }, { "epoch": 0.18780842263283135, "grad_norm": 0.11767578125, "learning_rate": 0.0019971168577616978, "loss": 0.248, "step": 26460 }, { "epoch": 0.18787940087267746, "grad_norm": 0.166015625, "learning_rate": 0.0019971145936873887, "loss": 0.2416, "step": 26470 }, { "epoch": 0.18795037911252357, "grad_norm": 0.2138671875, "learning_rate": 0.00199711232872589, "loss": 0.2539, "step": 26480 }, { "epoch": 0.1880213573523697, "grad_norm": 0.08837890625, "learning_rate": 0.0019971100628772033, "loss": 0.2382, "step": 26490 }, { "epoch": 0.1880923355922158, "grad_norm": 0.11767578125, "learning_rate": 0.001997107796141331, "loss": 0.2474, "step": 26500 }, { "epoch": 0.18816331383206195, "grad_norm": 0.11376953125, "learning_rate": 0.001997105528518275, "loss": 0.2676, "step": 26510 }, { "epoch": 0.18823429207190806, "grad_norm": 0.10107421875, "learning_rate": 0.001997103260008038, "loss": 0.2417, "step": 26520 }, { "epoch": 0.18830527031175417, "grad_norm": 0.08544921875, "learning_rate": 0.0019971009906106225, "loss": 0.2371, "step": 26530 }, { "epoch": 0.1883762485516003, "grad_norm": 0.103515625, "learning_rate": 0.00199709872032603, "loss": 0.2393, "step": 26540 }, { "epoch": 0.1884472267914464, "grad_norm": 0.1533203125, "learning_rate": 0.001997096449154263, "loss": 0.2371, "step": 26550 }, { "epoch": 0.18851820503129252, "grad_norm": 0.10498046875, "learning_rate": 0.001997094177095324, "loss": 0.2437, "step": 26560 }, { "epoch": 0.18858918327113866, "grad_norm": 0.0986328125, "learning_rate": 0.0019970919041492155, "loss": 0.2505, "step": 26570 }, { "epoch": 0.18866016151098478, "grad_norm": 0.09765625, "learning_rate": 0.001997089630315939, "loss": 0.2387, "step": 26580 }, { "epoch": 0.1887311397508309, "grad_norm": 0.10888671875, "learning_rate": 0.001997087355595497, "loss": 0.2586, "step": 26590 }, { "epoch": 0.188802117990677, "grad_norm": 0.103515625, "learning_rate": 0.0019970850799878918, "loss": 0.2438, "step": 26600 }, { "epoch": 0.18887309623052312, "grad_norm": 0.1416015625, "learning_rate": 0.0019970828034931256, "loss": 0.2265, "step": 26610 }, { "epoch": 0.18894407447036923, "grad_norm": 0.06689453125, "learning_rate": 0.0019970805261112006, "loss": 0.2336, "step": 26620 }, { "epoch": 0.18901505271021538, "grad_norm": 0.12353515625, "learning_rate": 0.001997078247842119, "loss": 0.2437, "step": 26630 }, { "epoch": 0.1890860309500615, "grad_norm": 0.11279296875, "learning_rate": 0.0019970759686858837, "loss": 0.2393, "step": 26640 }, { "epoch": 0.1891570091899076, "grad_norm": 0.12255859375, "learning_rate": 0.001997073688642497, "loss": 0.2509, "step": 26650 }, { "epoch": 0.18922798742975372, "grad_norm": 0.14453125, "learning_rate": 0.0019970714077119597, "loss": 0.2292, "step": 26660 }, { "epoch": 0.18929896566959983, "grad_norm": 0.16796875, "learning_rate": 0.0019970691258942757, "loss": 0.2561, "step": 26670 }, { "epoch": 0.18936994390944595, "grad_norm": 0.1611328125, "learning_rate": 0.0019970668431894462, "loss": 0.258, "step": 26680 }, { "epoch": 0.1894409221492921, "grad_norm": 0.1484375, "learning_rate": 0.001997064559597474, "loss": 0.2427, "step": 26690 }, { "epoch": 0.1895119003891382, "grad_norm": 0.08642578125, "learning_rate": 0.001997062275118361, "loss": 0.277, "step": 26700 }, { "epoch": 0.18958287862898432, "grad_norm": 0.2392578125, "learning_rate": 0.0019970599897521095, "loss": 0.2641, "step": 26710 }, { "epoch": 0.18965385686883043, "grad_norm": 0.09423828125, "learning_rate": 0.001997057703498722, "loss": 0.2628, "step": 26720 }, { "epoch": 0.18972483510867655, "grad_norm": 0.0927734375, "learning_rate": 0.0019970554163582007, "loss": 0.2648, "step": 26730 }, { "epoch": 0.18979581334852266, "grad_norm": 0.09716796875, "learning_rate": 0.001997053128330548, "loss": 0.2431, "step": 26740 }, { "epoch": 0.1898667915883688, "grad_norm": 0.0849609375, "learning_rate": 0.0019970508394157654, "loss": 0.2616, "step": 26750 }, { "epoch": 0.18993776982821492, "grad_norm": 0.087890625, "learning_rate": 0.0019970485496138566, "loss": 0.2561, "step": 26760 }, { "epoch": 0.19000874806806103, "grad_norm": 0.099609375, "learning_rate": 0.0019970462589248223, "loss": 0.2507, "step": 26770 }, { "epoch": 0.19007972630790715, "grad_norm": 0.1298828125, "learning_rate": 0.0019970439673486656, "loss": 0.2489, "step": 26780 }, { "epoch": 0.19015070454775326, "grad_norm": 0.0859375, "learning_rate": 0.001997041674885389, "loss": 0.2608, "step": 26790 }, { "epoch": 0.1902216827875994, "grad_norm": 0.0947265625, "learning_rate": 0.0019970393815349945, "loss": 0.2338, "step": 26800 }, { "epoch": 0.19029266102744552, "grad_norm": 0.125, "learning_rate": 0.0019970370872974836, "loss": 0.2428, "step": 26810 }, { "epoch": 0.19036363926729163, "grad_norm": 0.154296875, "learning_rate": 0.0019970347921728594, "loss": 0.2344, "step": 26820 }, { "epoch": 0.19043461750713775, "grad_norm": 0.109375, "learning_rate": 0.0019970324961611245, "loss": 0.2554, "step": 26830 }, { "epoch": 0.19050559574698386, "grad_norm": 0.091796875, "learning_rate": 0.00199703019926228, "loss": 0.2552, "step": 26840 }, { "epoch": 0.19057657398682998, "grad_norm": 0.16015625, "learning_rate": 0.0019970279014763295, "loss": 0.2529, "step": 26850 }, { "epoch": 0.19064755222667612, "grad_norm": 0.1435546875, "learning_rate": 0.0019970256028032742, "loss": 0.2623, "step": 26860 }, { "epoch": 0.19071853046652223, "grad_norm": 0.05517578125, "learning_rate": 0.001997023303243117, "loss": 0.2482, "step": 26870 }, { "epoch": 0.19078950870636835, "grad_norm": 0.251953125, "learning_rate": 0.0019970210027958598, "loss": 0.2566, "step": 26880 }, { "epoch": 0.19086048694621446, "grad_norm": 0.10302734375, "learning_rate": 0.0019970187014615053, "loss": 0.2429, "step": 26890 }, { "epoch": 0.19093146518606058, "grad_norm": 0.12158203125, "learning_rate": 0.0019970163992400554, "loss": 0.2391, "step": 26900 }, { "epoch": 0.1910024434259067, "grad_norm": 0.1005859375, "learning_rate": 0.0019970140961315126, "loss": 0.2518, "step": 26910 }, { "epoch": 0.19107342166575284, "grad_norm": 0.134765625, "learning_rate": 0.001997011792135879, "loss": 0.2548, "step": 26920 }, { "epoch": 0.19114439990559895, "grad_norm": 0.1787109375, "learning_rate": 0.001997009487253157, "loss": 0.2628, "step": 26930 }, { "epoch": 0.19121537814544506, "grad_norm": 0.125, "learning_rate": 0.0019970071814833486, "loss": 0.2675, "step": 26940 }, { "epoch": 0.19128635638529118, "grad_norm": 0.11279296875, "learning_rate": 0.0019970048748264566, "loss": 0.2763, "step": 26950 }, { "epoch": 0.1913573346251373, "grad_norm": 0.16796875, "learning_rate": 0.001997002567282483, "loss": 0.267, "step": 26960 }, { "epoch": 0.1914283128649834, "grad_norm": 0.09814453125, "learning_rate": 0.00199700025885143, "loss": 0.243, "step": 26970 }, { "epoch": 0.19149929110482955, "grad_norm": 0.11865234375, "learning_rate": 0.0019969979495332996, "loss": 0.2451, "step": 26980 }, { "epoch": 0.19157026934467566, "grad_norm": 0.130859375, "learning_rate": 0.001996995639328095, "loss": 0.2508, "step": 26990 }, { "epoch": 0.19164124758452178, "grad_norm": 0.173828125, "learning_rate": 0.0019969933282358177, "loss": 0.2489, "step": 27000 }, { "epoch": 0.1917122258243679, "grad_norm": 0.197265625, "learning_rate": 0.00199699101625647, "loss": 0.2617, "step": 27010 }, { "epoch": 0.191783204064214, "grad_norm": 0.1513671875, "learning_rate": 0.001996988703390055, "loss": 0.2706, "step": 27020 }, { "epoch": 0.19185418230406012, "grad_norm": 0.1689453125, "learning_rate": 0.001996986389636574, "loss": 0.2595, "step": 27030 }, { "epoch": 0.19192516054390626, "grad_norm": 0.1982421875, "learning_rate": 0.0019969840749960296, "loss": 0.232, "step": 27040 }, { "epoch": 0.19199613878375238, "grad_norm": 0.078125, "learning_rate": 0.001996981759468424, "loss": 0.2642, "step": 27050 }, { "epoch": 0.1920671170235985, "grad_norm": 0.1259765625, "learning_rate": 0.00199697944305376, "loss": 0.2474, "step": 27060 }, { "epoch": 0.1921380952634446, "grad_norm": 0.1123046875, "learning_rate": 0.0019969771257520395, "loss": 0.2437, "step": 27070 }, { "epoch": 0.19220907350329072, "grad_norm": 0.1279296875, "learning_rate": 0.001996974807563265, "loss": 0.2368, "step": 27080 }, { "epoch": 0.19228005174313684, "grad_norm": 0.1142578125, "learning_rate": 0.0019969724884874385, "loss": 0.256, "step": 27090 }, { "epoch": 0.19235102998298298, "grad_norm": 0.1025390625, "learning_rate": 0.001996970168524562, "loss": 0.2472, "step": 27100 }, { "epoch": 0.1924220082228291, "grad_norm": 0.0986328125, "learning_rate": 0.001996967847674639, "loss": 0.2611, "step": 27110 }, { "epoch": 0.1924929864626752, "grad_norm": 0.16796875, "learning_rate": 0.0019969655259376704, "loss": 0.2412, "step": 27120 }, { "epoch": 0.19256396470252132, "grad_norm": 0.1015625, "learning_rate": 0.0019969632033136593, "loss": 0.237, "step": 27130 }, { "epoch": 0.19263494294236744, "grad_norm": 0.11328125, "learning_rate": 0.001996960879802608, "loss": 0.2564, "step": 27140 }, { "epoch": 0.19270592118221355, "grad_norm": 0.1220703125, "learning_rate": 0.001996958555404518, "loss": 0.2412, "step": 27150 }, { "epoch": 0.1927768994220597, "grad_norm": 0.16015625, "learning_rate": 0.001996956230119393, "loss": 0.2337, "step": 27160 }, { "epoch": 0.1928478776619058, "grad_norm": 0.12109375, "learning_rate": 0.001996953903947234, "loss": 0.2456, "step": 27170 }, { "epoch": 0.19291885590175192, "grad_norm": 0.0869140625, "learning_rate": 0.001996951576888044, "loss": 0.2497, "step": 27180 }, { "epoch": 0.19298983414159804, "grad_norm": 0.08935546875, "learning_rate": 0.001996949248941825, "loss": 0.2602, "step": 27190 }, { "epoch": 0.19306081238144415, "grad_norm": 0.1826171875, "learning_rate": 0.0019969469201085795, "loss": 0.2532, "step": 27200 }, { "epoch": 0.19313179062129027, "grad_norm": 0.109375, "learning_rate": 0.00199694459038831, "loss": 0.2475, "step": 27210 }, { "epoch": 0.1932027688611364, "grad_norm": 0.13671875, "learning_rate": 0.0019969422597810177, "loss": 0.2546, "step": 27220 }, { "epoch": 0.19327374710098252, "grad_norm": 0.12158203125, "learning_rate": 0.0019969399282867063, "loss": 0.261, "step": 27230 }, { "epoch": 0.19334472534082864, "grad_norm": 0.099609375, "learning_rate": 0.0019969375959053774, "loss": 0.2376, "step": 27240 }, { "epoch": 0.19341570358067475, "grad_norm": 0.0966796875, "learning_rate": 0.0019969352626370337, "loss": 0.2502, "step": 27250 }, { "epoch": 0.19348668182052087, "grad_norm": 0.1015625, "learning_rate": 0.0019969329284816773, "loss": 0.2481, "step": 27260 }, { "epoch": 0.19355766006036698, "grad_norm": 0.11328125, "learning_rate": 0.00199693059343931, "loss": 0.2468, "step": 27270 }, { "epoch": 0.19362863830021312, "grad_norm": 0.0791015625, "learning_rate": 0.0019969282575099346, "loss": 0.2473, "step": 27280 }, { "epoch": 0.19369961654005924, "grad_norm": 0.09228515625, "learning_rate": 0.001996925920693553, "loss": 0.2435, "step": 27290 }, { "epoch": 0.19377059477990535, "grad_norm": 0.126953125, "learning_rate": 0.001996923582990169, "loss": 0.2579, "step": 27300 }, { "epoch": 0.19384157301975147, "grad_norm": 0.09423828125, "learning_rate": 0.001996921244399783, "loss": 0.2359, "step": 27310 }, { "epoch": 0.19391255125959758, "grad_norm": 0.07373046875, "learning_rate": 0.001996918904922398, "loss": 0.2244, "step": 27320 }, { "epoch": 0.1939835294994437, "grad_norm": 0.0810546875, "learning_rate": 0.001996916564558017, "loss": 0.2385, "step": 27330 }, { "epoch": 0.19405450773928984, "grad_norm": 0.08154296875, "learning_rate": 0.001996914223306641, "loss": 0.2554, "step": 27340 }, { "epoch": 0.19412548597913595, "grad_norm": 0.0859375, "learning_rate": 0.001996911881168274, "loss": 0.2513, "step": 27350 }, { "epoch": 0.19419646421898207, "grad_norm": 0.09033203125, "learning_rate": 0.0019969095381429164, "loss": 0.2627, "step": 27360 }, { "epoch": 0.19426744245882818, "grad_norm": 0.1123046875, "learning_rate": 0.0019969071942305718, "loss": 0.2495, "step": 27370 }, { "epoch": 0.1943384206986743, "grad_norm": 0.134765625, "learning_rate": 0.0019969048494312427, "loss": 0.2438, "step": 27380 }, { "epoch": 0.1944093989385204, "grad_norm": 0.09765625, "learning_rate": 0.0019969025037449304, "loss": 0.2343, "step": 27390 }, { "epoch": 0.19448037717836655, "grad_norm": 0.11328125, "learning_rate": 0.001996900157171638, "loss": 0.2372, "step": 27400 }, { "epoch": 0.19455135541821267, "grad_norm": 0.10595703125, "learning_rate": 0.001996897809711367, "loss": 0.2301, "step": 27410 }, { "epoch": 0.19462233365805878, "grad_norm": 0.11572265625, "learning_rate": 0.001996895461364121, "loss": 0.2378, "step": 27420 }, { "epoch": 0.1946933118979049, "grad_norm": 0.0849609375, "learning_rate": 0.001996893112129901, "loss": 0.25, "step": 27430 }, { "epoch": 0.194764290137751, "grad_norm": 0.11669921875, "learning_rate": 0.0019968907620087103, "loss": 0.2453, "step": 27440 }, { "epoch": 0.19483526837759713, "grad_norm": 0.0791015625, "learning_rate": 0.001996888411000551, "loss": 0.2632, "step": 27450 }, { "epoch": 0.19490624661744327, "grad_norm": 0.1357421875, "learning_rate": 0.0019968860591054243, "loss": 0.2423, "step": 27460 }, { "epoch": 0.19497722485728938, "grad_norm": 0.123046875, "learning_rate": 0.0019968837063233345, "loss": 0.2502, "step": 27470 }, { "epoch": 0.1950482030971355, "grad_norm": 0.06689453125, "learning_rate": 0.0019968813526542828, "loss": 0.2473, "step": 27480 }, { "epoch": 0.1951191813369816, "grad_norm": 0.1259765625, "learning_rate": 0.0019968789980982717, "loss": 0.2428, "step": 27490 }, { "epoch": 0.19519015957682773, "grad_norm": 0.1640625, "learning_rate": 0.001996876642655303, "loss": 0.2641, "step": 27500 }, { "epoch": 0.19526113781667384, "grad_norm": 0.1455078125, "learning_rate": 0.0019968742863253797, "loss": 0.2294, "step": 27510 }, { "epoch": 0.19533211605651998, "grad_norm": 0.09326171875, "learning_rate": 0.0019968719291085044, "loss": 0.2594, "step": 27520 }, { "epoch": 0.1954030942963661, "grad_norm": 0.1357421875, "learning_rate": 0.0019968695710046785, "loss": 0.2487, "step": 27530 }, { "epoch": 0.1954740725362122, "grad_norm": 0.099609375, "learning_rate": 0.0019968672120139045, "loss": 0.2455, "step": 27540 }, { "epoch": 0.19554505077605833, "grad_norm": 0.09814453125, "learning_rate": 0.001996864852136186, "loss": 0.2434, "step": 27550 }, { "epoch": 0.19561602901590444, "grad_norm": 0.09130859375, "learning_rate": 0.0019968624913715236, "loss": 0.2464, "step": 27560 }, { "epoch": 0.19568700725575056, "grad_norm": 0.169921875, "learning_rate": 0.0019968601297199206, "loss": 0.2534, "step": 27570 }, { "epoch": 0.1957579854955967, "grad_norm": 0.09912109375, "learning_rate": 0.001996857767181379, "loss": 0.233, "step": 27580 }, { "epoch": 0.1958289637354428, "grad_norm": 0.12353515625, "learning_rate": 0.0019968554037559016, "loss": 0.2546, "step": 27590 }, { "epoch": 0.19589994197528893, "grad_norm": 0.08984375, "learning_rate": 0.0019968530394434904, "loss": 0.2444, "step": 27600 }, { "epoch": 0.19597092021513504, "grad_norm": 0.14453125, "learning_rate": 0.0019968506742441476, "loss": 0.252, "step": 27610 }, { "epoch": 0.19604189845498116, "grad_norm": 0.2021484375, "learning_rate": 0.001996848308157876, "loss": 0.2497, "step": 27620 }, { "epoch": 0.19611287669482727, "grad_norm": 0.1455078125, "learning_rate": 0.001996845941184677, "loss": 0.2654, "step": 27630 }, { "epoch": 0.1961838549346734, "grad_norm": 0.0654296875, "learning_rate": 0.001996843573324554, "loss": 0.2305, "step": 27640 }, { "epoch": 0.19625483317451953, "grad_norm": 0.10546875, "learning_rate": 0.0019968412045775086, "loss": 0.2552, "step": 27650 }, { "epoch": 0.19632581141436564, "grad_norm": 0.1123046875, "learning_rate": 0.001996838834943544, "loss": 0.2576, "step": 27660 }, { "epoch": 0.19639678965421176, "grad_norm": 0.1396484375, "learning_rate": 0.0019968364644226614, "loss": 0.2592, "step": 27670 }, { "epoch": 0.19646776789405787, "grad_norm": 0.1298828125, "learning_rate": 0.0019968340930148643, "loss": 0.2793, "step": 27680 }, { "epoch": 0.19653874613390399, "grad_norm": 0.1025390625, "learning_rate": 0.0019968317207201543, "loss": 0.2541, "step": 27690 }, { "epoch": 0.19660972437375013, "grad_norm": 0.1201171875, "learning_rate": 0.0019968293475385334, "loss": 0.2363, "step": 27700 }, { "epoch": 0.19668070261359624, "grad_norm": 0.140625, "learning_rate": 0.0019968269734700053, "loss": 0.2531, "step": 27710 }, { "epoch": 0.19675168085344236, "grad_norm": 0.08447265625, "learning_rate": 0.001996824598514571, "loss": 0.2501, "step": 27720 }, { "epoch": 0.19682265909328847, "grad_norm": 0.1455078125, "learning_rate": 0.0019968222226722336, "loss": 0.2545, "step": 27730 }, { "epoch": 0.19689363733313459, "grad_norm": 0.1728515625, "learning_rate": 0.0019968198459429953, "loss": 0.2638, "step": 27740 }, { "epoch": 0.1969646155729807, "grad_norm": 0.1748046875, "learning_rate": 0.0019968174683268583, "loss": 0.2501, "step": 27750 }, { "epoch": 0.19703559381282684, "grad_norm": 0.10498046875, "learning_rate": 0.001996815089823825, "loss": 0.2713, "step": 27760 }, { "epoch": 0.19710657205267296, "grad_norm": 0.14453125, "learning_rate": 0.0019968127104338976, "loss": 0.2474, "step": 27770 }, { "epoch": 0.19717755029251907, "grad_norm": 0.28125, "learning_rate": 0.0019968103301570787, "loss": 0.2404, "step": 27780 }, { "epoch": 0.19724852853236519, "grad_norm": 0.10107421875, "learning_rate": 0.001996807948993371, "loss": 0.2452, "step": 27790 }, { "epoch": 0.1973195067722113, "grad_norm": 0.1435546875, "learning_rate": 0.001996805566942776, "loss": 0.2388, "step": 27800 }, { "epoch": 0.19739048501205742, "grad_norm": 0.0888671875, "learning_rate": 0.001996803184005297, "loss": 0.248, "step": 27810 }, { "epoch": 0.19746146325190356, "grad_norm": 0.09814453125, "learning_rate": 0.0019968008001809354, "loss": 0.2539, "step": 27820 }, { "epoch": 0.19753244149174967, "grad_norm": 0.125, "learning_rate": 0.001996798415469694, "loss": 0.2465, "step": 27830 }, { "epoch": 0.1976034197315958, "grad_norm": 0.0849609375, "learning_rate": 0.0019967960298715756, "loss": 0.2377, "step": 27840 }, { "epoch": 0.1976743979714419, "grad_norm": 0.12255859375, "learning_rate": 0.0019967936433865816, "loss": 0.2501, "step": 27850 }, { "epoch": 0.19774537621128802, "grad_norm": 0.080078125, "learning_rate": 0.001996791256014715, "loss": 0.2357, "step": 27860 }, { "epoch": 0.19781635445113413, "grad_norm": 0.1171875, "learning_rate": 0.0019967888677559784, "loss": 0.2538, "step": 27870 }, { "epoch": 0.19788733269098027, "grad_norm": 0.08349609375, "learning_rate": 0.0019967864786103735, "loss": 0.2591, "step": 27880 }, { "epoch": 0.1979583109308264, "grad_norm": 0.1201171875, "learning_rate": 0.001996784088577903, "loss": 0.2361, "step": 27890 }, { "epoch": 0.1980292891706725, "grad_norm": 0.1875, "learning_rate": 0.0019967816976585694, "loss": 0.2754, "step": 27900 }, { "epoch": 0.19810026741051862, "grad_norm": 0.1689453125, "learning_rate": 0.001996779305852375, "loss": 0.2367, "step": 27910 }, { "epoch": 0.19817124565036473, "grad_norm": 0.103515625, "learning_rate": 0.0019967769131593216, "loss": 0.2552, "step": 27920 }, { "epoch": 0.19824222389021087, "grad_norm": 0.1591796875, "learning_rate": 0.0019967745195794127, "loss": 0.2673, "step": 27930 }, { "epoch": 0.198313202130057, "grad_norm": 0.0615234375, "learning_rate": 0.0019967721251126494, "loss": 0.2461, "step": 27940 }, { "epoch": 0.1983841803699031, "grad_norm": 0.1435546875, "learning_rate": 0.0019967697297590347, "loss": 0.2432, "step": 27950 }, { "epoch": 0.19845515860974922, "grad_norm": 0.0732421875, "learning_rate": 0.0019967673335185713, "loss": 0.249, "step": 27960 }, { "epoch": 0.19852613684959533, "grad_norm": 0.12890625, "learning_rate": 0.001996764936391261, "loss": 0.2379, "step": 27970 }, { "epoch": 0.19859711508944144, "grad_norm": 0.126953125, "learning_rate": 0.0019967625383771064, "loss": 0.2485, "step": 27980 }, { "epoch": 0.1986680933292876, "grad_norm": 0.16015625, "learning_rate": 0.00199676013947611, "loss": 0.2485, "step": 27990 }, { "epoch": 0.1987390715691337, "grad_norm": 0.1162109375, "learning_rate": 0.001996757739688274, "loss": 0.2701, "step": 28000 }, { "epoch": 0.1987390715691337, "eval_covost2-zh-en_loss": 4.027144432067871, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.327, "eval_covost2-zh-en_samples_per_second": 3.001, "eval_covost2-zh-en_steps_per_second": 0.188, "step": 28000 }, { "epoch": 0.1987390715691337, "eval_covost2-en-zh_loss": 3.112156629562378, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 22.4275, "eval_covost2-en-zh_samples_per_second": 2.854, "eval_covost2-en-zh_steps_per_second": 0.178, "step": 28000 }, { "epoch": 0.19881004980897982, "grad_norm": 0.1904296875, "learning_rate": 0.001996755339013601, "loss": 0.2389, "step": 28010 }, { "epoch": 0.19888102804882593, "grad_norm": 0.08349609375, "learning_rate": 0.0019967529374520926, "loss": 0.2507, "step": 28020 }, { "epoch": 0.19895200628867205, "grad_norm": 0.11572265625, "learning_rate": 0.001996750535003752, "loss": 0.2511, "step": 28030 }, { "epoch": 0.19902298452851816, "grad_norm": 0.125, "learning_rate": 0.0019967481316685814, "loss": 0.2485, "step": 28040 }, { "epoch": 0.1990939627683643, "grad_norm": 0.10888671875, "learning_rate": 0.001996745727446583, "loss": 0.2224, "step": 28050 }, { "epoch": 0.19916494100821042, "grad_norm": 0.1025390625, "learning_rate": 0.0019967433223377594, "loss": 0.2416, "step": 28060 }, { "epoch": 0.19923591924805653, "grad_norm": 0.10302734375, "learning_rate": 0.0019967409163421128, "loss": 0.2424, "step": 28070 }, { "epoch": 0.19930689748790265, "grad_norm": 0.08056640625, "learning_rate": 0.0019967385094596455, "loss": 0.2441, "step": 28080 }, { "epoch": 0.19937787572774876, "grad_norm": 0.1435546875, "learning_rate": 0.0019967361016903604, "loss": 0.2474, "step": 28090 }, { "epoch": 0.19944885396759487, "grad_norm": 0.1611328125, "learning_rate": 0.001996733693034259, "loss": 0.2613, "step": 28100 }, { "epoch": 0.19951983220744102, "grad_norm": 0.1015625, "learning_rate": 0.001996731283491345, "loss": 0.2384, "step": 28110 }, { "epoch": 0.19959081044728713, "grad_norm": 0.1396484375, "learning_rate": 0.001996728873061619, "loss": 0.2801, "step": 28120 }, { "epoch": 0.19966178868713325, "grad_norm": 0.099609375, "learning_rate": 0.0019967264617450848, "loss": 0.2588, "step": 28130 }, { "epoch": 0.19973276692697936, "grad_norm": 0.1240234375, "learning_rate": 0.001996724049541744, "loss": 0.2488, "step": 28140 }, { "epoch": 0.19980374516682547, "grad_norm": 0.10791015625, "learning_rate": 0.0019967216364515997, "loss": 0.2458, "step": 28150 }, { "epoch": 0.1998747234066716, "grad_norm": 0.1044921875, "learning_rate": 0.001996719222474654, "loss": 0.2566, "step": 28160 }, { "epoch": 0.19994570164651773, "grad_norm": 0.10595703125, "learning_rate": 0.0019967168076109086, "loss": 0.2482, "step": 28170 }, { "epoch": 0.20001667988636385, "grad_norm": 0.119140625, "learning_rate": 0.0019967143918603668, "loss": 0.2467, "step": 28180 }, { "epoch": 0.20008765812620996, "grad_norm": 0.0859375, "learning_rate": 0.0019967119752230308, "loss": 0.25, "step": 28190 }, { "epoch": 0.20015863636605608, "grad_norm": 0.0810546875, "learning_rate": 0.0019967095576989033, "loss": 0.2663, "step": 28200 }, { "epoch": 0.2002296146059022, "grad_norm": 0.1015625, "learning_rate": 0.0019967071392879856, "loss": 0.2534, "step": 28210 }, { "epoch": 0.2003005928457483, "grad_norm": 0.09033203125, "learning_rate": 0.0019967047199902807, "loss": 0.2353, "step": 28220 }, { "epoch": 0.20037157108559445, "grad_norm": 0.078125, "learning_rate": 0.0019967022998057912, "loss": 0.2595, "step": 28230 }, { "epoch": 0.20044254932544056, "grad_norm": 0.1279296875, "learning_rate": 0.0019966998787345194, "loss": 0.2432, "step": 28240 }, { "epoch": 0.20051352756528668, "grad_norm": 0.09521484375, "learning_rate": 0.0019966974567764673, "loss": 0.2592, "step": 28250 }, { "epoch": 0.2005845058051328, "grad_norm": 0.2060546875, "learning_rate": 0.001996695033931638, "loss": 0.2445, "step": 28260 }, { "epoch": 0.2006554840449789, "grad_norm": 0.07177734375, "learning_rate": 0.0019966926102000333, "loss": 0.2435, "step": 28270 }, { "epoch": 0.20072646228482502, "grad_norm": 0.099609375, "learning_rate": 0.001996690185581656, "loss": 0.2432, "step": 28280 }, { "epoch": 0.20079744052467116, "grad_norm": 0.0859375, "learning_rate": 0.0019966877600765083, "loss": 0.2556, "step": 28290 }, { "epoch": 0.20086841876451728, "grad_norm": 0.23046875, "learning_rate": 0.0019966853336845923, "loss": 0.2363, "step": 28300 }, { "epoch": 0.2009393970043634, "grad_norm": 0.12890625, "learning_rate": 0.001996682906405911, "loss": 0.2638, "step": 28310 }, { "epoch": 0.2010103752442095, "grad_norm": 0.1220703125, "learning_rate": 0.0019966804782404666, "loss": 0.2368, "step": 28320 }, { "epoch": 0.20108135348405562, "grad_norm": 0.18359375, "learning_rate": 0.001996678049188261, "loss": 0.2707, "step": 28330 }, { "epoch": 0.20115233172390173, "grad_norm": 0.11328125, "learning_rate": 0.001996675619249297, "loss": 0.2551, "step": 28340 }, { "epoch": 0.20122330996374788, "grad_norm": 0.1728515625, "learning_rate": 0.0019966731884235775, "loss": 0.2351, "step": 28350 }, { "epoch": 0.201294288203594, "grad_norm": 0.09912109375, "learning_rate": 0.001996670756711104, "loss": 0.2519, "step": 28360 }, { "epoch": 0.2013652664434401, "grad_norm": 0.12890625, "learning_rate": 0.0019966683241118793, "loss": 0.2401, "step": 28370 }, { "epoch": 0.20143624468328622, "grad_norm": 0.058837890625, "learning_rate": 0.0019966658906259063, "loss": 0.2467, "step": 28380 }, { "epoch": 0.20150722292313233, "grad_norm": 0.0517578125, "learning_rate": 0.001996663456253187, "loss": 0.2288, "step": 28390 }, { "epoch": 0.20157820116297845, "grad_norm": 0.1826171875, "learning_rate": 0.0019966610209937227, "loss": 0.2518, "step": 28400 }, { "epoch": 0.2016491794028246, "grad_norm": 0.2119140625, "learning_rate": 0.0019966585848475178, "loss": 0.2488, "step": 28410 }, { "epoch": 0.2017201576426707, "grad_norm": 0.0849609375, "learning_rate": 0.0019966561478145734, "loss": 0.2489, "step": 28420 }, { "epoch": 0.20179113588251682, "grad_norm": 0.212890625, "learning_rate": 0.001996653709894892, "loss": 0.2398, "step": 28430 }, { "epoch": 0.20186211412236293, "grad_norm": 0.1025390625, "learning_rate": 0.001996651271088477, "loss": 0.2491, "step": 28440 }, { "epoch": 0.20193309236220905, "grad_norm": 0.1279296875, "learning_rate": 0.00199664883139533, "loss": 0.2379, "step": 28450 }, { "epoch": 0.20200407060205516, "grad_norm": 0.109375, "learning_rate": 0.001996646390815453, "loss": 0.2453, "step": 28460 }, { "epoch": 0.2020750488419013, "grad_norm": 0.109375, "learning_rate": 0.001996643949348849, "loss": 0.2425, "step": 28470 }, { "epoch": 0.20214602708174742, "grad_norm": 0.1279296875, "learning_rate": 0.00199664150699552, "loss": 0.2369, "step": 28480 }, { "epoch": 0.20221700532159353, "grad_norm": 0.09814453125, "learning_rate": 0.00199663906375547, "loss": 0.2436, "step": 28490 }, { "epoch": 0.20228798356143965, "grad_norm": 0.08154296875, "learning_rate": 0.001996636619628699, "loss": 0.2614, "step": 28500 }, { "epoch": 0.20235896180128576, "grad_norm": 0.0810546875, "learning_rate": 0.001996634174615211, "loss": 0.2492, "step": 28510 }, { "epoch": 0.20242994004113188, "grad_norm": 0.158203125, "learning_rate": 0.001996631728715008, "loss": 0.2531, "step": 28520 }, { "epoch": 0.20250091828097802, "grad_norm": 0.205078125, "learning_rate": 0.001996629281928092, "loss": 0.2707, "step": 28530 }, { "epoch": 0.20257189652082414, "grad_norm": 0.09619140625, "learning_rate": 0.0019966268342544663, "loss": 0.2549, "step": 28540 }, { "epoch": 0.20264287476067025, "grad_norm": 0.1416015625, "learning_rate": 0.0019966243856941327, "loss": 0.2403, "step": 28550 }, { "epoch": 0.20271385300051636, "grad_norm": 0.0830078125, "learning_rate": 0.001996621936247094, "loss": 0.2452, "step": 28560 }, { "epoch": 0.20278483124036248, "grad_norm": 0.0859375, "learning_rate": 0.0019966194859133523, "loss": 0.2465, "step": 28570 }, { "epoch": 0.2028558094802086, "grad_norm": 0.072265625, "learning_rate": 0.00199661703469291, "loss": 0.2466, "step": 28580 }, { "epoch": 0.20292678772005474, "grad_norm": 0.1357421875, "learning_rate": 0.0019966145825857694, "loss": 0.2488, "step": 28590 }, { "epoch": 0.20299776595990085, "grad_norm": 0.11328125, "learning_rate": 0.0019966121295919336, "loss": 0.2444, "step": 28600 }, { "epoch": 0.20306874419974696, "grad_norm": 0.0859375, "learning_rate": 0.0019966096757114047, "loss": 0.2422, "step": 28610 }, { "epoch": 0.20313972243959308, "grad_norm": 0.09765625, "learning_rate": 0.001996607220944185, "loss": 0.2434, "step": 28620 }, { "epoch": 0.2032107006794392, "grad_norm": 0.13671875, "learning_rate": 0.0019966047652902764, "loss": 0.2454, "step": 28630 }, { "epoch": 0.2032816789192853, "grad_norm": 0.1201171875, "learning_rate": 0.001996602308749682, "loss": 0.2435, "step": 28640 }, { "epoch": 0.20335265715913145, "grad_norm": 0.1298828125, "learning_rate": 0.0019965998513224044, "loss": 0.2539, "step": 28650 }, { "epoch": 0.20342363539897756, "grad_norm": 0.119140625, "learning_rate": 0.001996597393008446, "loss": 0.2491, "step": 28660 }, { "epoch": 0.20349461363882368, "grad_norm": 0.0849609375, "learning_rate": 0.0019965949338078084, "loss": 0.2422, "step": 28670 }, { "epoch": 0.2035655918786698, "grad_norm": 0.12255859375, "learning_rate": 0.001996592473720495, "loss": 0.2663, "step": 28680 }, { "epoch": 0.2036365701185159, "grad_norm": 0.28125, "learning_rate": 0.0019965900127465073, "loss": 0.2636, "step": 28690 }, { "epoch": 0.20370754835836202, "grad_norm": 0.0908203125, "learning_rate": 0.0019965875508858493, "loss": 0.2557, "step": 28700 }, { "epoch": 0.20377852659820817, "grad_norm": 0.07958984375, "learning_rate": 0.001996585088138521, "loss": 0.2455, "step": 28710 }, { "epoch": 0.20384950483805428, "grad_norm": 0.2578125, "learning_rate": 0.0019965826245045273, "loss": 0.2397, "step": 28720 }, { "epoch": 0.2039204830779004, "grad_norm": 0.068359375, "learning_rate": 0.0019965801599838694, "loss": 0.2596, "step": 28730 }, { "epoch": 0.2039914613177465, "grad_norm": 0.1318359375, "learning_rate": 0.0019965776945765497, "loss": 0.2569, "step": 28740 }, { "epoch": 0.20406243955759262, "grad_norm": 0.103515625, "learning_rate": 0.0019965752282825712, "loss": 0.2629, "step": 28750 }, { "epoch": 0.20413341779743874, "grad_norm": 0.09814453125, "learning_rate": 0.0019965727611019357, "loss": 0.2456, "step": 28760 }, { "epoch": 0.20420439603728488, "grad_norm": 0.10302734375, "learning_rate": 0.0019965702930346456, "loss": 0.2556, "step": 28770 }, { "epoch": 0.204275374277131, "grad_norm": 0.11279296875, "learning_rate": 0.001996567824080704, "loss": 0.2535, "step": 28780 }, { "epoch": 0.2043463525169771, "grad_norm": 0.1728515625, "learning_rate": 0.0019965653542401134, "loss": 0.2473, "step": 28790 }, { "epoch": 0.20441733075682322, "grad_norm": 0.08837890625, "learning_rate": 0.0019965628835128756, "loss": 0.2481, "step": 28800 }, { "epoch": 0.20448830899666934, "grad_norm": 0.1318359375, "learning_rate": 0.001996560411898993, "loss": 0.2534, "step": 28810 }, { "epoch": 0.20455928723651545, "grad_norm": 0.0908203125, "learning_rate": 0.0019965579393984687, "loss": 0.2484, "step": 28820 }, { "epoch": 0.2046302654763616, "grad_norm": 0.0712890625, "learning_rate": 0.0019965554660113047, "loss": 0.2403, "step": 28830 }, { "epoch": 0.2047012437162077, "grad_norm": 0.1357421875, "learning_rate": 0.0019965529917375033, "loss": 0.2504, "step": 28840 }, { "epoch": 0.20477222195605382, "grad_norm": 0.078125, "learning_rate": 0.0019965505165770673, "loss": 0.2393, "step": 28850 }, { "epoch": 0.20484320019589994, "grad_norm": 0.10107421875, "learning_rate": 0.001996548040529999, "loss": 0.2397, "step": 28860 }, { "epoch": 0.20491417843574605, "grad_norm": 0.1064453125, "learning_rate": 0.001996545563596301, "loss": 0.2531, "step": 28870 }, { "epoch": 0.20498515667559217, "grad_norm": 0.08251953125, "learning_rate": 0.0019965430857759757, "loss": 0.2553, "step": 28880 }, { "epoch": 0.2050561349154383, "grad_norm": 0.10693359375, "learning_rate": 0.0019965406070690255, "loss": 0.248, "step": 28890 }, { "epoch": 0.20512711315528442, "grad_norm": 0.10400390625, "learning_rate": 0.001996538127475453, "loss": 0.2442, "step": 28900 }, { "epoch": 0.20519809139513054, "grad_norm": 0.10595703125, "learning_rate": 0.0019965356469952597, "loss": 0.2605, "step": 28910 }, { "epoch": 0.20526906963497665, "grad_norm": 0.111328125, "learning_rate": 0.0019965331656284497, "loss": 0.2458, "step": 28920 }, { "epoch": 0.20534004787482277, "grad_norm": 0.0771484375, "learning_rate": 0.001996530683375024, "loss": 0.238, "step": 28930 }, { "epoch": 0.20541102611466888, "grad_norm": 0.1240234375, "learning_rate": 0.0019965282002349857, "loss": 0.2488, "step": 28940 }, { "epoch": 0.20548200435451502, "grad_norm": 0.126953125, "learning_rate": 0.0019965257162083373, "loss": 0.2551, "step": 28950 }, { "epoch": 0.20555298259436114, "grad_norm": 0.10009765625, "learning_rate": 0.0019965232312950812, "loss": 0.2464, "step": 28960 }, { "epoch": 0.20562396083420725, "grad_norm": 0.09716796875, "learning_rate": 0.0019965207454952197, "loss": 0.2464, "step": 28970 }, { "epoch": 0.20569493907405337, "grad_norm": 0.1240234375, "learning_rate": 0.0019965182588087558, "loss": 0.2732, "step": 28980 }, { "epoch": 0.20576591731389948, "grad_norm": 0.083984375, "learning_rate": 0.0019965157712356916, "loss": 0.2367, "step": 28990 }, { "epoch": 0.2058368955537456, "grad_norm": 0.1640625, "learning_rate": 0.001996513282776029, "loss": 0.2565, "step": 29000 }, { "epoch": 0.20590787379359174, "grad_norm": 0.10498046875, "learning_rate": 0.0019965107934297706, "loss": 0.229, "step": 29010 }, { "epoch": 0.20597885203343785, "grad_norm": 0.11474609375, "learning_rate": 0.00199650830319692, "loss": 0.2454, "step": 29020 }, { "epoch": 0.20604983027328397, "grad_norm": 0.1328125, "learning_rate": 0.0019965058120774786, "loss": 0.2366, "step": 29030 }, { "epoch": 0.20612080851313008, "grad_norm": 0.10595703125, "learning_rate": 0.0019965033200714487, "loss": 0.239, "step": 29040 }, { "epoch": 0.2061917867529762, "grad_norm": 0.1044921875, "learning_rate": 0.0019965008271788337, "loss": 0.2448, "step": 29050 }, { "epoch": 0.2062627649928223, "grad_norm": 0.11328125, "learning_rate": 0.001996498333399636, "loss": 0.2548, "step": 29060 }, { "epoch": 0.20633374323266845, "grad_norm": 0.12890625, "learning_rate": 0.0019964958387338567, "loss": 0.2481, "step": 29070 }, { "epoch": 0.20640472147251457, "grad_norm": 0.1142578125, "learning_rate": 0.0019964933431814995, "loss": 0.2357, "step": 29080 }, { "epoch": 0.20647569971236068, "grad_norm": 0.095703125, "learning_rate": 0.0019964908467425668, "loss": 0.241, "step": 29090 }, { "epoch": 0.2065466779522068, "grad_norm": 0.11865234375, "learning_rate": 0.0019964883494170606, "loss": 0.2528, "step": 29100 }, { "epoch": 0.2066176561920529, "grad_norm": 0.1103515625, "learning_rate": 0.0019964858512049838, "loss": 0.2618, "step": 29110 }, { "epoch": 0.20668863443189905, "grad_norm": 0.134765625, "learning_rate": 0.0019964833521063387, "loss": 0.2379, "step": 29120 }, { "epoch": 0.20675961267174517, "grad_norm": 0.119140625, "learning_rate": 0.0019964808521211277, "loss": 0.2394, "step": 29130 }, { "epoch": 0.20683059091159128, "grad_norm": 0.07763671875, "learning_rate": 0.0019964783512493533, "loss": 0.2403, "step": 29140 }, { "epoch": 0.2069015691514374, "grad_norm": 0.09765625, "learning_rate": 0.0019964758494910176, "loss": 0.2423, "step": 29150 }, { "epoch": 0.2069725473912835, "grad_norm": 0.1435546875, "learning_rate": 0.001996473346846124, "loss": 0.2603, "step": 29160 }, { "epoch": 0.20704352563112963, "grad_norm": 0.1416015625, "learning_rate": 0.001996470843314674, "loss": 0.2572, "step": 29170 }, { "epoch": 0.20711450387097577, "grad_norm": 0.09912109375, "learning_rate": 0.001996468338896671, "loss": 0.2447, "step": 29180 }, { "epoch": 0.20718548211082188, "grad_norm": 0.197265625, "learning_rate": 0.001996465833592117, "loss": 0.2449, "step": 29190 }, { "epoch": 0.207256460350668, "grad_norm": 0.1005859375, "learning_rate": 0.0019964633274010143, "loss": 0.2375, "step": 29200 }, { "epoch": 0.2073274385905141, "grad_norm": 0.10888671875, "learning_rate": 0.0019964608203233655, "loss": 0.2372, "step": 29210 }, { "epoch": 0.20739841683036023, "grad_norm": 0.10009765625, "learning_rate": 0.0019964583123591732, "loss": 0.2451, "step": 29220 }, { "epoch": 0.20746939507020634, "grad_norm": 0.107421875, "learning_rate": 0.0019964558035084397, "loss": 0.2549, "step": 29230 }, { "epoch": 0.20754037331005248, "grad_norm": 0.0986328125, "learning_rate": 0.0019964532937711676, "loss": 0.24, "step": 29240 }, { "epoch": 0.2076113515498986, "grad_norm": 0.10107421875, "learning_rate": 0.0019964507831473598, "loss": 0.2491, "step": 29250 }, { "epoch": 0.2076823297897447, "grad_norm": 0.09814453125, "learning_rate": 0.0019964482716370176, "loss": 0.2494, "step": 29260 }, { "epoch": 0.20775330802959083, "grad_norm": 0.1044921875, "learning_rate": 0.0019964457592401446, "loss": 0.2395, "step": 29270 }, { "epoch": 0.20782428626943694, "grad_norm": 0.09814453125, "learning_rate": 0.0019964432459567433, "loss": 0.2491, "step": 29280 }, { "epoch": 0.20789526450928306, "grad_norm": 0.126953125, "learning_rate": 0.0019964407317868155, "loss": 0.2396, "step": 29290 }, { "epoch": 0.2079662427491292, "grad_norm": 0.134765625, "learning_rate": 0.001996438216730364, "loss": 0.2435, "step": 29300 }, { "epoch": 0.2080372209889753, "grad_norm": 0.062255859375, "learning_rate": 0.001996435700787391, "loss": 0.2475, "step": 29310 }, { "epoch": 0.20810819922882143, "grad_norm": 0.185546875, "learning_rate": 0.0019964331839579, "loss": 0.2374, "step": 29320 }, { "epoch": 0.20817917746866754, "grad_norm": 0.12060546875, "learning_rate": 0.0019964306662418924, "loss": 0.2304, "step": 29330 }, { "epoch": 0.20825015570851366, "grad_norm": 0.146484375, "learning_rate": 0.001996428147639371, "loss": 0.2335, "step": 29340 }, { "epoch": 0.20832113394835977, "grad_norm": 0.09765625, "learning_rate": 0.001996425628150338, "loss": 0.2527, "step": 29350 }, { "epoch": 0.2083921121882059, "grad_norm": 0.0869140625, "learning_rate": 0.0019964231077747967, "loss": 0.262, "step": 29360 }, { "epoch": 0.20846309042805203, "grad_norm": 0.10107421875, "learning_rate": 0.001996420586512749, "loss": 0.2375, "step": 29370 }, { "epoch": 0.20853406866789814, "grad_norm": 0.1787109375, "learning_rate": 0.001996418064364198, "loss": 0.2455, "step": 29380 }, { "epoch": 0.20860504690774426, "grad_norm": 0.1494140625, "learning_rate": 0.001996415541329145, "loss": 0.2403, "step": 29390 }, { "epoch": 0.20867602514759037, "grad_norm": 0.111328125, "learning_rate": 0.001996413017407593, "loss": 0.2524, "step": 29400 }, { "epoch": 0.20874700338743649, "grad_norm": 0.1318359375, "learning_rate": 0.001996410492599545, "loss": 0.2524, "step": 29410 }, { "epoch": 0.20881798162728263, "grad_norm": 0.1396484375, "learning_rate": 0.0019964079669050035, "loss": 0.2534, "step": 29420 }, { "epoch": 0.20888895986712874, "grad_norm": 0.10791015625, "learning_rate": 0.001996405440323971, "loss": 0.2461, "step": 29430 }, { "epoch": 0.20895993810697486, "grad_norm": 0.126953125, "learning_rate": 0.0019964029128564493, "loss": 0.2565, "step": 29440 }, { "epoch": 0.20903091634682097, "grad_norm": 0.11328125, "learning_rate": 0.0019964003845024413, "loss": 0.2583, "step": 29450 }, { "epoch": 0.2091018945866671, "grad_norm": 0.1171875, "learning_rate": 0.0019963978552619494, "loss": 0.2366, "step": 29460 }, { "epoch": 0.2091728728265132, "grad_norm": 0.091796875, "learning_rate": 0.0019963953251349764, "loss": 0.2349, "step": 29470 }, { "epoch": 0.20924385106635934, "grad_norm": 0.07568359375, "learning_rate": 0.0019963927941215242, "loss": 0.2376, "step": 29480 }, { "epoch": 0.20931482930620546, "grad_norm": 0.09521484375, "learning_rate": 0.001996390262221596, "loss": 0.2521, "step": 29490 }, { "epoch": 0.20938580754605157, "grad_norm": 0.10009765625, "learning_rate": 0.001996387729435194, "loss": 0.2447, "step": 29500 }, { "epoch": 0.2094567857858977, "grad_norm": 0.1748046875, "learning_rate": 0.0019963851957623207, "loss": 0.2384, "step": 29510 }, { "epoch": 0.2095277640257438, "grad_norm": 0.11181640625, "learning_rate": 0.0019963826612029788, "loss": 0.2884, "step": 29520 }, { "epoch": 0.20959874226558992, "grad_norm": 0.09423828125, "learning_rate": 0.00199638012575717, "loss": 0.2494, "step": 29530 }, { "epoch": 0.20966972050543606, "grad_norm": 0.1669921875, "learning_rate": 0.001996377589424898, "loss": 0.2308, "step": 29540 }, { "epoch": 0.20974069874528217, "grad_norm": 0.08447265625, "learning_rate": 0.0019963750522061646, "loss": 0.2474, "step": 29550 }, { "epoch": 0.2098116769851283, "grad_norm": 0.1318359375, "learning_rate": 0.0019963725141009725, "loss": 0.2632, "step": 29560 }, { "epoch": 0.2098826552249744, "grad_norm": 0.076171875, "learning_rate": 0.0019963699751093245, "loss": 0.2588, "step": 29570 }, { "epoch": 0.20995363346482052, "grad_norm": 0.09716796875, "learning_rate": 0.0019963674352312223, "loss": 0.2457, "step": 29580 }, { "epoch": 0.21002461170466663, "grad_norm": 0.0908203125, "learning_rate": 0.001996364894466669, "loss": 0.2461, "step": 29590 }, { "epoch": 0.21009558994451277, "grad_norm": 0.15234375, "learning_rate": 0.001996362352815667, "loss": 0.2351, "step": 29600 }, { "epoch": 0.2101665681843589, "grad_norm": 0.134765625, "learning_rate": 0.0019963598102782187, "loss": 0.2324, "step": 29610 }, { "epoch": 0.210237546424205, "grad_norm": 0.12060546875, "learning_rate": 0.001996357266854327, "loss": 0.248, "step": 29620 }, { "epoch": 0.21030852466405112, "grad_norm": 0.11962890625, "learning_rate": 0.001996354722543994, "loss": 0.2433, "step": 29630 }, { "epoch": 0.21037950290389723, "grad_norm": 0.1416015625, "learning_rate": 0.0019963521773472223, "loss": 0.2524, "step": 29640 }, { "epoch": 0.21045048114374335, "grad_norm": 0.11181640625, "learning_rate": 0.0019963496312640147, "loss": 0.2448, "step": 29650 }, { "epoch": 0.2105214593835895, "grad_norm": 0.08154296875, "learning_rate": 0.001996347084294373, "loss": 0.2464, "step": 29660 }, { "epoch": 0.2105924376234356, "grad_norm": 0.1123046875, "learning_rate": 0.0019963445364383006, "loss": 0.2388, "step": 29670 }, { "epoch": 0.21066341586328172, "grad_norm": 0.1396484375, "learning_rate": 0.0019963419876957997, "loss": 0.2506, "step": 29680 }, { "epoch": 0.21073439410312783, "grad_norm": 0.10009765625, "learning_rate": 0.001996339438066873, "loss": 0.2612, "step": 29690 }, { "epoch": 0.21080537234297395, "grad_norm": 0.10009765625, "learning_rate": 0.001996336887551522, "loss": 0.2553, "step": 29700 }, { "epoch": 0.21087635058282006, "grad_norm": 0.13671875, "learning_rate": 0.00199633433614975, "loss": 0.2415, "step": 29710 }, { "epoch": 0.2109473288226662, "grad_norm": 0.119140625, "learning_rate": 0.0019963317838615604, "loss": 0.2423, "step": 29720 }, { "epoch": 0.21101830706251232, "grad_norm": 0.0830078125, "learning_rate": 0.001996329230686954, "loss": 0.244, "step": 29730 }, { "epoch": 0.21108928530235843, "grad_norm": 0.107421875, "learning_rate": 0.001996326676625935, "loss": 0.2553, "step": 29740 }, { "epoch": 0.21116026354220455, "grad_norm": 0.103515625, "learning_rate": 0.0019963241216785044, "loss": 0.2708, "step": 29750 }, { "epoch": 0.21123124178205066, "grad_norm": 0.11572265625, "learning_rate": 0.0019963215658446655, "loss": 0.2553, "step": 29760 }, { "epoch": 0.21130222002189677, "grad_norm": 0.099609375, "learning_rate": 0.001996319009124421, "loss": 0.2284, "step": 29770 }, { "epoch": 0.21137319826174292, "grad_norm": 0.0751953125, "learning_rate": 0.001996316451517773, "loss": 0.2474, "step": 29780 }, { "epoch": 0.21144417650158903, "grad_norm": 0.11083984375, "learning_rate": 0.001996313893024724, "loss": 0.2523, "step": 29790 }, { "epoch": 0.21151515474143515, "grad_norm": 0.138671875, "learning_rate": 0.0019963113336452774, "loss": 0.2301, "step": 29800 }, { "epoch": 0.21158613298128126, "grad_norm": 0.0849609375, "learning_rate": 0.0019963087733794343, "loss": 0.2521, "step": 29810 }, { "epoch": 0.21165711122112738, "grad_norm": 0.11474609375, "learning_rate": 0.0019963062122271983, "loss": 0.2667, "step": 29820 }, { "epoch": 0.2117280894609735, "grad_norm": 0.10107421875, "learning_rate": 0.001996303650188572, "loss": 0.2701, "step": 29830 }, { "epoch": 0.21179906770081963, "grad_norm": 0.12890625, "learning_rate": 0.0019963010872635573, "loss": 0.2508, "step": 29840 }, { "epoch": 0.21187004594066575, "grad_norm": 0.16796875, "learning_rate": 0.0019962985234521567, "loss": 0.2483, "step": 29850 }, { "epoch": 0.21194102418051186, "grad_norm": 0.10986328125, "learning_rate": 0.001996295958754373, "loss": 0.2461, "step": 29860 }, { "epoch": 0.21201200242035798, "grad_norm": 0.09765625, "learning_rate": 0.0019962933931702096, "loss": 0.245, "step": 29870 }, { "epoch": 0.2120829806602041, "grad_norm": 0.140625, "learning_rate": 0.0019962908266996674, "loss": 0.2513, "step": 29880 }, { "epoch": 0.2121539589000502, "grad_norm": 0.11376953125, "learning_rate": 0.00199628825934275, "loss": 0.2457, "step": 29890 }, { "epoch": 0.21222493713989635, "grad_norm": 0.10302734375, "learning_rate": 0.0019962856910994598, "loss": 0.2413, "step": 29900 }, { "epoch": 0.21229591537974246, "grad_norm": 0.15625, "learning_rate": 0.001996283121969799, "loss": 0.2388, "step": 29910 }, { "epoch": 0.21236689361958858, "grad_norm": 0.0888671875, "learning_rate": 0.001996280551953771, "loss": 0.2426, "step": 29920 }, { "epoch": 0.2124378718594347, "grad_norm": 0.1572265625, "learning_rate": 0.0019962779810513767, "loss": 0.2578, "step": 29930 }, { "epoch": 0.2125088500992808, "grad_norm": 0.357421875, "learning_rate": 0.0019962754092626204, "loss": 0.2454, "step": 29940 }, { "epoch": 0.21257982833912692, "grad_norm": 0.1279296875, "learning_rate": 0.001996272836587503, "loss": 0.2564, "step": 29950 }, { "epoch": 0.21265080657897306, "grad_norm": 0.103515625, "learning_rate": 0.001996270263026029, "loss": 0.2399, "step": 29960 }, { "epoch": 0.21272178481881918, "grad_norm": 0.07763671875, "learning_rate": 0.0019962676885781996, "loss": 0.267, "step": 29970 }, { "epoch": 0.2127927630586653, "grad_norm": 0.0830078125, "learning_rate": 0.001996265113244017, "loss": 0.2392, "step": 29980 }, { "epoch": 0.2128637412985114, "grad_norm": 0.1279296875, "learning_rate": 0.001996262537023485, "loss": 0.2404, "step": 29990 }, { "epoch": 0.21293471953835752, "grad_norm": 0.1689453125, "learning_rate": 0.0019962599599166056, "loss": 0.2555, "step": 30000 }, { "epoch": 0.21293471953835752, "eval_covost2-zh-en_loss": 3.9658279418945312, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 19.4834, "eval_covost2-zh-en_samples_per_second": 3.285, "eval_covost2-zh-en_steps_per_second": 0.205, "step": 30000 }, { "epoch": 0.21293471953835752, "eval_covost2-en-zh_loss": 3.1966934204101562, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.8675, "eval_covost2-en-zh_samples_per_second": 3.221, "eval_covost2-en-zh_steps_per_second": 0.201, "step": 30000 }, { "epoch": 0.21300569777820363, "grad_norm": 0.08447265625, "learning_rate": 0.001996257381923381, "loss": 0.2534, "step": 30010 }, { "epoch": 0.21307667601804978, "grad_norm": 0.150390625, "learning_rate": 0.001996254803043814, "loss": 0.2502, "step": 30020 }, { "epoch": 0.2131476542578959, "grad_norm": 0.09033203125, "learning_rate": 0.001996252223277908, "loss": 0.246, "step": 30030 }, { "epoch": 0.213218632497742, "grad_norm": 0.123046875, "learning_rate": 0.0019962496426256635, "loss": 0.2598, "step": 30040 }, { "epoch": 0.21328961073758812, "grad_norm": 0.0849609375, "learning_rate": 0.001996247061087085, "loss": 0.2493, "step": 30050 }, { "epoch": 0.21336058897743423, "grad_norm": 0.0859375, "learning_rate": 0.0019962444786621736, "loss": 0.2538, "step": 30060 }, { "epoch": 0.21343156721728035, "grad_norm": 0.1689453125, "learning_rate": 0.0019962418953509333, "loss": 0.2704, "step": 30070 }, { "epoch": 0.2135025454571265, "grad_norm": 0.21484375, "learning_rate": 0.0019962393111533656, "loss": 0.2752, "step": 30080 }, { "epoch": 0.2135735236969726, "grad_norm": 0.06591796875, "learning_rate": 0.0019962367260694734, "loss": 0.2455, "step": 30090 }, { "epoch": 0.21364450193681872, "grad_norm": 0.09765625, "learning_rate": 0.00199623414009926, "loss": 0.2391, "step": 30100 }, { "epoch": 0.21371548017666483, "grad_norm": 0.09033203125, "learning_rate": 0.001996231553242726, "loss": 0.2498, "step": 30110 }, { "epoch": 0.21378645841651095, "grad_norm": 0.0908203125, "learning_rate": 0.001996228965499876, "loss": 0.2525, "step": 30120 }, { "epoch": 0.21385743665635706, "grad_norm": 0.1533203125, "learning_rate": 0.001996226376870711, "loss": 0.2668, "step": 30130 }, { "epoch": 0.2139284148962032, "grad_norm": 0.095703125, "learning_rate": 0.001996223787355235, "loss": 0.2409, "step": 30140 }, { "epoch": 0.21399939313604932, "grad_norm": 0.115234375, "learning_rate": 0.0019962211969534495, "loss": 0.2439, "step": 30150 }, { "epoch": 0.21407037137589544, "grad_norm": 0.1376953125, "learning_rate": 0.0019962186056653574, "loss": 0.2449, "step": 30160 }, { "epoch": 0.21414134961574155, "grad_norm": 0.0859375, "learning_rate": 0.0019962160134909613, "loss": 0.2392, "step": 30170 }, { "epoch": 0.21421232785558766, "grad_norm": 0.0966796875, "learning_rate": 0.0019962134204302637, "loss": 0.2479, "step": 30180 }, { "epoch": 0.21428330609543378, "grad_norm": 0.087890625, "learning_rate": 0.0019962108264832677, "loss": 0.2544, "step": 30190 }, { "epoch": 0.21435428433527992, "grad_norm": 0.08740234375, "learning_rate": 0.001996208231649975, "loss": 0.2379, "step": 30200 }, { "epoch": 0.21442526257512604, "grad_norm": 0.0791015625, "learning_rate": 0.0019962056359303882, "loss": 0.2523, "step": 30210 }, { "epoch": 0.21449624081497215, "grad_norm": 0.08935546875, "learning_rate": 0.0019962030393245104, "loss": 0.2534, "step": 30220 }, { "epoch": 0.21456721905481826, "grad_norm": 0.107421875, "learning_rate": 0.001996200441832344, "loss": 0.2327, "step": 30230 }, { "epoch": 0.21463819729466438, "grad_norm": 0.12109375, "learning_rate": 0.0019961978434538913, "loss": 0.2476, "step": 30240 }, { "epoch": 0.21470917553451052, "grad_norm": 0.1611328125, "learning_rate": 0.0019961952441891555, "loss": 0.2487, "step": 30250 }, { "epoch": 0.21478015377435664, "grad_norm": 0.1015625, "learning_rate": 0.0019961926440381387, "loss": 0.2518, "step": 30260 }, { "epoch": 0.21485113201420275, "grad_norm": 0.0986328125, "learning_rate": 0.0019961900430008435, "loss": 0.2385, "step": 30270 }, { "epoch": 0.21492211025404886, "grad_norm": 0.1240234375, "learning_rate": 0.0019961874410772723, "loss": 0.2499, "step": 30280 }, { "epoch": 0.21499308849389498, "grad_norm": 0.078125, "learning_rate": 0.0019961848382674284, "loss": 0.2346, "step": 30290 }, { "epoch": 0.2150640667337411, "grad_norm": 0.13671875, "learning_rate": 0.0019961822345713133, "loss": 0.2379, "step": 30300 }, { "epoch": 0.21513504497358724, "grad_norm": 0.08203125, "learning_rate": 0.00199617962998893, "loss": 0.2321, "step": 30310 }, { "epoch": 0.21520602321343335, "grad_norm": 0.1171875, "learning_rate": 0.001996177024520282, "loss": 0.2246, "step": 30320 }, { "epoch": 0.21527700145327947, "grad_norm": 0.1328125, "learning_rate": 0.0019961744181653707, "loss": 0.2394, "step": 30330 }, { "epoch": 0.21534797969312558, "grad_norm": 0.1484375, "learning_rate": 0.001996171810924199, "loss": 0.2651, "step": 30340 }, { "epoch": 0.2154189579329717, "grad_norm": 0.1640625, "learning_rate": 0.0019961692027967697, "loss": 0.2548, "step": 30350 }, { "epoch": 0.2154899361728178, "grad_norm": 0.09423828125, "learning_rate": 0.0019961665937830854, "loss": 0.2413, "step": 30360 }, { "epoch": 0.21556091441266395, "grad_norm": 0.1181640625, "learning_rate": 0.0019961639838831477, "loss": 0.258, "step": 30370 }, { "epoch": 0.21563189265251007, "grad_norm": 0.13671875, "learning_rate": 0.0019961613730969606, "loss": 0.247, "step": 30380 }, { "epoch": 0.21570287089235618, "grad_norm": 0.1513671875, "learning_rate": 0.001996158761424526, "loss": 0.2549, "step": 30390 }, { "epoch": 0.2157738491322023, "grad_norm": 0.12158203125, "learning_rate": 0.0019961561488658465, "loss": 0.2554, "step": 30400 }, { "epoch": 0.2158448273720484, "grad_norm": 0.091796875, "learning_rate": 0.0019961535354209247, "loss": 0.2484, "step": 30410 }, { "epoch": 0.21591580561189452, "grad_norm": 0.103515625, "learning_rate": 0.0019961509210897634, "loss": 0.2561, "step": 30420 }, { "epoch": 0.21598678385174067, "grad_norm": 0.12109375, "learning_rate": 0.001996148305872365, "loss": 0.2444, "step": 30430 }, { "epoch": 0.21605776209158678, "grad_norm": 0.109375, "learning_rate": 0.0019961456897687317, "loss": 0.2545, "step": 30440 }, { "epoch": 0.2161287403314329, "grad_norm": 0.1220703125, "learning_rate": 0.001996143072778867, "loss": 0.2294, "step": 30450 }, { "epoch": 0.216199718571279, "grad_norm": 0.10009765625, "learning_rate": 0.0019961404549027725, "loss": 0.2545, "step": 30460 }, { "epoch": 0.21627069681112512, "grad_norm": 0.1162109375, "learning_rate": 0.001996137836140452, "loss": 0.2302, "step": 30470 }, { "epoch": 0.21634167505097124, "grad_norm": 0.08544921875, "learning_rate": 0.001996135216491906, "loss": 0.2493, "step": 30480 }, { "epoch": 0.21641265329081738, "grad_norm": 0.08935546875, "learning_rate": 0.00199613259595714, "loss": 0.2581, "step": 30490 }, { "epoch": 0.2164836315306635, "grad_norm": 0.1494140625, "learning_rate": 0.001996129974536154, "loss": 0.2558, "step": 30500 }, { "epoch": 0.2165546097705096, "grad_norm": 0.10205078125, "learning_rate": 0.0019961273522289516, "loss": 0.2531, "step": 30510 }, { "epoch": 0.21662558801035572, "grad_norm": 0.10205078125, "learning_rate": 0.001996124729035536, "loss": 0.2462, "step": 30520 }, { "epoch": 0.21669656625020184, "grad_norm": 0.19921875, "learning_rate": 0.001996122104955909, "loss": 0.2515, "step": 30530 }, { "epoch": 0.21676754449004795, "grad_norm": 0.154296875, "learning_rate": 0.001996119479990073, "loss": 0.2608, "step": 30540 }, { "epoch": 0.2168385227298941, "grad_norm": 0.1982421875, "learning_rate": 0.0019961168541380316, "loss": 0.2392, "step": 30550 }, { "epoch": 0.2169095009697402, "grad_norm": 0.1396484375, "learning_rate": 0.0019961142273997863, "loss": 0.243, "step": 30560 }, { "epoch": 0.21698047920958632, "grad_norm": 0.11572265625, "learning_rate": 0.0019961115997753406, "loss": 0.2487, "step": 30570 }, { "epoch": 0.21705145744943244, "grad_norm": 0.0791015625, "learning_rate": 0.0019961089712646966, "loss": 0.2389, "step": 30580 }, { "epoch": 0.21712243568927855, "grad_norm": 0.1513671875, "learning_rate": 0.0019961063418678566, "loss": 0.2497, "step": 30590 }, { "epoch": 0.21719341392912467, "grad_norm": 0.1181640625, "learning_rate": 0.001996103711584824, "loss": 0.2522, "step": 30600 }, { "epoch": 0.2172643921689708, "grad_norm": 0.09423828125, "learning_rate": 0.001996101080415601, "loss": 0.2334, "step": 30610 }, { "epoch": 0.21733537040881692, "grad_norm": 0.08544921875, "learning_rate": 0.0019960984483601897, "loss": 0.2569, "step": 30620 }, { "epoch": 0.21740634864866304, "grad_norm": 0.1015625, "learning_rate": 0.001996095815418594, "loss": 0.271, "step": 30630 }, { "epoch": 0.21747732688850915, "grad_norm": 0.158203125, "learning_rate": 0.0019960931815908153, "loss": 0.2436, "step": 30640 }, { "epoch": 0.21754830512835527, "grad_norm": 0.1494140625, "learning_rate": 0.001996090546876856, "loss": 0.244, "step": 30650 }, { "epoch": 0.21761928336820138, "grad_norm": 0.1953125, "learning_rate": 0.00199608791127672, "loss": 0.2395, "step": 30660 }, { "epoch": 0.21769026160804753, "grad_norm": 0.099609375, "learning_rate": 0.001996085274790409, "loss": 0.2431, "step": 30670 }, { "epoch": 0.21776123984789364, "grad_norm": 0.1650390625, "learning_rate": 0.0019960826374179263, "loss": 0.2445, "step": 30680 }, { "epoch": 0.21783221808773975, "grad_norm": 0.072265625, "learning_rate": 0.0019960799991592735, "loss": 0.2472, "step": 30690 }, { "epoch": 0.21790319632758587, "grad_norm": 0.1025390625, "learning_rate": 0.0019960773600144538, "loss": 0.2644, "step": 30700 }, { "epoch": 0.21797417456743198, "grad_norm": 0.1572265625, "learning_rate": 0.00199607471998347, "loss": 0.2413, "step": 30710 }, { "epoch": 0.2180451528072781, "grad_norm": 0.12255859375, "learning_rate": 0.0019960720790663244, "loss": 0.2604, "step": 30720 }, { "epoch": 0.21811613104712424, "grad_norm": 0.07861328125, "learning_rate": 0.0019960694372630197, "loss": 0.2613, "step": 30730 }, { "epoch": 0.21818710928697035, "grad_norm": 0.11328125, "learning_rate": 0.0019960667945735583, "loss": 0.2597, "step": 30740 }, { "epoch": 0.21825808752681647, "grad_norm": 0.11572265625, "learning_rate": 0.001996064150997943, "loss": 0.2504, "step": 30750 }, { "epoch": 0.21832906576666258, "grad_norm": 0.091796875, "learning_rate": 0.0019960615065361766, "loss": 0.2491, "step": 30760 }, { "epoch": 0.2184000440065087, "grad_norm": 0.15625, "learning_rate": 0.001996058861188261, "loss": 0.225, "step": 30770 }, { "epoch": 0.2184710222463548, "grad_norm": 0.10791015625, "learning_rate": 0.0019960562149542002, "loss": 0.2532, "step": 30780 }, { "epoch": 0.21854200048620095, "grad_norm": 0.10546875, "learning_rate": 0.0019960535678339953, "loss": 0.2329, "step": 30790 }, { "epoch": 0.21861297872604707, "grad_norm": 0.150390625, "learning_rate": 0.00199605091982765, "loss": 0.2343, "step": 30800 }, { "epoch": 0.21868395696589318, "grad_norm": 0.1435546875, "learning_rate": 0.001996048270935166, "loss": 0.2535, "step": 30810 }, { "epoch": 0.2187549352057393, "grad_norm": 0.07421875, "learning_rate": 0.001996045621156547, "loss": 0.2247, "step": 30820 }, { "epoch": 0.2188259134455854, "grad_norm": 0.11083984375, "learning_rate": 0.0019960429704917945, "loss": 0.253, "step": 30830 }, { "epoch": 0.21889689168543153, "grad_norm": 0.09130859375, "learning_rate": 0.001996040318940912, "loss": 0.2425, "step": 30840 }, { "epoch": 0.21896786992527767, "grad_norm": 0.07568359375, "learning_rate": 0.0019960376665039015, "loss": 0.2454, "step": 30850 }, { "epoch": 0.21903884816512378, "grad_norm": 0.09814453125, "learning_rate": 0.0019960350131807665, "loss": 0.2365, "step": 30860 }, { "epoch": 0.2191098264049699, "grad_norm": 0.06591796875, "learning_rate": 0.0019960323589715088, "loss": 0.2545, "step": 30870 }, { "epoch": 0.219180804644816, "grad_norm": 0.1328125, "learning_rate": 0.001996029703876131, "loss": 0.2449, "step": 30880 }, { "epoch": 0.21925178288466213, "grad_norm": 0.1181640625, "learning_rate": 0.001996027047894636, "loss": 0.2385, "step": 30890 }, { "epoch": 0.21932276112450824, "grad_norm": 0.103515625, "learning_rate": 0.001996024391027026, "loss": 0.2617, "step": 30900 }, { "epoch": 0.21939373936435438, "grad_norm": 0.1171875, "learning_rate": 0.0019960217332733047, "loss": 0.2329, "step": 30910 }, { "epoch": 0.2194647176042005, "grad_norm": 0.125, "learning_rate": 0.0019960190746334742, "loss": 0.2377, "step": 30920 }, { "epoch": 0.2195356958440466, "grad_norm": 0.1162109375, "learning_rate": 0.0019960164151075366, "loss": 0.2578, "step": 30930 }, { "epoch": 0.21960667408389273, "grad_norm": 0.1103515625, "learning_rate": 0.001996013754695495, "loss": 0.2364, "step": 30940 }, { "epoch": 0.21967765232373884, "grad_norm": 0.0849609375, "learning_rate": 0.0019960110933973517, "loss": 0.2374, "step": 30950 }, { "epoch": 0.21974863056358496, "grad_norm": 0.12109375, "learning_rate": 0.00199600843121311, "loss": 0.2482, "step": 30960 }, { "epoch": 0.2198196088034311, "grad_norm": 0.1083984375, "learning_rate": 0.001996005768142772, "loss": 0.2488, "step": 30970 }, { "epoch": 0.2198905870432772, "grad_norm": 0.09423828125, "learning_rate": 0.0019960031041863406, "loss": 0.2275, "step": 30980 }, { "epoch": 0.21996156528312333, "grad_norm": 0.07275390625, "learning_rate": 0.001996000439343818, "loss": 0.2402, "step": 30990 }, { "epoch": 0.22003254352296944, "grad_norm": 0.11669921875, "learning_rate": 0.0019959977736152073, "loss": 0.2579, "step": 31000 }, { "epoch": 0.22010352176281556, "grad_norm": 0.150390625, "learning_rate": 0.001995995107000511, "loss": 0.2444, "step": 31010 }, { "epoch": 0.22017450000266167, "grad_norm": 0.068359375, "learning_rate": 0.001995992439499731, "loss": 0.2403, "step": 31020 }, { "epoch": 0.22024547824250781, "grad_norm": 0.1220703125, "learning_rate": 0.0019959897711128716, "loss": 0.2616, "step": 31030 }, { "epoch": 0.22031645648235393, "grad_norm": 0.13671875, "learning_rate": 0.001995987101839934, "loss": 0.262, "step": 31040 }, { "epoch": 0.22038743472220004, "grad_norm": 0.1640625, "learning_rate": 0.0019959844316809216, "loss": 0.2396, "step": 31050 }, { "epoch": 0.22045841296204616, "grad_norm": 0.09033203125, "learning_rate": 0.0019959817606358364, "loss": 0.2374, "step": 31060 }, { "epoch": 0.22052939120189227, "grad_norm": 0.0849609375, "learning_rate": 0.0019959790887046813, "loss": 0.2494, "step": 31070 }, { "epoch": 0.2206003694417384, "grad_norm": 0.119140625, "learning_rate": 0.0019959764158874595, "loss": 0.2362, "step": 31080 }, { "epoch": 0.22067134768158453, "grad_norm": 0.10888671875, "learning_rate": 0.001995973742184173, "loss": 0.2349, "step": 31090 }, { "epoch": 0.22074232592143064, "grad_norm": 0.09619140625, "learning_rate": 0.0019959710675948247, "loss": 0.2448, "step": 31100 }, { "epoch": 0.22081330416127676, "grad_norm": 0.1064453125, "learning_rate": 0.001995968392119417, "loss": 0.2429, "step": 31110 }, { "epoch": 0.22088428240112287, "grad_norm": 0.083984375, "learning_rate": 0.0019959657157579526, "loss": 0.2385, "step": 31120 }, { "epoch": 0.220955260640969, "grad_norm": 0.0732421875, "learning_rate": 0.0019959630385104343, "loss": 0.245, "step": 31130 }, { "epoch": 0.2210262388808151, "grad_norm": 0.10205078125, "learning_rate": 0.001995960360376865, "loss": 0.2465, "step": 31140 }, { "epoch": 0.22109721712066124, "grad_norm": 0.1533203125, "learning_rate": 0.0019959576813572467, "loss": 0.2403, "step": 31150 }, { "epoch": 0.22116819536050736, "grad_norm": 0.125, "learning_rate": 0.001995955001451583, "loss": 0.2505, "step": 31160 }, { "epoch": 0.22123917360035347, "grad_norm": 0.10302734375, "learning_rate": 0.0019959523206598754, "loss": 0.2581, "step": 31170 }, { "epoch": 0.2213101518401996, "grad_norm": 0.12890625, "learning_rate": 0.001995949638982127, "loss": 0.2609, "step": 31180 }, { "epoch": 0.2213811300800457, "grad_norm": 0.08349609375, "learning_rate": 0.001995946956418341, "loss": 0.2503, "step": 31190 }, { "epoch": 0.22145210831989182, "grad_norm": 0.20703125, "learning_rate": 0.0019959442729685192, "loss": 0.2341, "step": 31200 }, { "epoch": 0.22152308655973796, "grad_norm": 0.10791015625, "learning_rate": 0.001995941588632665, "loss": 0.2406, "step": 31210 }, { "epoch": 0.22159406479958407, "grad_norm": 0.16796875, "learning_rate": 0.0019959389034107807, "loss": 0.2287, "step": 31220 }, { "epoch": 0.2216650430394302, "grad_norm": 0.07080078125, "learning_rate": 0.0019959362173028686, "loss": 0.2361, "step": 31230 }, { "epoch": 0.2217360212792763, "grad_norm": 0.201171875, "learning_rate": 0.0019959335303089323, "loss": 0.2331, "step": 31240 }, { "epoch": 0.22180699951912242, "grad_norm": 0.12353515625, "learning_rate": 0.0019959308424289733, "loss": 0.237, "step": 31250 }, { "epoch": 0.22187797775896853, "grad_norm": 0.09375, "learning_rate": 0.001995928153662995, "loss": 0.2548, "step": 31260 }, { "epoch": 0.22194895599881467, "grad_norm": 0.08447265625, "learning_rate": 0.0019959254640110006, "loss": 0.2336, "step": 31270 }, { "epoch": 0.2220199342386608, "grad_norm": 0.09716796875, "learning_rate": 0.0019959227734729916, "loss": 0.2629, "step": 31280 }, { "epoch": 0.2220909124785069, "grad_norm": 0.25390625, "learning_rate": 0.001995920082048971, "loss": 0.2753, "step": 31290 }, { "epoch": 0.22216189071835302, "grad_norm": 0.1015625, "learning_rate": 0.0019959173897389415, "loss": 0.2522, "step": 31300 }, { "epoch": 0.22223286895819913, "grad_norm": 0.08935546875, "learning_rate": 0.0019959146965429064, "loss": 0.2674, "step": 31310 }, { "epoch": 0.22230384719804525, "grad_norm": 0.173828125, "learning_rate": 0.0019959120024608673, "loss": 0.2451, "step": 31320 }, { "epoch": 0.2223748254378914, "grad_norm": 0.1796875, "learning_rate": 0.0019959093074928273, "loss": 0.2686, "step": 31330 }, { "epoch": 0.2224458036777375, "grad_norm": 0.09033203125, "learning_rate": 0.0019959066116387895, "loss": 0.2421, "step": 31340 }, { "epoch": 0.22251678191758362, "grad_norm": 0.0712890625, "learning_rate": 0.001995903914898756, "loss": 0.2414, "step": 31350 }, { "epoch": 0.22258776015742973, "grad_norm": 0.109375, "learning_rate": 0.0019959012172727298, "loss": 0.2466, "step": 31360 }, { "epoch": 0.22265873839727585, "grad_norm": 0.1396484375, "learning_rate": 0.001995898518760713, "loss": 0.2535, "step": 31370 }, { "epoch": 0.22272971663712196, "grad_norm": 0.107421875, "learning_rate": 0.0019958958193627093, "loss": 0.2294, "step": 31380 }, { "epoch": 0.2228006948769681, "grad_norm": 0.11328125, "learning_rate": 0.0019958931190787207, "loss": 0.2374, "step": 31390 }, { "epoch": 0.22287167311681422, "grad_norm": 0.130859375, "learning_rate": 0.00199589041790875, "loss": 0.2334, "step": 31400 }, { "epoch": 0.22294265135666033, "grad_norm": 0.125, "learning_rate": 0.0019958877158527994, "loss": 0.261, "step": 31410 }, { "epoch": 0.22301362959650645, "grad_norm": 0.0986328125, "learning_rate": 0.0019958850129108722, "loss": 0.249, "step": 31420 }, { "epoch": 0.22308460783635256, "grad_norm": 0.08837890625, "learning_rate": 0.001995882309082971, "loss": 0.2354, "step": 31430 }, { "epoch": 0.2231555860761987, "grad_norm": 0.103515625, "learning_rate": 0.0019958796043690985, "loss": 0.2608, "step": 31440 }, { "epoch": 0.22322656431604482, "grad_norm": 0.10595703125, "learning_rate": 0.0019958768987692567, "loss": 0.2447, "step": 31450 }, { "epoch": 0.22329754255589093, "grad_norm": 0.220703125, "learning_rate": 0.0019958741922834495, "loss": 0.2532, "step": 31460 }, { "epoch": 0.22336852079573705, "grad_norm": 0.1611328125, "learning_rate": 0.0019958714849116783, "loss": 0.2416, "step": 31470 }, { "epoch": 0.22343949903558316, "grad_norm": 0.080078125, "learning_rate": 0.0019958687766539465, "loss": 0.2369, "step": 31480 }, { "epoch": 0.22351047727542928, "grad_norm": 0.11328125, "learning_rate": 0.0019958660675102567, "loss": 0.2534, "step": 31490 }, { "epoch": 0.22358145551527542, "grad_norm": 0.1591796875, "learning_rate": 0.001995863357480611, "loss": 0.2659, "step": 31500 }, { "epoch": 0.22365243375512153, "grad_norm": 0.09375, "learning_rate": 0.0019958606465650132, "loss": 0.2413, "step": 31510 }, { "epoch": 0.22372341199496765, "grad_norm": 0.12158203125, "learning_rate": 0.0019958579347634656, "loss": 0.2573, "step": 31520 }, { "epoch": 0.22379439023481376, "grad_norm": 0.0947265625, "learning_rate": 0.00199585522207597, "loss": 0.2608, "step": 31530 }, { "epoch": 0.22386536847465988, "grad_norm": 0.134765625, "learning_rate": 0.0019958525085025302, "loss": 0.2735, "step": 31540 }, { "epoch": 0.223936346714506, "grad_norm": 0.0927734375, "learning_rate": 0.001995849794043148, "loss": 0.2436, "step": 31550 }, { "epoch": 0.22400732495435213, "grad_norm": 0.12353515625, "learning_rate": 0.0019958470786978267, "loss": 0.2485, "step": 31560 }, { "epoch": 0.22407830319419825, "grad_norm": 0.08447265625, "learning_rate": 0.0019958443624665686, "loss": 0.2581, "step": 31570 }, { "epoch": 0.22414928143404436, "grad_norm": 0.259765625, "learning_rate": 0.001995841645349377, "loss": 0.2461, "step": 31580 }, { "epoch": 0.22422025967389048, "grad_norm": 0.080078125, "learning_rate": 0.001995838927346254, "loss": 0.2826, "step": 31590 }, { "epoch": 0.2242912379137366, "grad_norm": 0.09423828125, "learning_rate": 0.001995836208457202, "loss": 0.237, "step": 31600 }, { "epoch": 0.2243622161535827, "grad_norm": 0.1328125, "learning_rate": 0.0019958334886822247, "loss": 0.2468, "step": 31610 }, { "epoch": 0.22443319439342885, "grad_norm": 0.1142578125, "learning_rate": 0.001995830768021324, "loss": 0.2598, "step": 31620 }, { "epoch": 0.22450417263327496, "grad_norm": 0.1201171875, "learning_rate": 0.001995828046474503, "loss": 0.2467, "step": 31630 }, { "epoch": 0.22457515087312108, "grad_norm": 0.0927734375, "learning_rate": 0.001995825324041764, "loss": 0.2378, "step": 31640 }, { "epoch": 0.2246461291129672, "grad_norm": 0.1708984375, "learning_rate": 0.00199582260072311, "loss": 0.2609, "step": 31650 }, { "epoch": 0.2247171073528133, "grad_norm": 0.09765625, "learning_rate": 0.0019958198765185437, "loss": 0.2664, "step": 31660 }, { "epoch": 0.22478808559265942, "grad_norm": 0.1787109375, "learning_rate": 0.0019958171514280676, "loss": 0.2554, "step": 31670 }, { "epoch": 0.22485906383250556, "grad_norm": 0.10888671875, "learning_rate": 0.001995814425451684, "loss": 0.2343, "step": 31680 }, { "epoch": 0.22493004207235168, "grad_norm": 0.13671875, "learning_rate": 0.0019958116985893965, "loss": 0.2313, "step": 31690 }, { "epoch": 0.2250010203121978, "grad_norm": 0.09130859375, "learning_rate": 0.0019958089708412073, "loss": 0.245, "step": 31700 }, { "epoch": 0.2250719985520439, "grad_norm": 0.087890625, "learning_rate": 0.0019958062422071195, "loss": 0.2387, "step": 31710 }, { "epoch": 0.22514297679189002, "grad_norm": 0.1357421875, "learning_rate": 0.0019958035126871354, "loss": 0.2352, "step": 31720 }, { "epoch": 0.22521395503173613, "grad_norm": 0.1142578125, "learning_rate": 0.0019958007822812574, "loss": 0.2346, "step": 31730 }, { "epoch": 0.22528493327158228, "grad_norm": 0.111328125, "learning_rate": 0.0019957980509894887, "loss": 0.2474, "step": 31740 }, { "epoch": 0.2253559115114284, "grad_norm": 0.291015625, "learning_rate": 0.0019957953188118323, "loss": 0.2411, "step": 31750 }, { "epoch": 0.2254268897512745, "grad_norm": 0.083984375, "learning_rate": 0.00199579258574829, "loss": 0.2541, "step": 31760 }, { "epoch": 0.22549786799112062, "grad_norm": 0.2353515625, "learning_rate": 0.001995789851798865, "loss": 0.2324, "step": 31770 }, { "epoch": 0.22556884623096674, "grad_norm": 0.07666015625, "learning_rate": 0.0019957871169635597, "loss": 0.2348, "step": 31780 }, { "epoch": 0.22563982447081285, "grad_norm": 0.1142578125, "learning_rate": 0.0019957843812423775, "loss": 0.2477, "step": 31790 }, { "epoch": 0.225710802710659, "grad_norm": 0.0693359375, "learning_rate": 0.0019957816446353207, "loss": 0.2474, "step": 31800 }, { "epoch": 0.2257817809505051, "grad_norm": 0.12255859375, "learning_rate": 0.001995778907142392, "loss": 0.2536, "step": 31810 }, { "epoch": 0.22585275919035122, "grad_norm": 0.11376953125, "learning_rate": 0.0019957761687635937, "loss": 0.2467, "step": 31820 }, { "epoch": 0.22592373743019734, "grad_norm": 0.08642578125, "learning_rate": 0.0019957734294989296, "loss": 0.2685, "step": 31830 }, { "epoch": 0.22599471567004345, "grad_norm": 0.12255859375, "learning_rate": 0.0019957706893484013, "loss": 0.2339, "step": 31840 }, { "epoch": 0.22606569390988956, "grad_norm": 0.0966796875, "learning_rate": 0.001995767948312012, "loss": 0.2461, "step": 31850 }, { "epoch": 0.2261366721497357, "grad_norm": 0.08251953125, "learning_rate": 0.0019957652063897643, "loss": 0.2471, "step": 31860 }, { "epoch": 0.22620765038958182, "grad_norm": 0.1796875, "learning_rate": 0.001995762463581661, "loss": 0.2516, "step": 31870 }, { "epoch": 0.22627862862942794, "grad_norm": 0.2412109375, "learning_rate": 0.0019957597198877047, "loss": 0.2491, "step": 31880 }, { "epoch": 0.22634960686927405, "grad_norm": 0.11767578125, "learning_rate": 0.0019957569753078983, "loss": 0.2577, "step": 31890 }, { "epoch": 0.22642058510912016, "grad_norm": 0.07763671875, "learning_rate": 0.0019957542298422444, "loss": 0.2277, "step": 31900 }, { "epoch": 0.22649156334896628, "grad_norm": 0.078125, "learning_rate": 0.0019957514834907453, "loss": 0.239, "step": 31910 }, { "epoch": 0.22656254158881242, "grad_norm": 0.08154296875, "learning_rate": 0.0019957487362534045, "loss": 0.2215, "step": 31920 }, { "epoch": 0.22663351982865854, "grad_norm": 0.0771484375, "learning_rate": 0.0019957459881302245, "loss": 0.2533, "step": 31930 }, { "epoch": 0.22670449806850465, "grad_norm": 0.1416015625, "learning_rate": 0.0019957432391212076, "loss": 0.244, "step": 31940 }, { "epoch": 0.22677547630835077, "grad_norm": 0.140625, "learning_rate": 0.0019957404892263567, "loss": 0.251, "step": 31950 }, { "epoch": 0.22684645454819688, "grad_norm": 0.08349609375, "learning_rate": 0.001995737738445675, "loss": 0.2428, "step": 31960 }, { "epoch": 0.226917432788043, "grad_norm": 0.09912109375, "learning_rate": 0.0019957349867791643, "loss": 0.2494, "step": 31970 }, { "epoch": 0.22698841102788914, "grad_norm": 0.10595703125, "learning_rate": 0.001995732234226828, "loss": 0.2443, "step": 31980 }, { "epoch": 0.22705938926773525, "grad_norm": 0.09814453125, "learning_rate": 0.001995729480788669, "loss": 0.2376, "step": 31990 }, { "epoch": 0.22713036750758137, "grad_norm": 0.1103515625, "learning_rate": 0.0019957267264646894, "loss": 0.2382, "step": 32000 }, { "epoch": 0.22713036750758137, "eval_covost2-zh-en_loss": 3.929133415222168, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 22.5187, "eval_covost2-zh-en_samples_per_second": 2.842, "eval_covost2-zh-en_steps_per_second": 0.178, "step": 32000 }, { "epoch": 0.22713036750758137, "eval_covost2-en-zh_loss": 3.195446491241455, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.853, "eval_covost2-en-zh_samples_per_second": 2.929, "eval_covost2-en-zh_steps_per_second": 0.183, "step": 32000 }, { "epoch": 0.22720134574742748, "grad_norm": 0.123046875, "learning_rate": 0.001995723971254892, "loss": 0.2404, "step": 32010 }, { "epoch": 0.2272723239872736, "grad_norm": 0.13671875, "learning_rate": 0.00199572121515928, "loss": 0.276, "step": 32020 }, { "epoch": 0.2273433022271197, "grad_norm": 0.1298828125, "learning_rate": 0.0019957184581778555, "loss": 0.2539, "step": 32030 }, { "epoch": 0.22741428046696585, "grad_norm": 0.1044921875, "learning_rate": 0.0019957157003106223, "loss": 0.2457, "step": 32040 }, { "epoch": 0.22748525870681197, "grad_norm": 0.06982421875, "learning_rate": 0.001995712941557582, "loss": 0.2518, "step": 32050 }, { "epoch": 0.22755623694665808, "grad_norm": 0.13671875, "learning_rate": 0.0019957101819187374, "loss": 0.2464, "step": 32060 }, { "epoch": 0.2276272151865042, "grad_norm": 0.08056640625, "learning_rate": 0.001995707421394092, "loss": 0.2529, "step": 32070 }, { "epoch": 0.2276981934263503, "grad_norm": 0.11669921875, "learning_rate": 0.001995704659983648, "loss": 0.2735, "step": 32080 }, { "epoch": 0.22776917166619642, "grad_norm": 0.1162109375, "learning_rate": 0.0019957018976874082, "loss": 0.2517, "step": 32090 }, { "epoch": 0.22784014990604257, "grad_norm": 0.09814453125, "learning_rate": 0.0019956991345053752, "loss": 0.2485, "step": 32100 }, { "epoch": 0.22791112814588868, "grad_norm": 0.1337890625, "learning_rate": 0.0019956963704375525, "loss": 0.2644, "step": 32110 }, { "epoch": 0.2279821063857348, "grad_norm": 0.07666015625, "learning_rate": 0.001995693605483942, "loss": 0.2269, "step": 32120 }, { "epoch": 0.2280530846255809, "grad_norm": 0.12109375, "learning_rate": 0.001995690839644546, "loss": 0.2575, "step": 32130 }, { "epoch": 0.22812406286542702, "grad_norm": 0.1083984375, "learning_rate": 0.0019956880729193687, "loss": 0.2473, "step": 32140 }, { "epoch": 0.22819504110527314, "grad_norm": 0.107421875, "learning_rate": 0.0019956853053084114, "loss": 0.254, "step": 32150 }, { "epoch": 0.22826601934511928, "grad_norm": 0.107421875, "learning_rate": 0.001995682536811678, "loss": 0.256, "step": 32160 }, { "epoch": 0.2283369975849654, "grad_norm": 0.0830078125, "learning_rate": 0.0019956797674291703, "loss": 0.2558, "step": 32170 }, { "epoch": 0.2284079758248115, "grad_norm": 0.1953125, "learning_rate": 0.0019956769971608917, "loss": 0.2456, "step": 32180 }, { "epoch": 0.22847895406465762, "grad_norm": 0.150390625, "learning_rate": 0.001995674226006845, "loss": 0.236, "step": 32190 }, { "epoch": 0.22854993230450374, "grad_norm": 0.08837890625, "learning_rate": 0.001995671453967032, "loss": 0.2645, "step": 32200 }, { "epoch": 0.22862091054434985, "grad_norm": 0.150390625, "learning_rate": 0.0019956686810414562, "loss": 0.2603, "step": 32210 }, { "epoch": 0.228691888784196, "grad_norm": 0.09228515625, "learning_rate": 0.0019956659072301202, "loss": 0.2535, "step": 32220 }, { "epoch": 0.2287628670240421, "grad_norm": 0.1279296875, "learning_rate": 0.001995663132533027, "loss": 0.2634, "step": 32230 }, { "epoch": 0.22883384526388822, "grad_norm": 0.072265625, "learning_rate": 0.001995660356950179, "loss": 0.2332, "step": 32240 }, { "epoch": 0.22890482350373434, "grad_norm": 0.275390625, "learning_rate": 0.0019956575804815787, "loss": 0.2514, "step": 32250 }, { "epoch": 0.22897580174358045, "grad_norm": 0.2197265625, "learning_rate": 0.00199565480312723, "loss": 0.2498, "step": 32260 }, { "epoch": 0.22904677998342657, "grad_norm": 0.09228515625, "learning_rate": 0.001995652024887134, "loss": 0.2348, "step": 32270 }, { "epoch": 0.2291177582232727, "grad_norm": 0.10498046875, "learning_rate": 0.001995649245761295, "loss": 0.2523, "step": 32280 }, { "epoch": 0.22918873646311883, "grad_norm": 0.1142578125, "learning_rate": 0.001995646465749714, "loss": 0.2588, "step": 32290 }, { "epoch": 0.22925971470296494, "grad_norm": 0.083984375, "learning_rate": 0.001995643684852396, "loss": 0.2553, "step": 32300 }, { "epoch": 0.22933069294281105, "grad_norm": 0.11328125, "learning_rate": 0.0019956409030693414, "loss": 0.2535, "step": 32310 }, { "epoch": 0.22940167118265717, "grad_norm": 0.10546875, "learning_rate": 0.001995638120400555, "loss": 0.2445, "step": 32320 }, { "epoch": 0.22947264942250328, "grad_norm": 0.1279296875, "learning_rate": 0.001995635336846038, "loss": 0.2426, "step": 32330 }, { "epoch": 0.22954362766234943, "grad_norm": 0.1171875, "learning_rate": 0.0019956325524057945, "loss": 0.2592, "step": 32340 }, { "epoch": 0.22961460590219554, "grad_norm": 0.099609375, "learning_rate": 0.001995629767079826, "loss": 0.2455, "step": 32350 }, { "epoch": 0.22968558414204165, "grad_norm": 0.22265625, "learning_rate": 0.0019956269808681358, "loss": 0.2456, "step": 32360 }, { "epoch": 0.22975656238188777, "grad_norm": 0.14453125, "learning_rate": 0.001995624193770727, "loss": 0.2763, "step": 32370 }, { "epoch": 0.22982754062173388, "grad_norm": 0.1513671875, "learning_rate": 0.0019956214057876014, "loss": 0.2484, "step": 32380 }, { "epoch": 0.22989851886158, "grad_norm": 0.11767578125, "learning_rate": 0.001995618616918763, "loss": 0.2352, "step": 32390 }, { "epoch": 0.22996949710142614, "grad_norm": 0.1298828125, "learning_rate": 0.0019956158271642135, "loss": 0.2391, "step": 32400 }, { "epoch": 0.23004047534127225, "grad_norm": 0.08837890625, "learning_rate": 0.001995613036523956, "loss": 0.2647, "step": 32410 }, { "epoch": 0.23011145358111837, "grad_norm": 0.18359375, "learning_rate": 0.0019956102449979938, "loss": 0.2428, "step": 32420 }, { "epoch": 0.23018243182096448, "grad_norm": 0.08935546875, "learning_rate": 0.0019956074525863286, "loss": 0.2442, "step": 32430 }, { "epoch": 0.2302534100608106, "grad_norm": 0.12255859375, "learning_rate": 0.0019956046592889643, "loss": 0.2442, "step": 32440 }, { "epoch": 0.2303243883006567, "grad_norm": 0.1201171875, "learning_rate": 0.0019956018651059028, "loss": 0.2405, "step": 32450 }, { "epoch": 0.23039536654050286, "grad_norm": 0.11962890625, "learning_rate": 0.0019955990700371473, "loss": 0.2429, "step": 32460 }, { "epoch": 0.23046634478034897, "grad_norm": 0.1474609375, "learning_rate": 0.0019955962740827006, "loss": 0.2333, "step": 32470 }, { "epoch": 0.23053732302019508, "grad_norm": 0.1279296875, "learning_rate": 0.001995593477242565, "loss": 0.2596, "step": 32480 }, { "epoch": 0.2306083012600412, "grad_norm": 0.111328125, "learning_rate": 0.001995590679516744, "loss": 0.2296, "step": 32490 }, { "epoch": 0.2306792794998873, "grad_norm": 0.12060546875, "learning_rate": 0.0019955878809052394, "loss": 0.2504, "step": 32500 }, { "epoch": 0.23075025773973343, "grad_norm": 0.1123046875, "learning_rate": 0.001995585081408055, "loss": 0.2481, "step": 32510 }, { "epoch": 0.23082123597957957, "grad_norm": 0.140625, "learning_rate": 0.001995582281025193, "loss": 0.2522, "step": 32520 }, { "epoch": 0.23089221421942568, "grad_norm": 0.07080078125, "learning_rate": 0.0019955794797566558, "loss": 0.2522, "step": 32530 }, { "epoch": 0.2309631924592718, "grad_norm": 0.1611328125, "learning_rate": 0.001995576677602447, "loss": 0.2466, "step": 32540 }, { "epoch": 0.2310341706991179, "grad_norm": 0.185546875, "learning_rate": 0.0019955738745625687, "loss": 0.243, "step": 32550 }, { "epoch": 0.23110514893896403, "grad_norm": 0.154296875, "learning_rate": 0.0019955710706370247, "loss": 0.2408, "step": 32560 }, { "epoch": 0.23117612717881017, "grad_norm": 0.08251953125, "learning_rate": 0.0019955682658258167, "loss": 0.2378, "step": 32570 }, { "epoch": 0.23124710541865628, "grad_norm": 0.09033203125, "learning_rate": 0.0019955654601289475, "loss": 0.2569, "step": 32580 }, { "epoch": 0.2313180836585024, "grad_norm": 0.0986328125, "learning_rate": 0.0019955626535464203, "loss": 0.2576, "step": 32590 }, { "epoch": 0.2313890618983485, "grad_norm": 0.12353515625, "learning_rate": 0.001995559846078238, "loss": 0.2617, "step": 32600 }, { "epoch": 0.23146004013819463, "grad_norm": 0.0732421875, "learning_rate": 0.001995557037724403, "loss": 0.2497, "step": 32610 }, { "epoch": 0.23153101837804074, "grad_norm": 0.1337890625, "learning_rate": 0.001995554228484918, "loss": 0.2449, "step": 32620 }, { "epoch": 0.23160199661788688, "grad_norm": 0.10595703125, "learning_rate": 0.0019955514183597864, "loss": 0.2324, "step": 32630 }, { "epoch": 0.231672974857733, "grad_norm": 0.14453125, "learning_rate": 0.0019955486073490103, "loss": 0.2685, "step": 32640 }, { "epoch": 0.23174395309757911, "grad_norm": 0.1650390625, "learning_rate": 0.001995545795452593, "loss": 0.2557, "step": 32650 }, { "epoch": 0.23181493133742523, "grad_norm": 0.0693359375, "learning_rate": 0.001995542982670537, "loss": 0.2375, "step": 32660 }, { "epoch": 0.23188590957727134, "grad_norm": 0.10009765625, "learning_rate": 0.0019955401690028448, "loss": 0.2522, "step": 32670 }, { "epoch": 0.23195688781711746, "grad_norm": 0.11083984375, "learning_rate": 0.0019955373544495196, "loss": 0.2489, "step": 32680 }, { "epoch": 0.2320278660569636, "grad_norm": 0.140625, "learning_rate": 0.0019955345390105642, "loss": 0.2434, "step": 32690 }, { "epoch": 0.23209884429680971, "grad_norm": 0.1279296875, "learning_rate": 0.0019955317226859814, "loss": 0.2426, "step": 32700 }, { "epoch": 0.23216982253665583, "grad_norm": 0.08984375, "learning_rate": 0.0019955289054757737, "loss": 0.2532, "step": 32710 }, { "epoch": 0.23224080077650194, "grad_norm": 0.224609375, "learning_rate": 0.001995526087379944, "loss": 0.2391, "step": 32720 }, { "epoch": 0.23231177901634806, "grad_norm": 0.07421875, "learning_rate": 0.0019955232683984948, "loss": 0.2382, "step": 32730 }, { "epoch": 0.23238275725619417, "grad_norm": 0.123046875, "learning_rate": 0.0019955204485314296, "loss": 0.2691, "step": 32740 }, { "epoch": 0.23245373549604031, "grad_norm": 0.08740234375, "learning_rate": 0.001995517627778751, "loss": 0.2369, "step": 32750 }, { "epoch": 0.23252471373588643, "grad_norm": 0.09716796875, "learning_rate": 0.001995514806140461, "loss": 0.2418, "step": 32760 }, { "epoch": 0.23259569197573254, "grad_norm": 0.1298828125, "learning_rate": 0.001995511983616564, "loss": 0.2423, "step": 32770 }, { "epoch": 0.23266667021557866, "grad_norm": 0.1279296875, "learning_rate": 0.0019955091602070606, "loss": 0.2578, "step": 32780 }, { "epoch": 0.23273764845542477, "grad_norm": 0.07275390625, "learning_rate": 0.0019955063359119556, "loss": 0.2392, "step": 32790 }, { "epoch": 0.2328086266952709, "grad_norm": 0.1083984375, "learning_rate": 0.0019955035107312507, "loss": 0.2273, "step": 32800 }, { "epoch": 0.23287960493511703, "grad_norm": 0.0732421875, "learning_rate": 0.0019955006846649487, "loss": 0.2386, "step": 32810 }, { "epoch": 0.23295058317496314, "grad_norm": 0.08984375, "learning_rate": 0.001995497857713053, "loss": 0.2633, "step": 32820 }, { "epoch": 0.23302156141480926, "grad_norm": 0.07470703125, "learning_rate": 0.0019954950298755655, "loss": 0.2506, "step": 32830 }, { "epoch": 0.23309253965465537, "grad_norm": 0.1552734375, "learning_rate": 0.0019954922011524902, "loss": 0.2547, "step": 32840 }, { "epoch": 0.2331635178945015, "grad_norm": 0.1142578125, "learning_rate": 0.0019954893715438287, "loss": 0.2671, "step": 32850 }, { "epoch": 0.2332344961343476, "grad_norm": 0.1318359375, "learning_rate": 0.001995486541049585, "loss": 0.2485, "step": 32860 }, { "epoch": 0.23330547437419374, "grad_norm": 0.08203125, "learning_rate": 0.0019954837096697606, "loss": 0.2483, "step": 32870 }, { "epoch": 0.23337645261403986, "grad_norm": 0.0927734375, "learning_rate": 0.0019954808774043592, "loss": 0.2624, "step": 32880 }, { "epoch": 0.23344743085388597, "grad_norm": 0.0771484375, "learning_rate": 0.0019954780442533833, "loss": 0.2442, "step": 32890 }, { "epoch": 0.2335184090937321, "grad_norm": 0.09716796875, "learning_rate": 0.0019954752102168357, "loss": 0.2578, "step": 32900 }, { "epoch": 0.2335893873335782, "grad_norm": 0.0908203125, "learning_rate": 0.0019954723752947197, "loss": 0.2422, "step": 32910 }, { "epoch": 0.23366036557342432, "grad_norm": 0.09521484375, "learning_rate": 0.0019954695394870372, "loss": 0.2666, "step": 32920 }, { "epoch": 0.23373134381327046, "grad_norm": 0.1337890625, "learning_rate": 0.001995466702793791, "loss": 0.2407, "step": 32930 }, { "epoch": 0.23380232205311657, "grad_norm": 0.123046875, "learning_rate": 0.0019954638652149854, "loss": 0.2351, "step": 32940 }, { "epoch": 0.2338733002929627, "grad_norm": 0.1044921875, "learning_rate": 0.0019954610267506213, "loss": 0.2368, "step": 32950 }, { "epoch": 0.2339442785328088, "grad_norm": 0.1455078125, "learning_rate": 0.001995458187400703, "loss": 0.2459, "step": 32960 }, { "epoch": 0.23401525677265492, "grad_norm": 0.07080078125, "learning_rate": 0.001995455347165232, "loss": 0.2416, "step": 32970 }, { "epoch": 0.23408623501250103, "grad_norm": 0.08349609375, "learning_rate": 0.001995452506044212, "loss": 0.2455, "step": 32980 }, { "epoch": 0.23415721325234717, "grad_norm": 0.0849609375, "learning_rate": 0.001995449664037646, "loss": 0.2546, "step": 32990 }, { "epoch": 0.2342281914921933, "grad_norm": 0.1318359375, "learning_rate": 0.001995446821145536, "loss": 0.253, "step": 33000 }, { "epoch": 0.2342991697320394, "grad_norm": 0.1279296875, "learning_rate": 0.0019954439773678854, "loss": 0.2527, "step": 33010 }, { "epoch": 0.23437014797188552, "grad_norm": 0.08349609375, "learning_rate": 0.0019954411327046968, "loss": 0.2427, "step": 33020 }, { "epoch": 0.23444112621173163, "grad_norm": 0.09521484375, "learning_rate": 0.0019954382871559726, "loss": 0.2428, "step": 33030 }, { "epoch": 0.23451210445157775, "grad_norm": 0.11865234375, "learning_rate": 0.0019954354407217168, "loss": 0.2431, "step": 33040 }, { "epoch": 0.2345830826914239, "grad_norm": 0.083984375, "learning_rate": 0.001995432593401931, "loss": 0.2392, "step": 33050 }, { "epoch": 0.23465406093127, "grad_norm": 0.1220703125, "learning_rate": 0.0019954297451966188, "loss": 0.2834, "step": 33060 }, { "epoch": 0.23472503917111612, "grad_norm": 0.1318359375, "learning_rate": 0.0019954268961057823, "loss": 0.2565, "step": 33070 }, { "epoch": 0.23479601741096223, "grad_norm": 0.10400390625, "learning_rate": 0.001995424046129425, "loss": 0.2378, "step": 33080 }, { "epoch": 0.23486699565080835, "grad_norm": 0.09423828125, "learning_rate": 0.0019954211952675494, "loss": 0.2457, "step": 33090 }, { "epoch": 0.23493797389065446, "grad_norm": 0.0947265625, "learning_rate": 0.001995418343520158, "loss": 0.2481, "step": 33100 }, { "epoch": 0.2350089521305006, "grad_norm": 0.111328125, "learning_rate": 0.0019954154908872544, "loss": 0.2626, "step": 33110 }, { "epoch": 0.23507993037034672, "grad_norm": 0.1328125, "learning_rate": 0.001995412637368841, "loss": 0.2418, "step": 33120 }, { "epoch": 0.23515090861019283, "grad_norm": 0.099609375, "learning_rate": 0.001995409782964921, "loss": 0.232, "step": 33130 }, { "epoch": 0.23522188685003895, "grad_norm": 0.1552734375, "learning_rate": 0.0019954069276754962, "loss": 0.27, "step": 33140 }, { "epoch": 0.23529286508988506, "grad_norm": 0.107421875, "learning_rate": 0.0019954040715005703, "loss": 0.2305, "step": 33150 }, { "epoch": 0.23536384332973118, "grad_norm": 0.0830078125, "learning_rate": 0.0019954012144401453, "loss": 0.2238, "step": 33160 }, { "epoch": 0.23543482156957732, "grad_norm": 0.12353515625, "learning_rate": 0.0019953983564942254, "loss": 0.2482, "step": 33170 }, { "epoch": 0.23550579980942343, "grad_norm": 0.07080078125, "learning_rate": 0.0019953954976628125, "loss": 0.2639, "step": 33180 }, { "epoch": 0.23557677804926955, "grad_norm": 0.1015625, "learning_rate": 0.0019953926379459096, "loss": 0.2484, "step": 33190 }, { "epoch": 0.23564775628911566, "grad_norm": 0.125, "learning_rate": 0.001995389777343519, "loss": 0.2531, "step": 33200 }, { "epoch": 0.23571873452896178, "grad_norm": 0.166015625, "learning_rate": 0.001995386915855645, "loss": 0.2573, "step": 33210 }, { "epoch": 0.2357897127688079, "grad_norm": 0.16015625, "learning_rate": 0.0019953840534822883, "loss": 0.2513, "step": 33220 }, { "epoch": 0.23586069100865403, "grad_norm": 0.10888671875, "learning_rate": 0.0019953811902234534, "loss": 0.269, "step": 33230 }, { "epoch": 0.23593166924850015, "grad_norm": 0.09326171875, "learning_rate": 0.0019953783260791428, "loss": 0.2789, "step": 33240 }, { "epoch": 0.23600264748834626, "grad_norm": 0.09375, "learning_rate": 0.001995375461049359, "loss": 0.2546, "step": 33250 }, { "epoch": 0.23607362572819238, "grad_norm": 0.1396484375, "learning_rate": 0.001995372595134105, "loss": 0.2558, "step": 33260 }, { "epoch": 0.2361446039680385, "grad_norm": 0.1396484375, "learning_rate": 0.0019953697283333835, "loss": 0.2539, "step": 33270 }, { "epoch": 0.2362155822078846, "grad_norm": 0.11865234375, "learning_rate": 0.0019953668606471973, "loss": 0.2412, "step": 33280 }, { "epoch": 0.23628656044773075, "grad_norm": 0.1337890625, "learning_rate": 0.00199536399207555, "loss": 0.2376, "step": 33290 }, { "epoch": 0.23635753868757686, "grad_norm": 0.1474609375, "learning_rate": 0.0019953611226184434, "loss": 0.238, "step": 33300 }, { "epoch": 0.23642851692742298, "grad_norm": 0.08349609375, "learning_rate": 0.001995358252275881, "loss": 0.2604, "step": 33310 }, { "epoch": 0.2364994951672691, "grad_norm": 0.08154296875, "learning_rate": 0.0019953553810478654, "loss": 0.245, "step": 33320 }, { "epoch": 0.2365704734071152, "grad_norm": 0.087890625, "learning_rate": 0.001995352508934399, "loss": 0.237, "step": 33330 }, { "epoch": 0.23664145164696132, "grad_norm": 0.185546875, "learning_rate": 0.0019953496359354857, "loss": 0.274, "step": 33340 }, { "epoch": 0.23671242988680746, "grad_norm": 0.11962890625, "learning_rate": 0.0019953467620511273, "loss": 0.263, "step": 33350 }, { "epoch": 0.23678340812665358, "grad_norm": 0.10693359375, "learning_rate": 0.001995343887281327, "loss": 0.2753, "step": 33360 }, { "epoch": 0.2368543863664997, "grad_norm": 0.091796875, "learning_rate": 0.001995341011626088, "loss": 0.2488, "step": 33370 }, { "epoch": 0.2369253646063458, "grad_norm": 0.1591796875, "learning_rate": 0.0019953381350854126, "loss": 0.2755, "step": 33380 }, { "epoch": 0.23699634284619192, "grad_norm": 0.11962890625, "learning_rate": 0.001995335257659304, "loss": 0.2594, "step": 33390 }, { "epoch": 0.23706732108603804, "grad_norm": 0.12158203125, "learning_rate": 0.0019953323793477653, "loss": 0.261, "step": 33400 }, { "epoch": 0.23713829932588418, "grad_norm": 0.125, "learning_rate": 0.0019953295001507985, "loss": 0.261, "step": 33410 }, { "epoch": 0.2372092775657303, "grad_norm": 0.09228515625, "learning_rate": 0.001995326620068407, "loss": 0.2642, "step": 33420 }, { "epoch": 0.2372802558055764, "grad_norm": 0.2041015625, "learning_rate": 0.001995323739100594, "loss": 0.2605, "step": 33430 }, { "epoch": 0.23735123404542252, "grad_norm": 0.1484375, "learning_rate": 0.0019953208572473617, "loss": 0.2477, "step": 33440 }, { "epoch": 0.23742221228526864, "grad_norm": 0.1689453125, "learning_rate": 0.0019953179745087132, "loss": 0.2472, "step": 33450 }, { "epoch": 0.23749319052511475, "grad_norm": 0.10791015625, "learning_rate": 0.001995315090884651, "loss": 0.2523, "step": 33460 }, { "epoch": 0.2375641687649609, "grad_norm": 0.1416015625, "learning_rate": 0.0019953122063751786, "loss": 0.2515, "step": 33470 }, { "epoch": 0.237635147004807, "grad_norm": 0.0869140625, "learning_rate": 0.0019953093209802986, "loss": 0.2483, "step": 33480 }, { "epoch": 0.23770612524465312, "grad_norm": 0.12890625, "learning_rate": 0.0019953064347000135, "loss": 0.2728, "step": 33490 }, { "epoch": 0.23777710348449924, "grad_norm": 0.07470703125, "learning_rate": 0.001995303547534327, "loss": 0.256, "step": 33500 }, { "epoch": 0.23784808172434535, "grad_norm": 0.25, "learning_rate": 0.001995300659483241, "loss": 0.2505, "step": 33510 }, { "epoch": 0.23791905996419146, "grad_norm": 0.07421875, "learning_rate": 0.0019952977705467584, "loss": 0.2326, "step": 33520 }, { "epoch": 0.2379900382040376, "grad_norm": 0.1552734375, "learning_rate": 0.001995294880724883, "loss": 0.2554, "step": 33530 }, { "epoch": 0.23806101644388372, "grad_norm": 0.1142578125, "learning_rate": 0.001995291990017617, "loss": 0.2499, "step": 33540 }, { "epoch": 0.23813199468372984, "grad_norm": 0.1474609375, "learning_rate": 0.0019952890984249634, "loss": 0.261, "step": 33550 }, { "epoch": 0.23820297292357595, "grad_norm": 0.1484375, "learning_rate": 0.0019952862059469246, "loss": 0.2625, "step": 33560 }, { "epoch": 0.23827395116342207, "grad_norm": 0.09033203125, "learning_rate": 0.0019952833125835043, "loss": 0.2556, "step": 33570 }, { "epoch": 0.23834492940326818, "grad_norm": 0.169921875, "learning_rate": 0.0019952804183347044, "loss": 0.2735, "step": 33580 }, { "epoch": 0.23841590764311432, "grad_norm": 0.1025390625, "learning_rate": 0.0019952775232005286, "loss": 0.2739, "step": 33590 }, { "epoch": 0.23848688588296044, "grad_norm": 0.134765625, "learning_rate": 0.0019952746271809795, "loss": 0.2671, "step": 33600 }, { "epoch": 0.23855786412280655, "grad_norm": 0.185546875, "learning_rate": 0.00199527173027606, "loss": 0.2501, "step": 33610 }, { "epoch": 0.23862884236265267, "grad_norm": 0.087890625, "learning_rate": 0.0019952688324857727, "loss": 0.2491, "step": 33620 }, { "epoch": 0.23869982060249878, "grad_norm": 0.080078125, "learning_rate": 0.0019952659338101203, "loss": 0.247, "step": 33630 }, { "epoch": 0.2387707988423449, "grad_norm": 0.1044921875, "learning_rate": 0.0019952630342491063, "loss": 0.2573, "step": 33640 }, { "epoch": 0.23884177708219104, "grad_norm": 0.1337890625, "learning_rate": 0.0019952601338027332, "loss": 0.2397, "step": 33650 }, { "epoch": 0.23891275532203715, "grad_norm": 0.16015625, "learning_rate": 0.001995257232471004, "loss": 0.2541, "step": 33660 }, { "epoch": 0.23898373356188327, "grad_norm": 0.07373046875, "learning_rate": 0.0019952543302539216, "loss": 0.2535, "step": 33670 }, { "epoch": 0.23905471180172938, "grad_norm": 0.10107421875, "learning_rate": 0.0019952514271514886, "loss": 0.266, "step": 33680 }, { "epoch": 0.2391256900415755, "grad_norm": 0.10009765625, "learning_rate": 0.0019952485231637083, "loss": 0.2578, "step": 33690 }, { "epoch": 0.2391966682814216, "grad_norm": 0.1337890625, "learning_rate": 0.0019952456182905833, "loss": 0.2414, "step": 33700 }, { "epoch": 0.23926764652126775, "grad_norm": 0.103515625, "learning_rate": 0.0019952427125321166, "loss": 0.2459, "step": 33710 }, { "epoch": 0.23933862476111387, "grad_norm": 0.09716796875, "learning_rate": 0.0019952398058883107, "loss": 0.2627, "step": 33720 }, { "epoch": 0.23940960300095998, "grad_norm": 0.11962890625, "learning_rate": 0.001995236898359169, "loss": 0.2702, "step": 33730 }, { "epoch": 0.2394805812408061, "grad_norm": 0.138671875, "learning_rate": 0.0019952339899446934, "loss": 0.2493, "step": 33740 }, { "epoch": 0.2395515594806522, "grad_norm": 0.099609375, "learning_rate": 0.001995231080644888, "loss": 0.2704, "step": 33750 }, { "epoch": 0.23962253772049835, "grad_norm": 0.11181640625, "learning_rate": 0.001995228170459755, "loss": 0.2393, "step": 33760 }, { "epoch": 0.23969351596034447, "grad_norm": 0.0830078125, "learning_rate": 0.0019952252593892976, "loss": 0.279, "step": 33770 }, { "epoch": 0.23976449420019058, "grad_norm": 0.10107421875, "learning_rate": 0.0019952223474335186, "loss": 0.2558, "step": 33780 }, { "epoch": 0.2398354724400367, "grad_norm": 0.12060546875, "learning_rate": 0.001995219434592421, "loss": 0.2598, "step": 33790 }, { "epoch": 0.2399064506798828, "grad_norm": 0.1181640625, "learning_rate": 0.0019952165208660068, "loss": 0.2504, "step": 33800 }, { "epoch": 0.23997742891972892, "grad_norm": 0.0888671875, "learning_rate": 0.0019952136062542803, "loss": 0.2383, "step": 33810 }, { "epoch": 0.24004840715957507, "grad_norm": 0.10595703125, "learning_rate": 0.001995210690757243, "loss": 0.2518, "step": 33820 }, { "epoch": 0.24011938539942118, "grad_norm": 0.076171875, "learning_rate": 0.001995207774374899, "loss": 0.267, "step": 33830 }, { "epoch": 0.2401903636392673, "grad_norm": 0.21484375, "learning_rate": 0.00199520485710725, "loss": 0.2508, "step": 33840 }, { "epoch": 0.2402613418791134, "grad_norm": 0.103515625, "learning_rate": 0.0019952019389543004, "loss": 0.245, "step": 33850 }, { "epoch": 0.24033232011895952, "grad_norm": 0.107421875, "learning_rate": 0.0019951990199160515, "loss": 0.2618, "step": 33860 }, { "epoch": 0.24040329835880564, "grad_norm": 0.1328125, "learning_rate": 0.001995196099992507, "loss": 0.2282, "step": 33870 }, { "epoch": 0.24047427659865178, "grad_norm": 0.095703125, "learning_rate": 0.0019951931791836698, "loss": 0.2539, "step": 33880 }, { "epoch": 0.2405452548384979, "grad_norm": 0.078125, "learning_rate": 0.0019951902574895427, "loss": 0.2628, "step": 33890 }, { "epoch": 0.240616233078344, "grad_norm": 0.12060546875, "learning_rate": 0.0019951873349101285, "loss": 0.2296, "step": 33900 }, { "epoch": 0.24068721131819013, "grad_norm": 0.08349609375, "learning_rate": 0.00199518441144543, "loss": 0.2715, "step": 33910 }, { "epoch": 0.24075818955803624, "grad_norm": 0.1396484375, "learning_rate": 0.0019951814870954503, "loss": 0.2457, "step": 33920 }, { "epoch": 0.24082916779788235, "grad_norm": 0.10302734375, "learning_rate": 0.0019951785618601923, "loss": 0.257, "step": 33930 }, { "epoch": 0.2409001460377285, "grad_norm": 0.138671875, "learning_rate": 0.001995175635739658, "loss": 0.2622, "step": 33940 }, { "epoch": 0.2409711242775746, "grad_norm": 0.1337890625, "learning_rate": 0.0019951727087338523, "loss": 0.2523, "step": 33950 }, { "epoch": 0.24104210251742073, "grad_norm": 0.0986328125, "learning_rate": 0.001995169780842776, "loss": 0.2408, "step": 33960 }, { "epoch": 0.24111308075726684, "grad_norm": 0.12353515625, "learning_rate": 0.0019951668520664335, "loss": 0.2636, "step": 33970 }, { "epoch": 0.24118405899711295, "grad_norm": 0.10986328125, "learning_rate": 0.001995163922404827, "loss": 0.2325, "step": 33980 }, { "epoch": 0.24125503723695907, "grad_norm": 0.10205078125, "learning_rate": 0.001995160991857959, "loss": 0.2371, "step": 33990 }, { "epoch": 0.2413260154768052, "grad_norm": 0.111328125, "learning_rate": 0.0019951580604258335, "loss": 0.2345, "step": 34000 }, { "epoch": 0.2413260154768052, "eval_covost2-zh-en_loss": 3.8680419921875, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.7911, "eval_covost2-zh-en_samples_per_second": 3.078, "eval_covost2-zh-en_steps_per_second": 0.192, "step": 34000 }, { "epoch": 0.2413260154768052, "eval_covost2-en-zh_loss": 3.161374568939209, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.844, "eval_covost2-en-zh_samples_per_second": 2.93, "eval_covost2-en-zh_steps_per_second": 0.183, "step": 34000 }, { "epoch": 0.24139699371665133, "grad_norm": 0.2275390625, "learning_rate": 0.0019951551281084524, "loss": 0.267, "step": 34010 }, { "epoch": 0.24146797195649744, "grad_norm": 0.12060546875, "learning_rate": 0.001995152194905819, "loss": 0.2627, "step": 34020 }, { "epoch": 0.24153895019634355, "grad_norm": 0.119140625, "learning_rate": 0.001995149260817936, "loss": 0.2574, "step": 34030 }, { "epoch": 0.24160992843618967, "grad_norm": 0.08447265625, "learning_rate": 0.001995146325844807, "loss": 0.26, "step": 34040 }, { "epoch": 0.24168090667603578, "grad_norm": 0.0810546875, "learning_rate": 0.001995143389986434, "loss": 0.256, "step": 34050 }, { "epoch": 0.24175188491588193, "grad_norm": 0.0654296875, "learning_rate": 0.0019951404532428207, "loss": 0.2691, "step": 34060 }, { "epoch": 0.24182286315572804, "grad_norm": 0.09130859375, "learning_rate": 0.001995137515613969, "loss": 0.2535, "step": 34070 }, { "epoch": 0.24189384139557416, "grad_norm": 0.1396484375, "learning_rate": 0.001995134577099883, "loss": 0.2578, "step": 34080 }, { "epoch": 0.24196481963542027, "grad_norm": 0.0859375, "learning_rate": 0.001995131637700565, "loss": 0.254, "step": 34090 }, { "epoch": 0.24203579787526638, "grad_norm": 0.1435546875, "learning_rate": 0.0019951286974160174, "loss": 0.2426, "step": 34100 }, { "epoch": 0.2421067761151125, "grad_norm": 0.11767578125, "learning_rate": 0.001995125756246244, "loss": 0.2618, "step": 34110 }, { "epoch": 0.24217775435495864, "grad_norm": 0.173828125, "learning_rate": 0.0019951228141912468, "loss": 0.2635, "step": 34120 }, { "epoch": 0.24224873259480476, "grad_norm": 0.087890625, "learning_rate": 0.0019951198712510297, "loss": 0.2655, "step": 34130 }, { "epoch": 0.24231971083465087, "grad_norm": 0.083984375, "learning_rate": 0.001995116927425595, "loss": 0.2441, "step": 34140 }, { "epoch": 0.24239068907449698, "grad_norm": 0.103515625, "learning_rate": 0.001995113982714946, "loss": 0.2294, "step": 34150 }, { "epoch": 0.2424616673143431, "grad_norm": 0.2021484375, "learning_rate": 0.0019951110371190855, "loss": 0.2541, "step": 34160 }, { "epoch": 0.2425326455541892, "grad_norm": 0.0888671875, "learning_rate": 0.001995108090638016, "loss": 0.2603, "step": 34170 }, { "epoch": 0.24260362379403536, "grad_norm": 0.09375, "learning_rate": 0.0019951051432717404, "loss": 0.2654, "step": 34180 }, { "epoch": 0.24267460203388147, "grad_norm": 0.1494140625, "learning_rate": 0.0019951021950202624, "loss": 0.2667, "step": 34190 }, { "epoch": 0.24274558027372758, "grad_norm": 0.09716796875, "learning_rate": 0.001995099245883584, "loss": 0.2524, "step": 34200 }, { "epoch": 0.2428165585135737, "grad_norm": 0.08837890625, "learning_rate": 0.001995096295861709, "loss": 0.2422, "step": 34210 }, { "epoch": 0.2428875367534198, "grad_norm": 0.203125, "learning_rate": 0.0019950933449546394, "loss": 0.2489, "step": 34220 }, { "epoch": 0.24295851499326593, "grad_norm": 0.09326171875, "learning_rate": 0.001995090393162379, "loss": 0.2554, "step": 34230 }, { "epoch": 0.24302949323311207, "grad_norm": 0.1845703125, "learning_rate": 0.0019950874404849298, "loss": 0.2367, "step": 34240 }, { "epoch": 0.24310047147295819, "grad_norm": 0.1298828125, "learning_rate": 0.001995084486922296, "loss": 0.2547, "step": 34250 }, { "epoch": 0.2431714497128043, "grad_norm": 0.1181640625, "learning_rate": 0.001995081532474479, "loss": 0.2608, "step": 34260 }, { "epoch": 0.24324242795265041, "grad_norm": 0.07763671875, "learning_rate": 0.0019950785771414826, "loss": 0.2689, "step": 34270 }, { "epoch": 0.24331340619249653, "grad_norm": 0.0712890625, "learning_rate": 0.00199507562092331, "loss": 0.2547, "step": 34280 }, { "epoch": 0.24338438443234264, "grad_norm": 0.0888671875, "learning_rate": 0.001995072663819963, "loss": 0.2466, "step": 34290 }, { "epoch": 0.24345536267218879, "grad_norm": 0.09619140625, "learning_rate": 0.001995069705831446, "loss": 0.2316, "step": 34300 }, { "epoch": 0.2435263409120349, "grad_norm": 0.11767578125, "learning_rate": 0.001995066746957761, "loss": 0.2315, "step": 34310 }, { "epoch": 0.24359731915188101, "grad_norm": 0.431640625, "learning_rate": 0.001995063787198911, "loss": 0.266, "step": 34320 }, { "epoch": 0.24366829739172713, "grad_norm": 0.10009765625, "learning_rate": 0.0019950608265548986, "loss": 0.253, "step": 34330 }, { "epoch": 0.24373927563157324, "grad_norm": 0.10595703125, "learning_rate": 0.0019950578650257274, "loss": 0.2686, "step": 34340 }, { "epoch": 0.24381025387141936, "grad_norm": 0.08740234375, "learning_rate": 0.0019950549026114006, "loss": 0.2368, "step": 34350 }, { "epoch": 0.2438812321112655, "grad_norm": 0.23046875, "learning_rate": 0.0019950519393119198, "loss": 0.2593, "step": 34360 }, { "epoch": 0.24395221035111161, "grad_norm": 0.09033203125, "learning_rate": 0.001995048975127289, "loss": 0.2604, "step": 34370 }, { "epoch": 0.24402318859095773, "grad_norm": 0.1357421875, "learning_rate": 0.001995046010057511, "loss": 0.241, "step": 34380 }, { "epoch": 0.24409416683080384, "grad_norm": 0.0966796875, "learning_rate": 0.0019950430441025885, "loss": 0.269, "step": 34390 }, { "epoch": 0.24416514507064996, "grad_norm": 0.08544921875, "learning_rate": 0.001995040077262525, "loss": 0.2469, "step": 34400 }, { "epoch": 0.24423612331049607, "grad_norm": 0.15625, "learning_rate": 0.001995037109537322, "loss": 0.268, "step": 34410 }, { "epoch": 0.24430710155034221, "grad_norm": 0.0986328125, "learning_rate": 0.0019950341409269838, "loss": 0.2496, "step": 34420 }, { "epoch": 0.24437807979018833, "grad_norm": 0.09521484375, "learning_rate": 0.001995031171431513, "loss": 0.2751, "step": 34430 }, { "epoch": 0.24444905803003444, "grad_norm": 0.08447265625, "learning_rate": 0.0019950282010509127, "loss": 0.2416, "step": 34440 }, { "epoch": 0.24452003626988056, "grad_norm": 0.08544921875, "learning_rate": 0.0019950252297851853, "loss": 0.2433, "step": 34450 }, { "epoch": 0.24459101450972667, "grad_norm": 0.08935546875, "learning_rate": 0.0019950222576343344, "loss": 0.2701, "step": 34460 }, { "epoch": 0.2446619927495728, "grad_norm": 0.09765625, "learning_rate": 0.0019950192845983625, "loss": 0.2639, "step": 34470 }, { "epoch": 0.24473297098941893, "grad_norm": 0.138671875, "learning_rate": 0.001995016310677272, "loss": 0.2461, "step": 34480 }, { "epoch": 0.24480394922926504, "grad_norm": 0.11181640625, "learning_rate": 0.001995013335871067, "loss": 0.2462, "step": 34490 }, { "epoch": 0.24487492746911116, "grad_norm": 0.11572265625, "learning_rate": 0.00199501036017975, "loss": 0.247, "step": 34500 }, { "epoch": 0.24494590570895727, "grad_norm": 0.0947265625, "learning_rate": 0.0019950073836033236, "loss": 0.2505, "step": 34510 }, { "epoch": 0.2450168839488034, "grad_norm": 0.158203125, "learning_rate": 0.001995004406141791, "loss": 0.2495, "step": 34520 }, { "epoch": 0.2450878621886495, "grad_norm": 0.1357421875, "learning_rate": 0.0019950014277951554, "loss": 0.2495, "step": 34530 }, { "epoch": 0.24515884042849564, "grad_norm": 0.10400390625, "learning_rate": 0.001994998448563419, "loss": 0.2688, "step": 34540 }, { "epoch": 0.24522981866834176, "grad_norm": 0.10546875, "learning_rate": 0.0019949954684465856, "loss": 0.2488, "step": 34550 }, { "epoch": 0.24530079690818787, "grad_norm": 0.119140625, "learning_rate": 0.0019949924874446572, "loss": 0.2568, "step": 34560 }, { "epoch": 0.245371775148034, "grad_norm": 0.1416015625, "learning_rate": 0.0019949895055576383, "loss": 0.2622, "step": 34570 }, { "epoch": 0.2454427533878801, "grad_norm": 0.091796875, "learning_rate": 0.00199498652278553, "loss": 0.2568, "step": 34580 }, { "epoch": 0.24551373162772622, "grad_norm": 0.1494140625, "learning_rate": 0.0019949835391283367, "loss": 0.2569, "step": 34590 }, { "epoch": 0.24558470986757236, "grad_norm": 0.11279296875, "learning_rate": 0.0019949805545860606, "loss": 0.2564, "step": 34600 }, { "epoch": 0.24565568810741847, "grad_norm": 0.1376953125, "learning_rate": 0.0019949775691587047, "loss": 0.2345, "step": 34610 }, { "epoch": 0.2457266663472646, "grad_norm": 0.10302734375, "learning_rate": 0.0019949745828462726, "loss": 0.2646, "step": 34620 }, { "epoch": 0.2457976445871107, "grad_norm": 0.0986328125, "learning_rate": 0.001994971595648766, "loss": 0.2474, "step": 34630 }, { "epoch": 0.24586862282695682, "grad_norm": 0.0966796875, "learning_rate": 0.001994968607566189, "loss": 0.2696, "step": 34640 }, { "epoch": 0.24593960106680293, "grad_norm": 0.0810546875, "learning_rate": 0.001994965618598544, "loss": 0.2313, "step": 34650 }, { "epoch": 0.24601057930664907, "grad_norm": 0.109375, "learning_rate": 0.001994962628745834, "loss": 0.2572, "step": 34660 }, { "epoch": 0.2460815575464952, "grad_norm": 0.07763671875, "learning_rate": 0.0019949596380080623, "loss": 0.2474, "step": 34670 }, { "epoch": 0.2461525357863413, "grad_norm": 0.140625, "learning_rate": 0.001994956646385231, "loss": 0.2541, "step": 34680 }, { "epoch": 0.24622351402618742, "grad_norm": 0.0810546875, "learning_rate": 0.0019949536538773445, "loss": 0.2461, "step": 34690 }, { "epoch": 0.24629449226603353, "grad_norm": 0.1484375, "learning_rate": 0.0019949506604844047, "loss": 0.2669, "step": 34700 }, { "epoch": 0.24636547050587965, "grad_norm": 0.10302734375, "learning_rate": 0.001994947666206414, "loss": 0.2537, "step": 34710 }, { "epoch": 0.2464364487457258, "grad_norm": 0.1357421875, "learning_rate": 0.0019949446710433773, "loss": 0.2593, "step": 34720 }, { "epoch": 0.2465074269855719, "grad_norm": 0.0654296875, "learning_rate": 0.001994941674995296, "loss": 0.2427, "step": 34730 }, { "epoch": 0.24657840522541802, "grad_norm": 0.10986328125, "learning_rate": 0.001994938678062173, "loss": 0.2629, "step": 34740 }, { "epoch": 0.24664938346526413, "grad_norm": 0.142578125, "learning_rate": 0.0019949356802440124, "loss": 0.2537, "step": 34750 }, { "epoch": 0.24672036170511025, "grad_norm": 0.11376953125, "learning_rate": 0.001994932681540816, "loss": 0.2351, "step": 34760 }, { "epoch": 0.24679133994495636, "grad_norm": 0.09814453125, "learning_rate": 0.0019949296819525876, "loss": 0.2426, "step": 34770 }, { "epoch": 0.2468623181848025, "grad_norm": 0.11376953125, "learning_rate": 0.0019949266814793297, "loss": 0.255, "step": 34780 }, { "epoch": 0.24693329642464862, "grad_norm": 0.10302734375, "learning_rate": 0.0019949236801210454, "loss": 0.2314, "step": 34790 }, { "epoch": 0.24700427466449473, "grad_norm": 0.08056640625, "learning_rate": 0.0019949206778777377, "loss": 0.2539, "step": 34800 }, { "epoch": 0.24707525290434085, "grad_norm": 0.0859375, "learning_rate": 0.0019949176747494097, "loss": 0.2733, "step": 34810 }, { "epoch": 0.24714623114418696, "grad_norm": 0.10302734375, "learning_rate": 0.001994914670736064, "loss": 0.2443, "step": 34820 }, { "epoch": 0.24721720938403308, "grad_norm": 0.12109375, "learning_rate": 0.0019949116658377034, "loss": 0.2404, "step": 34830 }, { "epoch": 0.24728818762387922, "grad_norm": 0.119140625, "learning_rate": 0.0019949086600543316, "loss": 0.2579, "step": 34840 }, { "epoch": 0.24735916586372533, "grad_norm": 0.0986328125, "learning_rate": 0.0019949056533859516, "loss": 0.2475, "step": 34850 }, { "epoch": 0.24743014410357145, "grad_norm": 0.1904296875, "learning_rate": 0.0019949026458325656, "loss": 0.243, "step": 34860 }, { "epoch": 0.24750112234341756, "grad_norm": 0.1123046875, "learning_rate": 0.0019948996373941774, "loss": 0.2644, "step": 34870 }, { "epoch": 0.24757210058326368, "grad_norm": 0.095703125, "learning_rate": 0.001994896628070789, "loss": 0.2296, "step": 34880 }, { "epoch": 0.2476430788231098, "grad_norm": 0.1806640625, "learning_rate": 0.0019948936178624042, "loss": 0.2549, "step": 34890 }, { "epoch": 0.24771405706295593, "grad_norm": 0.09619140625, "learning_rate": 0.0019948906067690257, "loss": 0.2572, "step": 34900 }, { "epoch": 0.24778503530280205, "grad_norm": 0.1474609375, "learning_rate": 0.0019948875947906563, "loss": 0.2671, "step": 34910 }, { "epoch": 0.24785601354264816, "grad_norm": 0.138671875, "learning_rate": 0.001994884581927299, "loss": 0.2539, "step": 34920 }, { "epoch": 0.24792699178249428, "grad_norm": 0.130859375, "learning_rate": 0.0019948815681789574, "loss": 0.2549, "step": 34930 }, { "epoch": 0.2479979700223404, "grad_norm": 0.1943359375, "learning_rate": 0.0019948785535456337, "loss": 0.2667, "step": 34940 }, { "epoch": 0.24806894826218653, "grad_norm": 0.0888671875, "learning_rate": 0.001994875538027331, "loss": 0.2513, "step": 34950 }, { "epoch": 0.24813992650203265, "grad_norm": 0.10107421875, "learning_rate": 0.001994872521624053, "loss": 0.2572, "step": 34960 }, { "epoch": 0.24821090474187876, "grad_norm": 0.11669921875, "learning_rate": 0.001994869504335802, "loss": 0.2553, "step": 34970 }, { "epoch": 0.24828188298172488, "grad_norm": 0.142578125, "learning_rate": 0.001994866486162581, "loss": 0.2532, "step": 34980 }, { "epoch": 0.248352861221571, "grad_norm": 0.2255859375, "learning_rate": 0.0019948634671043933, "loss": 0.2547, "step": 34990 }, { "epoch": 0.2484238394614171, "grad_norm": 0.234375, "learning_rate": 0.0019948604471612414, "loss": 0.2729, "step": 35000 }, { "epoch": 0.24849481770126325, "grad_norm": 0.1123046875, "learning_rate": 0.001994857426333129, "loss": 0.2382, "step": 35010 }, { "epoch": 0.24856579594110936, "grad_norm": 0.150390625, "learning_rate": 0.0019948544046200587, "loss": 0.2539, "step": 35020 }, { "epoch": 0.24863677418095548, "grad_norm": 0.1533203125, "learning_rate": 0.001994851382022033, "loss": 0.2489, "step": 35030 }, { "epoch": 0.2487077524208016, "grad_norm": 0.1494140625, "learning_rate": 0.001994848358539056, "loss": 0.254, "step": 35040 }, { "epoch": 0.2487787306606477, "grad_norm": 0.1357421875, "learning_rate": 0.00199484533417113, "loss": 0.2625, "step": 35050 }, { "epoch": 0.24884970890049382, "grad_norm": 0.1181640625, "learning_rate": 0.0019948423089182574, "loss": 0.2382, "step": 35060 }, { "epoch": 0.24892068714033996, "grad_norm": 0.09130859375, "learning_rate": 0.0019948392827804423, "loss": 0.2374, "step": 35070 }, { "epoch": 0.24899166538018608, "grad_norm": 0.1123046875, "learning_rate": 0.001994836255757687, "loss": 0.2463, "step": 35080 }, { "epoch": 0.2490626436200322, "grad_norm": 0.08154296875, "learning_rate": 0.001994833227849995, "loss": 0.2459, "step": 35090 }, { "epoch": 0.2491336218598783, "grad_norm": 0.123046875, "learning_rate": 0.0019948301990573686, "loss": 0.2495, "step": 35100 }, { "epoch": 0.24920460009972442, "grad_norm": 0.099609375, "learning_rate": 0.001994827169379812, "loss": 0.2415, "step": 35110 }, { "epoch": 0.24927557833957054, "grad_norm": 0.1005859375, "learning_rate": 0.0019948241388173266, "loss": 0.2454, "step": 35120 }, { "epoch": 0.24934655657941668, "grad_norm": 0.12158203125, "learning_rate": 0.0019948211073699167, "loss": 0.2501, "step": 35130 }, { "epoch": 0.2494175348192628, "grad_norm": 0.11181640625, "learning_rate": 0.0019948180750375844, "loss": 0.252, "step": 35140 }, { "epoch": 0.2494885130591089, "grad_norm": 0.10693359375, "learning_rate": 0.0019948150418203337, "loss": 0.2506, "step": 35150 }, { "epoch": 0.24955949129895502, "grad_norm": 0.12890625, "learning_rate": 0.0019948120077181667, "loss": 0.2705, "step": 35160 }, { "epoch": 0.24963046953880114, "grad_norm": 0.283203125, "learning_rate": 0.0019948089727310865, "loss": 0.2713, "step": 35170 }, { "epoch": 0.24970144777864725, "grad_norm": 0.1591796875, "learning_rate": 0.0019948059368590965, "loss": 0.2768, "step": 35180 }, { "epoch": 0.2497724260184934, "grad_norm": 0.154296875, "learning_rate": 0.0019948029001021993, "loss": 0.2467, "step": 35190 }, { "epoch": 0.2498434042583395, "grad_norm": 0.1533203125, "learning_rate": 0.001994799862460398, "loss": 0.2615, "step": 35200 }, { "epoch": 0.24991438249818562, "grad_norm": 0.11328125, "learning_rate": 0.0019947968239336965, "loss": 0.258, "step": 35210 }, { "epoch": 0.24998536073803174, "grad_norm": 0.171875, "learning_rate": 0.0019947937845220965, "loss": 0.2459, "step": 35220 }, { "epoch": 0.2500563389778779, "grad_norm": 0.11474609375, "learning_rate": 0.0019947907442256015, "loss": 0.2504, "step": 35230 }, { "epoch": 0.25012731721772397, "grad_norm": 0.10400390625, "learning_rate": 0.0019947877030442145, "loss": 0.2635, "step": 35240 }, { "epoch": 0.2501982954575701, "grad_norm": 0.10107421875, "learning_rate": 0.0019947846609779385, "loss": 0.2534, "step": 35250 }, { "epoch": 0.2502692736974162, "grad_norm": 0.12451171875, "learning_rate": 0.0019947816180267766, "loss": 0.243, "step": 35260 }, { "epoch": 0.25034025193726234, "grad_norm": 0.12255859375, "learning_rate": 0.001994778574190732, "loss": 0.2583, "step": 35270 }, { "epoch": 0.2504112301771085, "grad_norm": 0.10302734375, "learning_rate": 0.0019947755294698073, "loss": 0.2868, "step": 35280 }, { "epoch": 0.25048220841695457, "grad_norm": 0.1669921875, "learning_rate": 0.001994772483864006, "loss": 0.2517, "step": 35290 }, { "epoch": 0.2505531866568007, "grad_norm": 0.07421875, "learning_rate": 0.0019947694373733302, "loss": 0.2367, "step": 35300 }, { "epoch": 0.2506241648966468, "grad_norm": 0.09912109375, "learning_rate": 0.001994766389997784, "loss": 0.2506, "step": 35310 }, { "epoch": 0.25069514313649294, "grad_norm": 0.2431640625, "learning_rate": 0.0019947633417373694, "loss": 0.2566, "step": 35320 }, { "epoch": 0.250766121376339, "grad_norm": 0.15625, "learning_rate": 0.0019947602925920902, "loss": 0.2484, "step": 35330 }, { "epoch": 0.25083709961618517, "grad_norm": 0.0986328125, "learning_rate": 0.0019947572425619495, "loss": 0.2893, "step": 35340 }, { "epoch": 0.2509080778560313, "grad_norm": 0.11865234375, "learning_rate": 0.0019947541916469497, "loss": 0.2494, "step": 35350 }, { "epoch": 0.2509790560958774, "grad_norm": 0.11279296875, "learning_rate": 0.001994751139847094, "loss": 0.2561, "step": 35360 }, { "epoch": 0.25105003433572354, "grad_norm": 0.1484375, "learning_rate": 0.001994748087162385, "loss": 0.2437, "step": 35370 }, { "epoch": 0.2511210125755696, "grad_norm": 0.158203125, "learning_rate": 0.001994745033592827, "loss": 0.2418, "step": 35380 }, { "epoch": 0.25119199081541577, "grad_norm": 0.091796875, "learning_rate": 0.001994741979138422, "loss": 0.2466, "step": 35390 }, { "epoch": 0.2512629690552619, "grad_norm": 0.07763671875, "learning_rate": 0.0019947389237991737, "loss": 0.2432, "step": 35400 }, { "epoch": 0.251333947295108, "grad_norm": 0.09716796875, "learning_rate": 0.001994735867575084, "loss": 0.2528, "step": 35410 }, { "epoch": 0.25140492553495414, "grad_norm": 0.0869140625, "learning_rate": 0.001994732810466157, "loss": 0.259, "step": 35420 }, { "epoch": 0.2514759037748002, "grad_norm": 0.103515625, "learning_rate": 0.001994729752472395, "loss": 0.2783, "step": 35430 }, { "epoch": 0.25154688201464637, "grad_norm": 0.07470703125, "learning_rate": 0.0019947266935938014, "loss": 0.2416, "step": 35440 }, { "epoch": 0.25161786025449245, "grad_norm": 0.240234375, "learning_rate": 0.001994723633830379, "loss": 0.2479, "step": 35450 }, { "epoch": 0.2516888384943386, "grad_norm": 0.08935546875, "learning_rate": 0.0019947205731821317, "loss": 0.2569, "step": 35460 }, { "epoch": 0.25175981673418474, "grad_norm": 0.1552734375, "learning_rate": 0.0019947175116490612, "loss": 0.2564, "step": 35470 }, { "epoch": 0.2518307949740308, "grad_norm": 0.1005859375, "learning_rate": 0.0019947144492311717, "loss": 0.2652, "step": 35480 }, { "epoch": 0.25190177321387697, "grad_norm": 0.11474609375, "learning_rate": 0.0019947113859284655, "loss": 0.2635, "step": 35490 }, { "epoch": 0.25197275145372305, "grad_norm": 0.11328125, "learning_rate": 0.0019947083217409455, "loss": 0.2571, "step": 35500 }, { "epoch": 0.2520437296935692, "grad_norm": 0.1162109375, "learning_rate": 0.001994705256668615, "loss": 0.2588, "step": 35510 }, { "epoch": 0.25211470793341534, "grad_norm": 0.0908203125, "learning_rate": 0.0019947021907114775, "loss": 0.2475, "step": 35520 }, { "epoch": 0.2521856861732614, "grad_norm": 0.1171875, "learning_rate": 0.0019946991238695352, "loss": 0.242, "step": 35530 }, { "epoch": 0.25225666441310757, "grad_norm": 0.142578125, "learning_rate": 0.0019946960561427916, "loss": 0.2437, "step": 35540 }, { "epoch": 0.25232764265295365, "grad_norm": 0.10546875, "learning_rate": 0.00199469298753125, "loss": 0.2438, "step": 35550 }, { "epoch": 0.2523986208927998, "grad_norm": 0.1328125, "learning_rate": 0.001994689918034913, "loss": 0.2587, "step": 35560 }, { "epoch": 0.2524695991326459, "grad_norm": 0.111328125, "learning_rate": 0.001994686847653784, "loss": 0.2494, "step": 35570 }, { "epoch": 0.252540577372492, "grad_norm": 0.09619140625, "learning_rate": 0.001994683776387865, "loss": 0.2411, "step": 35580 }, { "epoch": 0.25261155561233817, "grad_norm": 0.080078125, "learning_rate": 0.0019946807042371608, "loss": 0.2691, "step": 35590 }, { "epoch": 0.25268253385218425, "grad_norm": 0.16796875, "learning_rate": 0.0019946776312016723, "loss": 0.2605, "step": 35600 }, { "epoch": 0.2527535120920304, "grad_norm": 0.1337890625, "learning_rate": 0.0019946745572814043, "loss": 0.2429, "step": 35610 }, { "epoch": 0.2528244903318765, "grad_norm": 0.15625, "learning_rate": 0.0019946714824763596, "loss": 0.2366, "step": 35620 }, { "epoch": 0.2528954685717226, "grad_norm": 0.201171875, "learning_rate": 0.001994668406786541, "loss": 0.2611, "step": 35630 }, { "epoch": 0.25296644681156877, "grad_norm": 0.10205078125, "learning_rate": 0.0019946653302119503, "loss": 0.2538, "step": 35640 }, { "epoch": 0.25303742505141485, "grad_norm": 0.109375, "learning_rate": 0.0019946622527525926, "loss": 0.2529, "step": 35650 }, { "epoch": 0.253108403291261, "grad_norm": 0.10400390625, "learning_rate": 0.0019946591744084696, "loss": 0.2611, "step": 35660 }, { "epoch": 0.2531793815311071, "grad_norm": 0.08935546875, "learning_rate": 0.0019946560951795847, "loss": 0.268, "step": 35670 }, { "epoch": 0.2532503597709532, "grad_norm": 0.10595703125, "learning_rate": 0.0019946530150659414, "loss": 0.245, "step": 35680 }, { "epoch": 0.2533213380107993, "grad_norm": 0.06982421875, "learning_rate": 0.0019946499340675423, "loss": 0.2537, "step": 35690 }, { "epoch": 0.25339231625064546, "grad_norm": 0.09912109375, "learning_rate": 0.00199464685218439, "loss": 0.2482, "step": 35700 }, { "epoch": 0.2534632944904916, "grad_norm": 0.10693359375, "learning_rate": 0.001994643769416489, "loss": 0.265, "step": 35710 }, { "epoch": 0.2535342727303377, "grad_norm": 0.115234375, "learning_rate": 0.00199464068576384, "loss": 0.263, "step": 35720 }, { "epoch": 0.2536052509701838, "grad_norm": 0.08203125, "learning_rate": 0.001994637601226448, "loss": 0.2575, "step": 35730 }, { "epoch": 0.2536762292100299, "grad_norm": 0.203125, "learning_rate": 0.0019946345158043163, "loss": 0.2532, "step": 35740 }, { "epoch": 0.25374720744987606, "grad_norm": 0.10693359375, "learning_rate": 0.001994631429497446, "loss": 0.2577, "step": 35750 }, { "epoch": 0.2538181856897222, "grad_norm": 0.0888671875, "learning_rate": 0.001994628342305842, "loss": 0.2609, "step": 35760 }, { "epoch": 0.2538891639295683, "grad_norm": 0.154296875, "learning_rate": 0.0019946252542295062, "loss": 0.2712, "step": 35770 }, { "epoch": 0.2539601421694144, "grad_norm": 0.1259765625, "learning_rate": 0.0019946221652684423, "loss": 0.2551, "step": 35780 }, { "epoch": 0.2540311204092605, "grad_norm": 0.09375, "learning_rate": 0.0019946190754226534, "loss": 0.2422, "step": 35790 }, { "epoch": 0.25410209864910666, "grad_norm": 0.1318359375, "learning_rate": 0.001994615984692142, "loss": 0.2783, "step": 35800 }, { "epoch": 0.25417307688895274, "grad_norm": 0.1103515625, "learning_rate": 0.0019946128930769117, "loss": 0.2494, "step": 35810 }, { "epoch": 0.2542440551287989, "grad_norm": 0.0869140625, "learning_rate": 0.001994609800576965, "loss": 0.268, "step": 35820 }, { "epoch": 0.254315033368645, "grad_norm": 0.09228515625, "learning_rate": 0.0019946067071923055, "loss": 0.2451, "step": 35830 }, { "epoch": 0.2543860116084911, "grad_norm": 0.06640625, "learning_rate": 0.0019946036129229357, "loss": 0.2534, "step": 35840 }, { "epoch": 0.25445698984833726, "grad_norm": 0.0810546875, "learning_rate": 0.0019946005177688595, "loss": 0.2487, "step": 35850 }, { "epoch": 0.25452796808818334, "grad_norm": 0.07763671875, "learning_rate": 0.0019945974217300787, "loss": 0.2535, "step": 35860 }, { "epoch": 0.2545989463280295, "grad_norm": 0.11279296875, "learning_rate": 0.001994594324806598, "loss": 0.2638, "step": 35870 }, { "epoch": 0.2546699245678756, "grad_norm": 0.076171875, "learning_rate": 0.001994591226998419, "loss": 0.2652, "step": 35880 }, { "epoch": 0.2547409028077217, "grad_norm": 0.11669921875, "learning_rate": 0.0019945881283055457, "loss": 0.2605, "step": 35890 }, { "epoch": 0.25481188104756786, "grad_norm": 0.11962890625, "learning_rate": 0.0019945850287279806, "loss": 0.2559, "step": 35900 }, { "epoch": 0.25488285928741394, "grad_norm": 0.091796875, "learning_rate": 0.001994581928265727, "loss": 0.2632, "step": 35910 }, { "epoch": 0.2549538375272601, "grad_norm": 0.1357421875, "learning_rate": 0.001994578826918788, "loss": 0.2639, "step": 35920 }, { "epoch": 0.25502481576710617, "grad_norm": 0.076171875, "learning_rate": 0.0019945757246871663, "loss": 0.2577, "step": 35930 }, { "epoch": 0.2550957940069523, "grad_norm": 0.1494140625, "learning_rate": 0.0019945726215708655, "loss": 0.2806, "step": 35940 }, { "epoch": 0.25516677224679846, "grad_norm": 0.10595703125, "learning_rate": 0.0019945695175698887, "loss": 0.2488, "step": 35950 }, { "epoch": 0.25523775048664454, "grad_norm": 0.09033203125, "learning_rate": 0.0019945664126842385, "loss": 0.2465, "step": 35960 }, { "epoch": 0.2553087287264907, "grad_norm": 0.111328125, "learning_rate": 0.001994563306913918, "loss": 0.2565, "step": 35970 }, { "epoch": 0.2553797069663368, "grad_norm": 0.10546875, "learning_rate": 0.001994560200258931, "loss": 0.2494, "step": 35980 }, { "epoch": 0.2554506852061829, "grad_norm": 0.123046875, "learning_rate": 0.0019945570927192794, "loss": 0.2642, "step": 35990 }, { "epoch": 0.25552166344602906, "grad_norm": 0.1220703125, "learning_rate": 0.001994553984294967, "loss": 0.2341, "step": 36000 }, { "epoch": 0.25552166344602906, "eval_covost2-zh-en_loss": 3.857123374938965, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.7031, "eval_covost2-zh-en_samples_per_second": 3.091, "eval_covost2-zh-en_steps_per_second": 0.193, "step": 36000 }, { "epoch": 0.25552166344602906, "eval_covost2-en-zh_loss": 3.12442946434021, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.6196, "eval_covost2-en-zh_samples_per_second": 2.96, "eval_covost2-en-zh_steps_per_second": 0.185, "step": 36000 }, { "epoch": 0.25559264168587514, "grad_norm": 0.11474609375, "learning_rate": 0.0019945508749859967, "loss": 0.2744, "step": 36010 }, { "epoch": 0.2556636199257213, "grad_norm": 0.19140625, "learning_rate": 0.0019945477647923724, "loss": 0.2677, "step": 36020 }, { "epoch": 0.2557345981655674, "grad_norm": 0.119140625, "learning_rate": 0.0019945446537140954, "loss": 0.2409, "step": 36030 }, { "epoch": 0.2558055764054135, "grad_norm": 0.08642578125, "learning_rate": 0.0019945415417511707, "loss": 0.2507, "step": 36040 }, { "epoch": 0.2558765546452596, "grad_norm": 0.1572265625, "learning_rate": 0.0019945384289036003, "loss": 0.2676, "step": 36050 }, { "epoch": 0.25594753288510574, "grad_norm": 0.19921875, "learning_rate": 0.001994535315171387, "loss": 0.2836, "step": 36060 }, { "epoch": 0.2560185111249519, "grad_norm": 0.09326171875, "learning_rate": 0.0019945322005545346, "loss": 0.2532, "step": 36070 }, { "epoch": 0.256089489364798, "grad_norm": 0.154296875, "learning_rate": 0.001994529085053046, "loss": 0.2613, "step": 36080 }, { "epoch": 0.2561604676046441, "grad_norm": 0.138671875, "learning_rate": 0.001994525968666924, "loss": 0.2783, "step": 36090 }, { "epoch": 0.2562314458444902, "grad_norm": 0.154296875, "learning_rate": 0.0019945228513961718, "loss": 0.2652, "step": 36100 }, { "epoch": 0.25630242408433634, "grad_norm": 0.10009765625, "learning_rate": 0.001994519733240793, "loss": 0.2651, "step": 36110 }, { "epoch": 0.2563734023241825, "grad_norm": 0.087890625, "learning_rate": 0.00199451661420079, "loss": 0.2434, "step": 36120 }, { "epoch": 0.2564443805640286, "grad_norm": 0.07275390625, "learning_rate": 0.001994513494276166, "loss": 0.2498, "step": 36130 }, { "epoch": 0.2565153588038747, "grad_norm": 0.08251953125, "learning_rate": 0.001994510373466924, "loss": 0.2533, "step": 36140 }, { "epoch": 0.2565863370437208, "grad_norm": 0.08056640625, "learning_rate": 0.001994507251773068, "loss": 0.2709, "step": 36150 }, { "epoch": 0.25665731528356694, "grad_norm": 0.12353515625, "learning_rate": 0.0019945041291946, "loss": 0.2511, "step": 36160 }, { "epoch": 0.2567282935234131, "grad_norm": 0.1484375, "learning_rate": 0.0019945010057315234, "loss": 0.2597, "step": 36170 }, { "epoch": 0.2567992717632592, "grad_norm": 0.1318359375, "learning_rate": 0.001994497881383842, "loss": 0.2476, "step": 36180 }, { "epoch": 0.2568702500031053, "grad_norm": 0.140625, "learning_rate": 0.0019944947561515575, "loss": 0.2728, "step": 36190 }, { "epoch": 0.2569412282429514, "grad_norm": 0.10546875, "learning_rate": 0.001994491630034674, "loss": 0.2613, "step": 36200 }, { "epoch": 0.25701220648279754, "grad_norm": 0.19140625, "learning_rate": 0.001994488503033194, "loss": 0.2555, "step": 36210 }, { "epoch": 0.25708318472264363, "grad_norm": 0.12353515625, "learning_rate": 0.0019944853751471215, "loss": 0.2556, "step": 36220 }, { "epoch": 0.2571541629624898, "grad_norm": 0.09033203125, "learning_rate": 0.001994482246376459, "loss": 0.2484, "step": 36230 }, { "epoch": 0.2572251412023359, "grad_norm": 0.0966796875, "learning_rate": 0.001994479116721209, "loss": 0.2525, "step": 36240 }, { "epoch": 0.257296119442182, "grad_norm": 0.154296875, "learning_rate": 0.0019944759861813756, "loss": 0.2735, "step": 36250 }, { "epoch": 0.25736709768202815, "grad_norm": 0.119140625, "learning_rate": 0.0019944728547569616, "loss": 0.2473, "step": 36260 }, { "epoch": 0.25743807592187423, "grad_norm": 0.11669921875, "learning_rate": 0.00199446972244797, "loss": 0.2485, "step": 36270 }, { "epoch": 0.2575090541617204, "grad_norm": 0.10009765625, "learning_rate": 0.001994466589254404, "loss": 0.256, "step": 36280 }, { "epoch": 0.2575800324015665, "grad_norm": 0.095703125, "learning_rate": 0.001994463455176266, "loss": 0.2559, "step": 36290 }, { "epoch": 0.2576510106414126, "grad_norm": 0.12255859375, "learning_rate": 0.0019944603202135605, "loss": 0.2616, "step": 36300 }, { "epoch": 0.25772198888125875, "grad_norm": 0.12109375, "learning_rate": 0.0019944571843662894, "loss": 0.2376, "step": 36310 }, { "epoch": 0.25779296712110483, "grad_norm": 0.16015625, "learning_rate": 0.001994454047634456, "loss": 0.2849, "step": 36320 }, { "epoch": 0.257863945360951, "grad_norm": 0.14453125, "learning_rate": 0.001994450910018064, "loss": 0.2587, "step": 36330 }, { "epoch": 0.25793492360079706, "grad_norm": 0.09912109375, "learning_rate": 0.001994447771517116, "loss": 0.2576, "step": 36340 }, { "epoch": 0.2580059018406432, "grad_norm": 0.09423828125, "learning_rate": 0.001994444632131615, "loss": 0.2527, "step": 36350 }, { "epoch": 0.25807688008048935, "grad_norm": 0.09375, "learning_rate": 0.001994441491861565, "loss": 0.2542, "step": 36360 }, { "epoch": 0.25814785832033543, "grad_norm": 0.14453125, "learning_rate": 0.001994438350706968, "loss": 0.2792, "step": 36370 }, { "epoch": 0.2582188365601816, "grad_norm": 0.224609375, "learning_rate": 0.001994435208667827, "loss": 0.2474, "step": 36380 }, { "epoch": 0.25828981480002766, "grad_norm": 0.1376953125, "learning_rate": 0.001994432065744146, "loss": 0.2442, "step": 36390 }, { "epoch": 0.2583607930398738, "grad_norm": 0.12109375, "learning_rate": 0.001994428921935928, "loss": 0.2905, "step": 36400 }, { "epoch": 0.25843177127971995, "grad_norm": 0.10400390625, "learning_rate": 0.001994425777243176, "loss": 0.2765, "step": 36410 }, { "epoch": 0.25850274951956603, "grad_norm": 0.1552734375, "learning_rate": 0.0019944226316658924, "loss": 0.264, "step": 36420 }, { "epoch": 0.2585737277594122, "grad_norm": 0.1103515625, "learning_rate": 0.0019944194852040812, "loss": 0.2332, "step": 36430 }, { "epoch": 0.25864470599925826, "grad_norm": 0.11279296875, "learning_rate": 0.001994416337857745, "loss": 0.2739, "step": 36440 }, { "epoch": 0.2587156842391044, "grad_norm": 0.08154296875, "learning_rate": 0.001994413189626887, "loss": 0.2417, "step": 36450 }, { "epoch": 0.2587866624789505, "grad_norm": 0.1376953125, "learning_rate": 0.0019944100405115107, "loss": 0.2457, "step": 36460 }, { "epoch": 0.25885764071879663, "grad_norm": 0.1259765625, "learning_rate": 0.001994406890511619, "loss": 0.2416, "step": 36470 }, { "epoch": 0.2589286189586428, "grad_norm": 0.08935546875, "learning_rate": 0.0019944037396272146, "loss": 0.2841, "step": 36480 }, { "epoch": 0.25899959719848886, "grad_norm": 0.1416015625, "learning_rate": 0.0019944005878583014, "loss": 0.2569, "step": 36490 }, { "epoch": 0.259070575438335, "grad_norm": 0.11376953125, "learning_rate": 0.001994397435204882, "loss": 0.2588, "step": 36500 }, { "epoch": 0.2591415536781811, "grad_norm": 0.14453125, "learning_rate": 0.0019943942816669593, "loss": 0.2417, "step": 36510 }, { "epoch": 0.25921253191802723, "grad_norm": 0.0888671875, "learning_rate": 0.001994391127244537, "loss": 0.2543, "step": 36520 }, { "epoch": 0.2592835101578734, "grad_norm": 0.09765625, "learning_rate": 0.001994387971937618, "loss": 0.2552, "step": 36530 }, { "epoch": 0.25935448839771946, "grad_norm": 0.1298828125, "learning_rate": 0.0019943848157462046, "loss": 0.2729, "step": 36540 }, { "epoch": 0.2594254666375656, "grad_norm": 0.08056640625, "learning_rate": 0.0019943816586703014, "loss": 0.2717, "step": 36550 }, { "epoch": 0.2594964448774117, "grad_norm": 0.10107421875, "learning_rate": 0.0019943785007099104, "loss": 0.2565, "step": 36560 }, { "epoch": 0.25956742311725783, "grad_norm": 0.08544921875, "learning_rate": 0.0019943753418650355, "loss": 0.242, "step": 36570 }, { "epoch": 0.2596384013571039, "grad_norm": 0.08349609375, "learning_rate": 0.001994372182135679, "loss": 0.2396, "step": 36580 }, { "epoch": 0.25970937959695006, "grad_norm": 0.11376953125, "learning_rate": 0.001994369021521845, "loss": 0.2555, "step": 36590 }, { "epoch": 0.2597803578367962, "grad_norm": 0.08447265625, "learning_rate": 0.0019943658600235356, "loss": 0.2441, "step": 36600 }, { "epoch": 0.2598513360766423, "grad_norm": 0.08642578125, "learning_rate": 0.0019943626976407546, "loss": 0.2539, "step": 36610 }, { "epoch": 0.25992231431648843, "grad_norm": 0.130859375, "learning_rate": 0.001994359534373505, "loss": 0.2688, "step": 36620 }, { "epoch": 0.2599932925563345, "grad_norm": 0.12060546875, "learning_rate": 0.00199435637022179, "loss": 0.2459, "step": 36630 }, { "epoch": 0.26006427079618066, "grad_norm": 0.09912109375, "learning_rate": 0.0019943532051856123, "loss": 0.2586, "step": 36640 }, { "epoch": 0.2601352490360268, "grad_norm": 0.12890625, "learning_rate": 0.001994350039264975, "loss": 0.2357, "step": 36650 }, { "epoch": 0.2602062272758729, "grad_norm": 0.09033203125, "learning_rate": 0.0019943468724598823, "loss": 0.2538, "step": 36660 }, { "epoch": 0.26027720551571903, "grad_norm": 0.142578125, "learning_rate": 0.0019943437047703363, "loss": 0.259, "step": 36670 }, { "epoch": 0.2603481837555651, "grad_norm": 0.123046875, "learning_rate": 0.0019943405361963406, "loss": 0.2537, "step": 36680 }, { "epoch": 0.26041916199541126, "grad_norm": 0.1103515625, "learning_rate": 0.001994337366737898, "loss": 0.2548, "step": 36690 }, { "epoch": 0.26049014023525735, "grad_norm": 0.1396484375, "learning_rate": 0.0019943341963950117, "loss": 0.2494, "step": 36700 }, { "epoch": 0.2605611184751035, "grad_norm": 0.099609375, "learning_rate": 0.001994331025167685, "loss": 0.2482, "step": 36710 }, { "epoch": 0.26063209671494963, "grad_norm": 0.189453125, "learning_rate": 0.001994327853055921, "loss": 0.2592, "step": 36720 }, { "epoch": 0.2607030749547957, "grad_norm": 0.1298828125, "learning_rate": 0.001994324680059723, "loss": 0.2623, "step": 36730 }, { "epoch": 0.26077405319464186, "grad_norm": 0.1572265625, "learning_rate": 0.0019943215061790937, "loss": 0.2509, "step": 36740 }, { "epoch": 0.26084503143448795, "grad_norm": 0.171875, "learning_rate": 0.0019943183314140365, "loss": 0.2677, "step": 36750 }, { "epoch": 0.2609160096743341, "grad_norm": 0.11279296875, "learning_rate": 0.0019943151557645544, "loss": 0.2759, "step": 36760 }, { "epoch": 0.26098698791418024, "grad_norm": 0.1123046875, "learning_rate": 0.001994311979230651, "loss": 0.2697, "step": 36770 }, { "epoch": 0.2610579661540263, "grad_norm": 0.1259765625, "learning_rate": 0.001994308801812329, "loss": 0.2507, "step": 36780 }, { "epoch": 0.26112894439387246, "grad_norm": 0.087890625, "learning_rate": 0.0019943056235095916, "loss": 0.2469, "step": 36790 }, { "epoch": 0.26119992263371855, "grad_norm": 0.08203125, "learning_rate": 0.001994302444322442, "loss": 0.2407, "step": 36800 }, { "epoch": 0.2612709008735647, "grad_norm": 0.24609375, "learning_rate": 0.001994299264250883, "loss": 0.2622, "step": 36810 }, { "epoch": 0.2613418791134108, "grad_norm": 0.10791015625, "learning_rate": 0.0019942960832949183, "loss": 0.2465, "step": 36820 }, { "epoch": 0.2614128573532569, "grad_norm": 0.123046875, "learning_rate": 0.001994292901454551, "loss": 0.2529, "step": 36830 }, { "epoch": 0.26148383559310306, "grad_norm": 0.259765625, "learning_rate": 0.0019942897187297842, "loss": 0.2545, "step": 36840 }, { "epoch": 0.26155481383294915, "grad_norm": 0.1611328125, "learning_rate": 0.00199428653512062, "loss": 0.2695, "step": 36850 }, { "epoch": 0.2616257920727953, "grad_norm": 0.1806640625, "learning_rate": 0.0019942833506270633, "loss": 0.27, "step": 36860 }, { "epoch": 0.2616967703126414, "grad_norm": 0.09619140625, "learning_rate": 0.0019942801652491167, "loss": 0.263, "step": 36870 }, { "epoch": 0.2617677485524875, "grad_norm": 0.11181640625, "learning_rate": 0.0019942769789867824, "loss": 0.2586, "step": 36880 }, { "epoch": 0.26183872679233366, "grad_norm": 0.11669921875, "learning_rate": 0.001994273791840065, "loss": 0.2538, "step": 36890 }, { "epoch": 0.26190970503217975, "grad_norm": 0.07373046875, "learning_rate": 0.001994270603808966, "loss": 0.2383, "step": 36900 }, { "epoch": 0.2619806832720259, "grad_norm": 0.1513671875, "learning_rate": 0.0019942674148934894, "loss": 0.2525, "step": 36910 }, { "epoch": 0.262051661511872, "grad_norm": 0.09814453125, "learning_rate": 0.001994264225093639, "loss": 0.2392, "step": 36920 }, { "epoch": 0.2621226397517181, "grad_norm": 0.13671875, "learning_rate": 0.001994261034409417, "loss": 0.2733, "step": 36930 }, { "epoch": 0.2621936179915642, "grad_norm": 0.09521484375, "learning_rate": 0.001994257842840827, "loss": 0.2654, "step": 36940 }, { "epoch": 0.26226459623141035, "grad_norm": 0.12451171875, "learning_rate": 0.001994254650387872, "loss": 0.2588, "step": 36950 }, { "epoch": 0.2623355744712565, "grad_norm": 0.11181640625, "learning_rate": 0.001994251457050555, "loss": 0.2543, "step": 36960 }, { "epoch": 0.2624065527111026, "grad_norm": 0.11767578125, "learning_rate": 0.0019942482628288794, "loss": 0.2384, "step": 36970 }, { "epoch": 0.2624775309509487, "grad_norm": 0.1455078125, "learning_rate": 0.0019942450677228486, "loss": 0.2638, "step": 36980 }, { "epoch": 0.2625485091907948, "grad_norm": 0.228515625, "learning_rate": 0.001994241871732465, "loss": 0.2445, "step": 36990 }, { "epoch": 0.26261948743064095, "grad_norm": 0.3515625, "learning_rate": 0.001994238674857733, "loss": 0.2716, "step": 37000 }, { "epoch": 0.2626904656704871, "grad_norm": 0.12255859375, "learning_rate": 0.001994235477098654, "loss": 0.2657, "step": 37010 }, { "epoch": 0.2627614439103332, "grad_norm": 0.12255859375, "learning_rate": 0.001994232278455233, "loss": 0.2673, "step": 37020 }, { "epoch": 0.2628324221501793, "grad_norm": 0.11376953125, "learning_rate": 0.001994229078927472, "loss": 0.2761, "step": 37030 }, { "epoch": 0.2629034003900254, "grad_norm": 0.1533203125, "learning_rate": 0.0019942258785153742, "loss": 0.2494, "step": 37040 }, { "epoch": 0.26297437862987155, "grad_norm": 0.0986328125, "learning_rate": 0.0019942226772189436, "loss": 0.2571, "step": 37050 }, { "epoch": 0.26304535686971764, "grad_norm": 0.09814453125, "learning_rate": 0.0019942194750381826, "loss": 0.2657, "step": 37060 }, { "epoch": 0.2631163351095638, "grad_norm": 0.10205078125, "learning_rate": 0.0019942162719730947, "loss": 0.2763, "step": 37070 }, { "epoch": 0.2631873133494099, "grad_norm": 0.11083984375, "learning_rate": 0.0019942130680236825, "loss": 0.2821, "step": 37080 }, { "epoch": 0.263258291589256, "grad_norm": 0.12158203125, "learning_rate": 0.00199420986318995, "loss": 0.2616, "step": 37090 }, { "epoch": 0.26332926982910215, "grad_norm": 0.0888671875, "learning_rate": 0.0019942066574719, "loss": 0.2574, "step": 37100 }, { "epoch": 0.26340024806894824, "grad_norm": 0.1015625, "learning_rate": 0.0019942034508695352, "loss": 0.2502, "step": 37110 }, { "epoch": 0.2634712263087944, "grad_norm": 0.1259765625, "learning_rate": 0.0019942002433828597, "loss": 0.2584, "step": 37120 }, { "epoch": 0.2635422045486405, "grad_norm": 0.1953125, "learning_rate": 0.0019941970350118763, "loss": 0.264, "step": 37130 }, { "epoch": 0.2636131827884866, "grad_norm": 0.1484375, "learning_rate": 0.001994193825756588, "loss": 0.2659, "step": 37140 }, { "epoch": 0.26368416102833275, "grad_norm": 0.16796875, "learning_rate": 0.0019941906156169978, "loss": 0.2681, "step": 37150 }, { "epoch": 0.26375513926817884, "grad_norm": 0.09375, "learning_rate": 0.001994187404593109, "loss": 0.2543, "step": 37160 }, { "epoch": 0.263826117508025, "grad_norm": 0.162109375, "learning_rate": 0.0019941841926849253, "loss": 0.2829, "step": 37170 }, { "epoch": 0.26389709574787107, "grad_norm": 0.1123046875, "learning_rate": 0.0019941809798924496, "loss": 0.2658, "step": 37180 }, { "epoch": 0.2639680739877172, "grad_norm": 0.103515625, "learning_rate": 0.0019941777662156844, "loss": 0.2549, "step": 37190 }, { "epoch": 0.26403905222756335, "grad_norm": 0.10791015625, "learning_rate": 0.001994174551654634, "loss": 0.2551, "step": 37200 }, { "epoch": 0.26411003046740944, "grad_norm": 0.12109375, "learning_rate": 0.0019941713362093003, "loss": 0.2367, "step": 37210 }, { "epoch": 0.2641810087072556, "grad_norm": 0.0869140625, "learning_rate": 0.001994168119879688, "loss": 0.2649, "step": 37220 }, { "epoch": 0.26425198694710167, "grad_norm": 0.0986328125, "learning_rate": 0.0019941649026657985, "loss": 0.2631, "step": 37230 }, { "epoch": 0.2643229651869478, "grad_norm": 0.0888671875, "learning_rate": 0.001994161684567637, "loss": 0.2666, "step": 37240 }, { "epoch": 0.26439394342679395, "grad_norm": 0.1513671875, "learning_rate": 0.001994158465585205, "loss": 0.2593, "step": 37250 }, { "epoch": 0.26446492166664004, "grad_norm": 0.07958984375, "learning_rate": 0.0019941552457185064, "loss": 0.2364, "step": 37260 }, { "epoch": 0.2645358999064862, "grad_norm": 0.0693359375, "learning_rate": 0.0019941520249675444, "loss": 0.2409, "step": 37270 }, { "epoch": 0.26460687814633227, "grad_norm": 0.09521484375, "learning_rate": 0.001994148803332322, "loss": 0.2569, "step": 37280 }, { "epoch": 0.2646778563861784, "grad_norm": 0.142578125, "learning_rate": 0.0019941455808128424, "loss": 0.2696, "step": 37290 }, { "epoch": 0.26474883462602455, "grad_norm": 0.1484375, "learning_rate": 0.001994142357409109, "loss": 0.2756, "step": 37300 }, { "epoch": 0.26481981286587064, "grad_norm": 0.1484375, "learning_rate": 0.001994139133121125, "loss": 0.2617, "step": 37310 }, { "epoch": 0.2648907911057168, "grad_norm": 0.11083984375, "learning_rate": 0.0019941359079488933, "loss": 0.2604, "step": 37320 }, { "epoch": 0.26496176934556287, "grad_norm": 0.0859375, "learning_rate": 0.001994132681892417, "loss": 0.2578, "step": 37330 }, { "epoch": 0.265032747585409, "grad_norm": 0.1396484375, "learning_rate": 0.0019941294549517, "loss": 0.2711, "step": 37340 }, { "epoch": 0.2651037258252551, "grad_norm": 0.10595703125, "learning_rate": 0.0019941262271267446, "loss": 0.2581, "step": 37350 }, { "epoch": 0.26517470406510124, "grad_norm": 0.1005859375, "learning_rate": 0.001994122998417555, "loss": 0.2467, "step": 37360 }, { "epoch": 0.2652456823049474, "grad_norm": 0.11865234375, "learning_rate": 0.001994119768824133, "loss": 0.2603, "step": 37370 }, { "epoch": 0.26531666054479347, "grad_norm": 0.09375, "learning_rate": 0.001994116538346483, "loss": 0.2466, "step": 37380 }, { "epoch": 0.2653876387846396, "grad_norm": 0.0888671875, "learning_rate": 0.001994113306984608, "loss": 0.244, "step": 37390 }, { "epoch": 0.2654586170244857, "grad_norm": 0.09619140625, "learning_rate": 0.0019941100747385104, "loss": 0.2568, "step": 37400 }, { "epoch": 0.26552959526433184, "grad_norm": 0.1201171875, "learning_rate": 0.0019941068416081944, "loss": 0.253, "step": 37410 }, { "epoch": 0.265600573504178, "grad_norm": 0.1044921875, "learning_rate": 0.0019941036075936626, "loss": 0.2511, "step": 37420 }, { "epoch": 0.26567155174402407, "grad_norm": 0.10546875, "learning_rate": 0.0019941003726949183, "loss": 0.2692, "step": 37430 }, { "epoch": 0.2657425299838702, "grad_norm": 0.134765625, "learning_rate": 0.001994097136911965, "loss": 0.2623, "step": 37440 }, { "epoch": 0.2658135082237163, "grad_norm": 0.10888671875, "learning_rate": 0.0019940939002448055, "loss": 0.2808, "step": 37450 }, { "epoch": 0.26588448646356244, "grad_norm": 0.2373046875, "learning_rate": 0.001994090662693443, "loss": 0.265, "step": 37460 }, { "epoch": 0.26595546470340853, "grad_norm": 0.1494140625, "learning_rate": 0.0019940874242578813, "loss": 0.2568, "step": 37470 }, { "epoch": 0.26602644294325467, "grad_norm": 0.07421875, "learning_rate": 0.0019940841849381227, "loss": 0.2496, "step": 37480 }, { "epoch": 0.2660974211831008, "grad_norm": 0.087890625, "learning_rate": 0.001994080944734171, "loss": 0.2602, "step": 37490 }, { "epoch": 0.2661683994229469, "grad_norm": 0.09765625, "learning_rate": 0.0019940777036460294, "loss": 0.2783, "step": 37500 }, { "epoch": 0.26623937766279304, "grad_norm": 0.111328125, "learning_rate": 0.001994074461673701, "loss": 0.2594, "step": 37510 }, { "epoch": 0.26631035590263913, "grad_norm": 0.1787109375, "learning_rate": 0.0019940712188171892, "loss": 0.2459, "step": 37520 }, { "epoch": 0.26638133414248527, "grad_norm": 0.10107421875, "learning_rate": 0.0019940679750764965, "loss": 0.2582, "step": 37530 }, { "epoch": 0.2664523123823314, "grad_norm": 0.1455078125, "learning_rate": 0.001994064730451627, "loss": 0.2627, "step": 37540 }, { "epoch": 0.2665232906221775, "grad_norm": 0.171875, "learning_rate": 0.0019940614849425836, "loss": 0.2708, "step": 37550 }, { "epoch": 0.26659426886202364, "grad_norm": 0.142578125, "learning_rate": 0.0019940582385493694, "loss": 0.2542, "step": 37560 }, { "epoch": 0.26666524710186973, "grad_norm": 0.0830078125, "learning_rate": 0.0019940549912719875, "loss": 0.272, "step": 37570 }, { "epoch": 0.26673622534171587, "grad_norm": 0.107421875, "learning_rate": 0.001994051743110441, "loss": 0.2637, "step": 37580 }, { "epoch": 0.26680720358156196, "grad_norm": 0.11767578125, "learning_rate": 0.0019940484940647336, "loss": 0.2709, "step": 37590 }, { "epoch": 0.2668781818214081, "grad_norm": 0.173828125, "learning_rate": 0.0019940452441348684, "loss": 0.24, "step": 37600 }, { "epoch": 0.26694916006125424, "grad_norm": 0.1416015625, "learning_rate": 0.001994041993320848, "loss": 0.2661, "step": 37610 }, { "epoch": 0.26702013830110033, "grad_norm": 0.1474609375, "learning_rate": 0.0019940387416226765, "loss": 0.2659, "step": 37620 }, { "epoch": 0.26709111654094647, "grad_norm": 0.2041015625, "learning_rate": 0.0019940354890403567, "loss": 0.2556, "step": 37630 }, { "epoch": 0.26716209478079256, "grad_norm": 0.07958984375, "learning_rate": 0.0019940322355738917, "loss": 0.2526, "step": 37640 }, { "epoch": 0.2672330730206387, "grad_norm": 0.07470703125, "learning_rate": 0.001994028981223285, "loss": 0.2698, "step": 37650 }, { "epoch": 0.26730405126048484, "grad_norm": 0.1904296875, "learning_rate": 0.0019940257259885396, "loss": 0.2848, "step": 37660 }, { "epoch": 0.26737502950033093, "grad_norm": 0.0908203125, "learning_rate": 0.001994022469869659, "loss": 0.272, "step": 37670 }, { "epoch": 0.26744600774017707, "grad_norm": 0.09814453125, "learning_rate": 0.001994019212866646, "loss": 0.2438, "step": 37680 }, { "epoch": 0.26751698598002316, "grad_norm": 0.119140625, "learning_rate": 0.001994015954979504, "loss": 0.2593, "step": 37690 }, { "epoch": 0.2675879642198693, "grad_norm": 0.09765625, "learning_rate": 0.0019940126962082363, "loss": 0.2591, "step": 37700 }, { "epoch": 0.2676589424597154, "grad_norm": 0.06884765625, "learning_rate": 0.001994009436552846, "loss": 0.2635, "step": 37710 }, { "epoch": 0.26772992069956153, "grad_norm": 0.1796875, "learning_rate": 0.0019940061760133367, "loss": 0.2565, "step": 37720 }, { "epoch": 0.26780089893940767, "grad_norm": 0.1337890625, "learning_rate": 0.001994002914589711, "loss": 0.2708, "step": 37730 }, { "epoch": 0.26787187717925376, "grad_norm": 0.11767578125, "learning_rate": 0.0019939996522819726, "loss": 0.2514, "step": 37740 }, { "epoch": 0.2679428554190999, "grad_norm": 0.12451171875, "learning_rate": 0.0019939963890901243, "loss": 0.2575, "step": 37750 }, { "epoch": 0.268013833658946, "grad_norm": 0.10546875, "learning_rate": 0.00199399312501417, "loss": 0.2646, "step": 37760 }, { "epoch": 0.26808481189879213, "grad_norm": 0.07421875, "learning_rate": 0.0019939898600541124, "loss": 0.2661, "step": 37770 }, { "epoch": 0.2681557901386383, "grad_norm": 0.0869140625, "learning_rate": 0.001993986594209955, "loss": 0.2527, "step": 37780 }, { "epoch": 0.26822676837848436, "grad_norm": 0.08984375, "learning_rate": 0.0019939833274817, "loss": 0.24, "step": 37790 }, { "epoch": 0.2682977466183305, "grad_norm": 0.09033203125, "learning_rate": 0.0019939800598693528, "loss": 0.2476, "step": 37800 }, { "epoch": 0.2683687248581766, "grad_norm": 0.1552734375, "learning_rate": 0.0019939767913729144, "loss": 0.2603, "step": 37810 }, { "epoch": 0.26843970309802273, "grad_norm": 0.146484375, "learning_rate": 0.0019939735219923894, "loss": 0.244, "step": 37820 }, { "epoch": 0.2685106813378688, "grad_norm": 0.080078125, "learning_rate": 0.001993970251727781, "loss": 0.2365, "step": 37830 }, { "epoch": 0.26858165957771496, "grad_norm": 0.10546875, "learning_rate": 0.0019939669805790912, "loss": 0.2862, "step": 37840 }, { "epoch": 0.2686526378175611, "grad_norm": 0.07763671875, "learning_rate": 0.0019939637085463245, "loss": 0.2419, "step": 37850 }, { "epoch": 0.2687236160574072, "grad_norm": 0.142578125, "learning_rate": 0.001993960435629484, "loss": 0.2492, "step": 37860 }, { "epoch": 0.26879459429725333, "grad_norm": 0.10400390625, "learning_rate": 0.001993957161828572, "loss": 0.2512, "step": 37870 }, { "epoch": 0.2688655725370994, "grad_norm": 0.09521484375, "learning_rate": 0.001993953887143593, "loss": 0.2573, "step": 37880 }, { "epoch": 0.26893655077694556, "grad_norm": 0.228515625, "learning_rate": 0.0019939506115745492, "loss": 0.2878, "step": 37890 }, { "epoch": 0.2690075290167917, "grad_norm": 0.1572265625, "learning_rate": 0.0019939473351214447, "loss": 0.2805, "step": 37900 }, { "epoch": 0.2690785072566378, "grad_norm": 0.09521484375, "learning_rate": 0.001993944057784282, "loss": 0.2717, "step": 37910 }, { "epoch": 0.26914948549648393, "grad_norm": 0.08447265625, "learning_rate": 0.0019939407795630647, "loss": 0.2473, "step": 37920 }, { "epoch": 0.26922046373633, "grad_norm": 0.12158203125, "learning_rate": 0.001993937500457796, "loss": 0.2631, "step": 37930 }, { "epoch": 0.26929144197617616, "grad_norm": 0.1943359375, "learning_rate": 0.0019939342204684796, "loss": 0.2686, "step": 37940 }, { "epoch": 0.26936242021602225, "grad_norm": 0.08203125, "learning_rate": 0.0019939309395951175, "loss": 0.2646, "step": 37950 }, { "epoch": 0.2694333984558684, "grad_norm": 0.0986328125, "learning_rate": 0.001993927657837714, "loss": 0.2656, "step": 37960 }, { "epoch": 0.26950437669571453, "grad_norm": 0.10302734375, "learning_rate": 0.0019939243751962723, "loss": 0.2569, "step": 37970 }, { "epoch": 0.2695753549355606, "grad_norm": 0.08642578125, "learning_rate": 0.001993921091670795, "loss": 0.2697, "step": 37980 }, { "epoch": 0.26964633317540676, "grad_norm": 0.138671875, "learning_rate": 0.0019939178072612865, "loss": 0.2711, "step": 37990 }, { "epoch": 0.26971731141525285, "grad_norm": 0.1015625, "learning_rate": 0.001993914521967749, "loss": 0.281, "step": 38000 }, { "epoch": 0.26971731141525285, "eval_covost2-zh-en_loss": 3.8704609870910645, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.7136, "eval_covost2-zh-en_samples_per_second": 2.947, "eval_covost2-zh-en_steps_per_second": 0.184, "step": 38000 }, { "epoch": 0.26971731141525285, "eval_covost2-en-zh_loss": 3.172335624694824, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 22.3523, "eval_covost2-en-zh_samples_per_second": 2.863, "eval_covost2-en-zh_steps_per_second": 0.179, "step": 38000 }, { "epoch": 0.269788289655099, "grad_norm": 0.09912109375, "learning_rate": 0.0019939112357901857, "loss": 0.2705, "step": 38010 }, { "epoch": 0.26985926789494513, "grad_norm": 0.1845703125, "learning_rate": 0.0019939079487286, "loss": 0.2456, "step": 38020 }, { "epoch": 0.2699302461347912, "grad_norm": 0.2001953125, "learning_rate": 0.0019939046607829964, "loss": 0.2656, "step": 38030 }, { "epoch": 0.27000122437463736, "grad_norm": 0.08349609375, "learning_rate": 0.0019939013719533763, "loss": 0.2305, "step": 38040 }, { "epoch": 0.27007220261448345, "grad_norm": 0.1083984375, "learning_rate": 0.001993898082239744, "loss": 0.2656, "step": 38050 }, { "epoch": 0.2701431808543296, "grad_norm": 0.1611328125, "learning_rate": 0.0019938947916421025, "loss": 0.2711, "step": 38060 }, { "epoch": 0.2702141590941757, "grad_norm": 0.1005859375, "learning_rate": 0.001993891500160455, "loss": 0.2725, "step": 38070 }, { "epoch": 0.2702851373340218, "grad_norm": 0.07421875, "learning_rate": 0.001993888207794805, "loss": 0.2398, "step": 38080 }, { "epoch": 0.27035611557386796, "grad_norm": 0.08349609375, "learning_rate": 0.0019938849145451558, "loss": 0.2666, "step": 38090 }, { "epoch": 0.27042709381371405, "grad_norm": 0.1650390625, "learning_rate": 0.00199388162041151, "loss": 0.2591, "step": 38100 }, { "epoch": 0.2704980720535602, "grad_norm": 0.10400390625, "learning_rate": 0.0019938783253938713, "loss": 0.2832, "step": 38110 }, { "epoch": 0.2705690502934063, "grad_norm": 0.1162109375, "learning_rate": 0.0019938750294922433, "loss": 0.2487, "step": 38120 }, { "epoch": 0.2706400285332524, "grad_norm": 0.0986328125, "learning_rate": 0.0019938717327066284, "loss": 0.2465, "step": 38130 }, { "epoch": 0.27071100677309856, "grad_norm": 0.08984375, "learning_rate": 0.001993868435037031, "loss": 0.2457, "step": 38140 }, { "epoch": 0.27078198501294465, "grad_norm": 0.10400390625, "learning_rate": 0.001993865136483453, "loss": 0.2677, "step": 38150 }, { "epoch": 0.2708529632527908, "grad_norm": 0.1142578125, "learning_rate": 0.001993861837045899, "loss": 0.2594, "step": 38160 }, { "epoch": 0.2709239414926369, "grad_norm": 0.09716796875, "learning_rate": 0.0019938585367243714, "loss": 0.2489, "step": 38170 }, { "epoch": 0.270994919732483, "grad_norm": 0.08349609375, "learning_rate": 0.001993855235518874, "loss": 0.2588, "step": 38180 }, { "epoch": 0.2710658979723291, "grad_norm": 0.0966796875, "learning_rate": 0.0019938519334294098, "loss": 0.2529, "step": 38190 }, { "epoch": 0.27113687621217525, "grad_norm": 0.099609375, "learning_rate": 0.0019938486304559816, "loss": 0.2764, "step": 38200 }, { "epoch": 0.2712078544520214, "grad_norm": 0.1123046875, "learning_rate": 0.0019938453265985934, "loss": 0.2484, "step": 38210 }, { "epoch": 0.2712788326918675, "grad_norm": 0.08447265625, "learning_rate": 0.0019938420218572482, "loss": 0.2786, "step": 38220 }, { "epoch": 0.2713498109317136, "grad_norm": 0.103515625, "learning_rate": 0.001993838716231949, "loss": 0.2736, "step": 38230 }, { "epoch": 0.2714207891715597, "grad_norm": 0.068359375, "learning_rate": 0.0019938354097227004, "loss": 0.2509, "step": 38240 }, { "epoch": 0.27149176741140585, "grad_norm": 0.12109375, "learning_rate": 0.0019938321023295034, "loss": 0.2688, "step": 38250 }, { "epoch": 0.271562745651252, "grad_norm": 0.08251953125, "learning_rate": 0.001993828794052363, "loss": 0.2594, "step": 38260 }, { "epoch": 0.2716337238910981, "grad_norm": 0.2216796875, "learning_rate": 0.0019938254848912815, "loss": 0.2596, "step": 38270 }, { "epoch": 0.2717047021309442, "grad_norm": 0.158203125, "learning_rate": 0.001993822174846263, "loss": 0.2402, "step": 38280 }, { "epoch": 0.2717756803707903, "grad_norm": 0.1015625, "learning_rate": 0.0019938188639173106, "loss": 0.2598, "step": 38290 }, { "epoch": 0.27184665861063645, "grad_norm": 0.10400390625, "learning_rate": 0.0019938155521044268, "loss": 0.2514, "step": 38300 }, { "epoch": 0.27191763685048254, "grad_norm": 0.09423828125, "learning_rate": 0.001993812239407616, "loss": 0.2633, "step": 38310 }, { "epoch": 0.2719886150903287, "grad_norm": 0.126953125, "learning_rate": 0.0019938089258268806, "loss": 0.2592, "step": 38320 }, { "epoch": 0.2720595933301748, "grad_norm": 0.1279296875, "learning_rate": 0.0019938056113622243, "loss": 0.2656, "step": 38330 }, { "epoch": 0.2721305715700209, "grad_norm": 0.1201171875, "learning_rate": 0.00199380229601365, "loss": 0.2555, "step": 38340 }, { "epoch": 0.27220154980986705, "grad_norm": 0.189453125, "learning_rate": 0.0019937989797811613, "loss": 0.2929, "step": 38350 }, { "epoch": 0.27227252804971314, "grad_norm": 0.232421875, "learning_rate": 0.0019937956626647616, "loss": 0.2677, "step": 38360 }, { "epoch": 0.2723435062895593, "grad_norm": 0.1416015625, "learning_rate": 0.001993792344664454, "loss": 0.2496, "step": 38370 }, { "epoch": 0.2724144845294054, "grad_norm": 0.1923828125, "learning_rate": 0.001993789025780242, "loss": 0.2613, "step": 38380 }, { "epoch": 0.2724854627692515, "grad_norm": 0.14453125, "learning_rate": 0.0019937857060121286, "loss": 0.2446, "step": 38390 }, { "epoch": 0.27255644100909765, "grad_norm": 0.07861328125, "learning_rate": 0.0019937823853601166, "loss": 0.2565, "step": 38400 }, { "epoch": 0.27262741924894374, "grad_norm": 0.1572265625, "learning_rate": 0.0019937790638242105, "loss": 0.2377, "step": 38410 }, { "epoch": 0.2726983974887899, "grad_norm": 0.12255859375, "learning_rate": 0.0019937757414044124, "loss": 0.2595, "step": 38420 }, { "epoch": 0.272769375728636, "grad_norm": 0.142578125, "learning_rate": 0.0019937724181007263, "loss": 0.264, "step": 38430 }, { "epoch": 0.2728403539684821, "grad_norm": 0.1279296875, "learning_rate": 0.0019937690939131557, "loss": 0.25, "step": 38440 }, { "epoch": 0.27291133220832825, "grad_norm": 0.1298828125, "learning_rate": 0.001993765768841703, "loss": 0.2413, "step": 38450 }, { "epoch": 0.27298231044817434, "grad_norm": 0.076171875, "learning_rate": 0.0019937624428863717, "loss": 0.2638, "step": 38460 }, { "epoch": 0.2730532886880205, "grad_norm": 0.08203125, "learning_rate": 0.001993759116047166, "loss": 0.2606, "step": 38470 }, { "epoch": 0.27312426692786657, "grad_norm": 0.0859375, "learning_rate": 0.001993755788324088, "loss": 0.234, "step": 38480 }, { "epoch": 0.2731952451677127, "grad_norm": 0.08154296875, "learning_rate": 0.001993752459717142, "loss": 0.2487, "step": 38490 }, { "epoch": 0.27326622340755885, "grad_norm": 0.169921875, "learning_rate": 0.001993749130226331, "loss": 0.2515, "step": 38500 }, { "epoch": 0.27333720164740494, "grad_norm": 0.103515625, "learning_rate": 0.001993745799851658, "loss": 0.2524, "step": 38510 }, { "epoch": 0.2734081798872511, "grad_norm": 0.26171875, "learning_rate": 0.001993742468593126, "loss": 0.2613, "step": 38520 }, { "epoch": 0.27347915812709717, "grad_norm": 0.10546875, "learning_rate": 0.0019937391364507387, "loss": 0.2547, "step": 38530 }, { "epoch": 0.2735501363669433, "grad_norm": 0.10595703125, "learning_rate": 0.0019937358034245, "loss": 0.2604, "step": 38540 }, { "epoch": 0.27362111460678945, "grad_norm": 0.083984375, "learning_rate": 0.0019937324695144124, "loss": 0.2526, "step": 38550 }, { "epoch": 0.27369209284663554, "grad_norm": 0.115234375, "learning_rate": 0.0019937291347204788, "loss": 0.2707, "step": 38560 }, { "epoch": 0.2737630710864817, "grad_norm": 0.10302734375, "learning_rate": 0.001993725799042704, "loss": 0.2721, "step": 38570 }, { "epoch": 0.27383404932632777, "grad_norm": 0.09228515625, "learning_rate": 0.0019937224624810897, "loss": 0.2518, "step": 38580 }, { "epoch": 0.2739050275661739, "grad_norm": 0.09375, "learning_rate": 0.0019937191250356404, "loss": 0.247, "step": 38590 }, { "epoch": 0.27397600580602, "grad_norm": 0.12158203125, "learning_rate": 0.0019937157867063585, "loss": 0.2726, "step": 38600 }, { "epoch": 0.27404698404586614, "grad_norm": 0.08984375, "learning_rate": 0.0019937124474932477, "loss": 0.2831, "step": 38610 }, { "epoch": 0.2741179622857123, "grad_norm": 0.08984375, "learning_rate": 0.0019937091073963117, "loss": 0.252, "step": 38620 }, { "epoch": 0.27418894052555837, "grad_norm": 0.1953125, "learning_rate": 0.001993705766415553, "loss": 0.2428, "step": 38630 }, { "epoch": 0.2742599187654045, "grad_norm": 0.12451171875, "learning_rate": 0.0019937024245509755, "loss": 0.2489, "step": 38640 }, { "epoch": 0.2743308970052506, "grad_norm": 0.162109375, "learning_rate": 0.0019936990818025823, "loss": 0.2799, "step": 38650 }, { "epoch": 0.27440187524509674, "grad_norm": 0.12451171875, "learning_rate": 0.0019936957381703766, "loss": 0.2551, "step": 38660 }, { "epoch": 0.2744728534849429, "grad_norm": 0.11962890625, "learning_rate": 0.001993692393654362, "loss": 0.2703, "step": 38670 }, { "epoch": 0.27454383172478897, "grad_norm": 0.10986328125, "learning_rate": 0.0019936890482545417, "loss": 0.2305, "step": 38680 }, { "epoch": 0.2746148099646351, "grad_norm": 0.11669921875, "learning_rate": 0.0019936857019709187, "loss": 0.2628, "step": 38690 }, { "epoch": 0.2746857882044812, "grad_norm": 0.1015625, "learning_rate": 0.001993682354803497, "loss": 0.2656, "step": 38700 }, { "epoch": 0.27475676644432734, "grad_norm": 0.0888671875, "learning_rate": 0.001993679006752279, "loss": 0.2491, "step": 38710 }, { "epoch": 0.2748277446841734, "grad_norm": 0.10693359375, "learning_rate": 0.0019936756578172684, "loss": 0.2603, "step": 38720 }, { "epoch": 0.27489872292401957, "grad_norm": 0.1279296875, "learning_rate": 0.001993672307998469, "loss": 0.2314, "step": 38730 }, { "epoch": 0.2749697011638657, "grad_norm": 0.1435546875, "learning_rate": 0.0019936689572958833, "loss": 0.2796, "step": 38740 }, { "epoch": 0.2750406794037118, "grad_norm": 0.09326171875, "learning_rate": 0.001993665605709515, "loss": 0.2705, "step": 38750 }, { "epoch": 0.27511165764355794, "grad_norm": 0.09228515625, "learning_rate": 0.001993662253239368, "loss": 0.2483, "step": 38760 }, { "epoch": 0.275182635883404, "grad_norm": 0.09912109375, "learning_rate": 0.0019936588998854445, "loss": 0.2356, "step": 38770 }, { "epoch": 0.27525361412325017, "grad_norm": 0.11376953125, "learning_rate": 0.0019936555456477483, "loss": 0.2621, "step": 38780 }, { "epoch": 0.2753245923630963, "grad_norm": 0.1220703125, "learning_rate": 0.0019936521905262834, "loss": 0.2514, "step": 38790 }, { "epoch": 0.2753955706029424, "grad_norm": 0.10888671875, "learning_rate": 0.001993648834521052, "loss": 0.2502, "step": 38800 }, { "epoch": 0.27546654884278854, "grad_norm": 0.07470703125, "learning_rate": 0.0019936454776320577, "loss": 0.2521, "step": 38810 }, { "epoch": 0.2755375270826346, "grad_norm": 0.125, "learning_rate": 0.0019936421198593042, "loss": 0.2618, "step": 38820 }, { "epoch": 0.27560850532248077, "grad_norm": 0.08544921875, "learning_rate": 0.001993638761202795, "loss": 0.246, "step": 38830 }, { "epoch": 0.27567948356232685, "grad_norm": 0.07421875, "learning_rate": 0.0019936354016625328, "loss": 0.2589, "step": 38840 }, { "epoch": 0.275750461802173, "grad_norm": 0.11181640625, "learning_rate": 0.0019936320412385212, "loss": 0.2583, "step": 38850 }, { "epoch": 0.27582144004201914, "grad_norm": 0.091796875, "learning_rate": 0.0019936286799307635, "loss": 0.2524, "step": 38860 }, { "epoch": 0.2758924182818652, "grad_norm": 0.13671875, "learning_rate": 0.0019936253177392635, "loss": 0.2577, "step": 38870 }, { "epoch": 0.27596339652171137, "grad_norm": 0.1123046875, "learning_rate": 0.0019936219546640237, "loss": 0.2553, "step": 38880 }, { "epoch": 0.27603437476155746, "grad_norm": 0.11376953125, "learning_rate": 0.0019936185907050473, "loss": 0.2489, "step": 38890 }, { "epoch": 0.2761053530014036, "grad_norm": 0.09130859375, "learning_rate": 0.001993615225862339, "loss": 0.2593, "step": 38900 }, { "epoch": 0.27617633124124974, "grad_norm": 0.1552734375, "learning_rate": 0.0019936118601359006, "loss": 0.2559, "step": 38910 }, { "epoch": 0.2762473094810958, "grad_norm": 0.10400390625, "learning_rate": 0.0019936084935257363, "loss": 0.2599, "step": 38920 }, { "epoch": 0.27631828772094197, "grad_norm": 0.0927734375, "learning_rate": 0.0019936051260318493, "loss": 0.2637, "step": 38930 }, { "epoch": 0.27638926596078806, "grad_norm": 0.10400390625, "learning_rate": 0.001993601757654243, "loss": 0.251, "step": 38940 }, { "epoch": 0.2764602442006342, "grad_norm": 0.10986328125, "learning_rate": 0.0019935983883929203, "loss": 0.2663, "step": 38950 }, { "epoch": 0.2765312224404803, "grad_norm": 0.125, "learning_rate": 0.0019935950182478845, "loss": 0.263, "step": 38960 }, { "epoch": 0.2766022006803264, "grad_norm": 0.1220703125, "learning_rate": 0.0019935916472191397, "loss": 0.2501, "step": 38970 }, { "epoch": 0.27667317892017257, "grad_norm": 0.08056640625, "learning_rate": 0.0019935882753066886, "loss": 0.2699, "step": 38980 }, { "epoch": 0.27674415716001866, "grad_norm": 0.12353515625, "learning_rate": 0.0019935849025105347, "loss": 0.277, "step": 38990 }, { "epoch": 0.2768151353998648, "grad_norm": 0.103515625, "learning_rate": 0.0019935815288306814, "loss": 0.2407, "step": 39000 }, { "epoch": 0.2768861136397109, "grad_norm": 0.09912109375, "learning_rate": 0.0019935781542671322, "loss": 0.2599, "step": 39010 }, { "epoch": 0.276957091879557, "grad_norm": 0.1298828125, "learning_rate": 0.00199357477881989, "loss": 0.238, "step": 39020 }, { "epoch": 0.27702807011940317, "grad_norm": 0.0849609375, "learning_rate": 0.001993571402488958, "loss": 0.252, "step": 39030 }, { "epoch": 0.27709904835924926, "grad_norm": 0.0673828125, "learning_rate": 0.00199356802527434, "loss": 0.2364, "step": 39040 }, { "epoch": 0.2771700265990954, "grad_norm": 0.11865234375, "learning_rate": 0.0019935646471760395, "loss": 0.2637, "step": 39050 }, { "epoch": 0.2772410048389415, "grad_norm": 0.08837890625, "learning_rate": 0.0019935612681940594, "loss": 0.2501, "step": 39060 }, { "epoch": 0.2773119830787876, "grad_norm": 0.09326171875, "learning_rate": 0.0019935578883284034, "loss": 0.2557, "step": 39070 }, { "epoch": 0.2773829613186337, "grad_norm": 0.11181640625, "learning_rate": 0.0019935545075790745, "loss": 0.24, "step": 39080 }, { "epoch": 0.27745393955847986, "grad_norm": 0.08251953125, "learning_rate": 0.001993551125946076, "loss": 0.2695, "step": 39090 }, { "epoch": 0.277524917798326, "grad_norm": 0.099609375, "learning_rate": 0.0019935477434294122, "loss": 0.263, "step": 39100 }, { "epoch": 0.2775958960381721, "grad_norm": 0.099609375, "learning_rate": 0.001993544360029085, "loss": 0.2747, "step": 39110 }, { "epoch": 0.2776668742780182, "grad_norm": 0.18359375, "learning_rate": 0.0019935409757450982, "loss": 0.2862, "step": 39120 }, { "epoch": 0.2777378525178643, "grad_norm": 0.1796875, "learning_rate": 0.0019935375905774564, "loss": 0.2723, "step": 39130 }, { "epoch": 0.27780883075771046, "grad_norm": 0.17578125, "learning_rate": 0.0019935342045261607, "loss": 0.2547, "step": 39140 }, { "epoch": 0.2778798089975566, "grad_norm": 0.09375, "learning_rate": 0.0019935308175912164, "loss": 0.2637, "step": 39150 }, { "epoch": 0.2779507872374027, "grad_norm": 0.07763671875, "learning_rate": 0.001993527429772626, "loss": 0.2769, "step": 39160 }, { "epoch": 0.27802176547724883, "grad_norm": 0.080078125, "learning_rate": 0.001993524041070393, "loss": 0.2513, "step": 39170 }, { "epoch": 0.2780927437170949, "grad_norm": 0.103515625, "learning_rate": 0.0019935206514845208, "loss": 0.2504, "step": 39180 }, { "epoch": 0.27816372195694106, "grad_norm": 0.08837890625, "learning_rate": 0.0019935172610150123, "loss": 0.275, "step": 39190 }, { "epoch": 0.27823470019678714, "grad_norm": 0.119140625, "learning_rate": 0.0019935138696618717, "loss": 0.2473, "step": 39200 }, { "epoch": 0.2783056784366333, "grad_norm": 0.11083984375, "learning_rate": 0.001993510477425102, "loss": 0.2543, "step": 39210 }, { "epoch": 0.27837665667647943, "grad_norm": 0.08935546875, "learning_rate": 0.001993507084304706, "loss": 0.2507, "step": 39220 }, { "epoch": 0.2784476349163255, "grad_norm": 0.0732421875, "learning_rate": 0.0019935036903006875, "loss": 0.2406, "step": 39230 }, { "epoch": 0.27851861315617166, "grad_norm": 0.10009765625, "learning_rate": 0.00199350029541305, "loss": 0.2479, "step": 39240 }, { "epoch": 0.27858959139601774, "grad_norm": 0.11181640625, "learning_rate": 0.0019934968996417964, "loss": 0.2501, "step": 39250 }, { "epoch": 0.2786605696358639, "grad_norm": 0.08447265625, "learning_rate": 0.0019934935029869306, "loss": 0.2475, "step": 39260 }, { "epoch": 0.27873154787571003, "grad_norm": 0.13671875, "learning_rate": 0.0019934901054484556, "loss": 0.2782, "step": 39270 }, { "epoch": 0.2788025261155561, "grad_norm": 0.2197265625, "learning_rate": 0.001993486707026375, "loss": 0.2473, "step": 39280 }, { "epoch": 0.27887350435540226, "grad_norm": 0.1796875, "learning_rate": 0.001993483307720692, "loss": 0.2843, "step": 39290 }, { "epoch": 0.27894448259524834, "grad_norm": 0.1689453125, "learning_rate": 0.0019934799075314105, "loss": 0.2726, "step": 39300 }, { "epoch": 0.2790154608350945, "grad_norm": 0.11328125, "learning_rate": 0.0019934765064585328, "loss": 0.2341, "step": 39310 }, { "epoch": 0.2790864390749406, "grad_norm": 0.0810546875, "learning_rate": 0.001993473104502063, "loss": 0.257, "step": 39320 }, { "epoch": 0.2791574173147867, "grad_norm": 0.125, "learning_rate": 0.0019934697016620044, "loss": 0.2502, "step": 39330 }, { "epoch": 0.27922839555463286, "grad_norm": 0.0849609375, "learning_rate": 0.00199346629793836, "loss": 0.2512, "step": 39340 }, { "epoch": 0.27929937379447894, "grad_norm": 0.13671875, "learning_rate": 0.0019934628933311336, "loss": 0.248, "step": 39350 }, { "epoch": 0.2793703520343251, "grad_norm": 0.0888671875, "learning_rate": 0.0019934594878403284, "loss": 0.259, "step": 39360 }, { "epoch": 0.2794413302741712, "grad_norm": 0.0927734375, "learning_rate": 0.0019934560814659475, "loss": 0.2389, "step": 39370 }, { "epoch": 0.2795123085140173, "grad_norm": 0.10595703125, "learning_rate": 0.001993452674207995, "loss": 0.2649, "step": 39380 }, { "epoch": 0.27958328675386346, "grad_norm": 0.1171875, "learning_rate": 0.0019934492660664735, "loss": 0.2421, "step": 39390 }, { "epoch": 0.27965426499370954, "grad_norm": 0.1162109375, "learning_rate": 0.001993445857041387, "loss": 0.2667, "step": 39400 }, { "epoch": 0.2797252432335557, "grad_norm": 0.1494140625, "learning_rate": 0.001993442447132738, "loss": 0.2489, "step": 39410 }, { "epoch": 0.2797962214734018, "grad_norm": 0.11328125, "learning_rate": 0.001993439036340531, "loss": 0.2654, "step": 39420 }, { "epoch": 0.2798671997132479, "grad_norm": 0.13671875, "learning_rate": 0.0019934356246647683, "loss": 0.2672, "step": 39430 }, { "epoch": 0.279938177953094, "grad_norm": 0.11279296875, "learning_rate": 0.0019934322121054537, "loss": 0.248, "step": 39440 }, { "epoch": 0.28000915619294015, "grad_norm": 0.0859375, "learning_rate": 0.001993428798662591, "loss": 0.2581, "step": 39450 }, { "epoch": 0.2800801344327863, "grad_norm": 0.107421875, "learning_rate": 0.001993425384336183, "loss": 0.2705, "step": 39460 }, { "epoch": 0.2801511126726324, "grad_norm": 0.142578125, "learning_rate": 0.0019934219691262337, "loss": 0.2476, "step": 39470 }, { "epoch": 0.2802220909124785, "grad_norm": 0.1005859375, "learning_rate": 0.0019934185530327457, "loss": 0.2692, "step": 39480 }, { "epoch": 0.2802930691523246, "grad_norm": 0.1220703125, "learning_rate": 0.001993415136055723, "loss": 0.2428, "step": 39490 }, { "epoch": 0.28036404739217075, "grad_norm": 0.1044921875, "learning_rate": 0.001993411718195168, "loss": 0.2534, "step": 39500 }, { "epoch": 0.2804350256320169, "grad_norm": 0.0927734375, "learning_rate": 0.001993408299451086, "loss": 0.2571, "step": 39510 }, { "epoch": 0.280506003871863, "grad_norm": 0.0947265625, "learning_rate": 0.0019934048798234782, "loss": 0.2515, "step": 39520 }, { "epoch": 0.2805769821117091, "grad_norm": 0.0869140625, "learning_rate": 0.001993401459312349, "loss": 0.2454, "step": 39530 }, { "epoch": 0.2806479603515552, "grad_norm": 0.07275390625, "learning_rate": 0.001993398037917702, "loss": 0.2509, "step": 39540 }, { "epoch": 0.28071893859140135, "grad_norm": 0.126953125, "learning_rate": 0.0019933946156395405, "loss": 0.2809, "step": 39550 }, { "epoch": 0.2807899168312475, "grad_norm": 0.1025390625, "learning_rate": 0.0019933911924778676, "loss": 0.2492, "step": 39560 }, { "epoch": 0.2808608950710936, "grad_norm": 0.09423828125, "learning_rate": 0.001993387768432687, "loss": 0.2545, "step": 39570 }, { "epoch": 0.2809318733109397, "grad_norm": 0.162109375, "learning_rate": 0.0019933843435040017, "loss": 0.2549, "step": 39580 }, { "epoch": 0.2810028515507858, "grad_norm": 0.16796875, "learning_rate": 0.0019933809176918152, "loss": 0.2619, "step": 39590 }, { "epoch": 0.28107382979063195, "grad_norm": 0.2890625, "learning_rate": 0.001993377490996131, "loss": 0.2558, "step": 39600 }, { "epoch": 0.28114480803047803, "grad_norm": 0.103515625, "learning_rate": 0.0019933740634169523, "loss": 0.2644, "step": 39610 }, { "epoch": 0.2812157862703242, "grad_norm": 0.09814453125, "learning_rate": 0.0019933706349542827, "loss": 0.2649, "step": 39620 }, { "epoch": 0.2812867645101703, "grad_norm": 0.1328125, "learning_rate": 0.001993367205608126, "loss": 0.2471, "step": 39630 }, { "epoch": 0.2813577427500164, "grad_norm": 0.099609375, "learning_rate": 0.0019933637753784847, "loss": 0.2585, "step": 39640 }, { "epoch": 0.28142872098986255, "grad_norm": 0.0869140625, "learning_rate": 0.0019933603442653628, "loss": 0.2681, "step": 39650 }, { "epoch": 0.28149969922970863, "grad_norm": 0.0771484375, "learning_rate": 0.001993356912268763, "loss": 0.2407, "step": 39660 }, { "epoch": 0.2815706774695548, "grad_norm": 0.07861328125, "learning_rate": 0.00199335347938869, "loss": 0.2745, "step": 39670 }, { "epoch": 0.2816416557094009, "grad_norm": 0.10400390625, "learning_rate": 0.0019933500456251457, "loss": 0.2639, "step": 39680 }, { "epoch": 0.281712633949247, "grad_norm": 0.1484375, "learning_rate": 0.0019933466109781344, "loss": 0.2496, "step": 39690 }, { "epoch": 0.28178361218909315, "grad_norm": 0.1015625, "learning_rate": 0.0019933431754476597, "loss": 0.2416, "step": 39700 }, { "epoch": 0.28185459042893923, "grad_norm": 0.1171875, "learning_rate": 0.001993339739033724, "loss": 0.2488, "step": 39710 }, { "epoch": 0.2819255686687854, "grad_norm": 0.0751953125, "learning_rate": 0.001993336301736332, "loss": 0.2606, "step": 39720 }, { "epoch": 0.28199654690863146, "grad_norm": 0.2314453125, "learning_rate": 0.001993332863555486, "loss": 0.2552, "step": 39730 }, { "epoch": 0.2820675251484776, "grad_norm": 0.10009765625, "learning_rate": 0.0019933294244911894, "loss": 0.2471, "step": 39740 }, { "epoch": 0.28213850338832375, "grad_norm": 0.12109375, "learning_rate": 0.0019933259845434466, "loss": 0.2542, "step": 39750 }, { "epoch": 0.28220948162816983, "grad_norm": 0.12158203125, "learning_rate": 0.00199332254371226, "loss": 0.2418, "step": 39760 }, { "epoch": 0.282280459868016, "grad_norm": 0.10009765625, "learning_rate": 0.0019933191019976333, "loss": 0.2411, "step": 39770 }, { "epoch": 0.28235143810786206, "grad_norm": 0.1025390625, "learning_rate": 0.0019933156593995704, "loss": 0.2369, "step": 39780 }, { "epoch": 0.2824224163477082, "grad_norm": 0.11962890625, "learning_rate": 0.001993312215918074, "loss": 0.2655, "step": 39790 }, { "epoch": 0.28249339458755435, "grad_norm": 0.0791015625, "learning_rate": 0.001993308771553148, "loss": 0.2496, "step": 39800 }, { "epoch": 0.28256437282740043, "grad_norm": 0.15625, "learning_rate": 0.0019933053263047956, "loss": 0.2526, "step": 39810 }, { "epoch": 0.2826353510672466, "grad_norm": 0.1240234375, "learning_rate": 0.0019933018801730202, "loss": 0.27, "step": 39820 }, { "epoch": 0.28270632930709266, "grad_norm": 0.1005859375, "learning_rate": 0.001993298433157825, "loss": 0.245, "step": 39830 }, { "epoch": 0.2827773075469388, "grad_norm": 0.1279296875, "learning_rate": 0.001993294985259214, "loss": 0.2507, "step": 39840 }, { "epoch": 0.2828482857867849, "grad_norm": 0.10986328125, "learning_rate": 0.00199329153647719, "loss": 0.2903, "step": 39850 }, { "epoch": 0.28291926402663103, "grad_norm": 0.1220703125, "learning_rate": 0.0019932880868117567, "loss": 0.2648, "step": 39860 }, { "epoch": 0.2829902422664772, "grad_norm": 0.103515625, "learning_rate": 0.0019932846362629175, "loss": 0.2499, "step": 39870 }, { "epoch": 0.28306122050632326, "grad_norm": 0.376953125, "learning_rate": 0.0019932811848306755, "loss": 0.2513, "step": 39880 }, { "epoch": 0.2831321987461694, "grad_norm": 0.1708984375, "learning_rate": 0.001993277732515035, "loss": 0.3192, "step": 39890 }, { "epoch": 0.2832031769860155, "grad_norm": 0.298828125, "learning_rate": 0.0019932742793159985, "loss": 0.2757, "step": 39900 }, { "epoch": 0.28327415522586163, "grad_norm": 0.10595703125, "learning_rate": 0.0019932708252335695, "loss": 0.2379, "step": 39910 }, { "epoch": 0.2833451334657078, "grad_norm": 0.0791015625, "learning_rate": 0.0019932673702677517, "loss": 0.2524, "step": 39920 }, { "epoch": 0.28341611170555386, "grad_norm": 0.09912109375, "learning_rate": 0.0019932639144185486, "loss": 0.2573, "step": 39930 }, { "epoch": 0.2834870899454, "grad_norm": 0.13671875, "learning_rate": 0.0019932604576859635, "loss": 0.2532, "step": 39940 }, { "epoch": 0.2835580681852461, "grad_norm": 0.0908203125, "learning_rate": 0.00199325700007, "loss": 0.247, "step": 39950 }, { "epoch": 0.28362904642509223, "grad_norm": 0.10205078125, "learning_rate": 0.001993253541570661, "loss": 0.2707, "step": 39960 }, { "epoch": 0.2837000246649383, "grad_norm": 0.1435546875, "learning_rate": 0.0019932500821879503, "loss": 0.2552, "step": 39970 }, { "epoch": 0.28377100290478446, "grad_norm": 0.0947265625, "learning_rate": 0.001993246621921871, "loss": 0.2495, "step": 39980 }, { "epoch": 0.2838419811446306, "grad_norm": 0.08984375, "learning_rate": 0.001993243160772427, "loss": 0.254, "step": 39990 }, { "epoch": 0.2839129593844767, "grad_norm": 0.1201171875, "learning_rate": 0.0019932396987396217, "loss": 0.2548, "step": 40000 }, { "epoch": 0.2839129593844767, "eval_covost2-zh-en_loss": 3.803316593170166, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.0858, "eval_covost2-zh-en_samples_per_second": 3.186, "eval_covost2-zh-en_steps_per_second": 0.199, "step": 40000 }, { "epoch": 0.2839129593844767, "eval_covost2-en-zh_loss": 3.1883370876312256, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.7269, "eval_covost2-en-zh_samples_per_second": 3.244, "eval_covost2-en-zh_steps_per_second": 0.203, "step": 40000 }, { "epoch": 0.28398393762432284, "grad_norm": 0.11572265625, "learning_rate": 0.001993236235823458, "loss": 0.258, "step": 40010 }, { "epoch": 0.2840549158641689, "grad_norm": 0.130859375, "learning_rate": 0.0019932327720239397, "loss": 0.2681, "step": 40020 }, { "epoch": 0.28412589410401506, "grad_norm": 0.1181640625, "learning_rate": 0.00199322930734107, "loss": 0.2491, "step": 40030 }, { "epoch": 0.2841968723438612, "grad_norm": 0.1884765625, "learning_rate": 0.001993225841774853, "loss": 0.2731, "step": 40040 }, { "epoch": 0.2842678505837073, "grad_norm": 0.20703125, "learning_rate": 0.0019932223753252915, "loss": 0.2641, "step": 40050 }, { "epoch": 0.28433882882355344, "grad_norm": 0.080078125, "learning_rate": 0.001993218907992389, "loss": 0.2475, "step": 40060 }, { "epoch": 0.2844098070633995, "grad_norm": 0.09521484375, "learning_rate": 0.0019932154397761487, "loss": 0.2525, "step": 40070 }, { "epoch": 0.28448078530324566, "grad_norm": 0.173828125, "learning_rate": 0.0019932119706765747, "loss": 0.2686, "step": 40080 }, { "epoch": 0.28455176354309175, "grad_norm": 0.1064453125, "learning_rate": 0.0019932085006936695, "loss": 0.25, "step": 40090 }, { "epoch": 0.2846227417829379, "grad_norm": 0.1005859375, "learning_rate": 0.0019932050298274375, "loss": 0.2691, "step": 40100 }, { "epoch": 0.28469372002278404, "grad_norm": 0.0830078125, "learning_rate": 0.0019932015580778817, "loss": 0.2602, "step": 40110 }, { "epoch": 0.2847646982626301, "grad_norm": 0.0908203125, "learning_rate": 0.001993198085445006, "loss": 0.269, "step": 40120 }, { "epoch": 0.28483567650247626, "grad_norm": 0.10205078125, "learning_rate": 0.0019931946119288125, "loss": 0.2586, "step": 40130 }, { "epoch": 0.28490665474232235, "grad_norm": 0.11669921875, "learning_rate": 0.0019931911375293057, "loss": 0.2457, "step": 40140 }, { "epoch": 0.2849776329821685, "grad_norm": 0.0791015625, "learning_rate": 0.001993187662246489, "loss": 0.2581, "step": 40150 }, { "epoch": 0.28504861122201464, "grad_norm": 0.0751953125, "learning_rate": 0.001993184186080366, "loss": 0.2619, "step": 40160 }, { "epoch": 0.2851195894618607, "grad_norm": 0.10791015625, "learning_rate": 0.001993180709030939, "loss": 0.2554, "step": 40170 }, { "epoch": 0.28519056770170687, "grad_norm": 0.08544921875, "learning_rate": 0.001993177231098213, "loss": 0.2471, "step": 40180 }, { "epoch": 0.28526154594155295, "grad_norm": 0.13671875, "learning_rate": 0.0019931737522821905, "loss": 0.2469, "step": 40190 }, { "epoch": 0.2853325241813991, "grad_norm": 0.1259765625, "learning_rate": 0.001993170272582875, "loss": 0.2724, "step": 40200 }, { "epoch": 0.2854035024212452, "grad_norm": 0.2001953125, "learning_rate": 0.0019931667920002703, "loss": 0.2574, "step": 40210 }, { "epoch": 0.2854744806610913, "grad_norm": 0.1181640625, "learning_rate": 0.0019931633105343795, "loss": 0.2619, "step": 40220 }, { "epoch": 0.28554545890093747, "grad_norm": 0.1513671875, "learning_rate": 0.001993159828185206, "loss": 0.2687, "step": 40230 }, { "epoch": 0.28561643714078355, "grad_norm": 0.09912109375, "learning_rate": 0.0019931563449527537, "loss": 0.2495, "step": 40240 }, { "epoch": 0.2856874153806297, "grad_norm": 0.166015625, "learning_rate": 0.0019931528608370256, "loss": 0.2601, "step": 40250 }, { "epoch": 0.2857583936204758, "grad_norm": 0.12060546875, "learning_rate": 0.0019931493758380257, "loss": 0.261, "step": 40260 }, { "epoch": 0.2858293718603219, "grad_norm": 0.09228515625, "learning_rate": 0.0019931458899557566, "loss": 0.2654, "step": 40270 }, { "epoch": 0.28590035010016807, "grad_norm": 0.12060546875, "learning_rate": 0.0019931424031902224, "loss": 0.2633, "step": 40280 }, { "epoch": 0.28597132834001415, "grad_norm": 0.11328125, "learning_rate": 0.001993138915541426, "loss": 0.2671, "step": 40290 }, { "epoch": 0.2860423065798603, "grad_norm": 0.10107421875, "learning_rate": 0.0019931354270093715, "loss": 0.2367, "step": 40300 }, { "epoch": 0.2861132848197064, "grad_norm": 0.09912109375, "learning_rate": 0.001993131937594062, "loss": 0.2412, "step": 40310 }, { "epoch": 0.2861842630595525, "grad_norm": 0.2451171875, "learning_rate": 0.001993128447295501, "loss": 0.2572, "step": 40320 }, { "epoch": 0.2862552412993986, "grad_norm": 0.12890625, "learning_rate": 0.0019931249561136915, "loss": 0.2786, "step": 40330 }, { "epoch": 0.28632621953924475, "grad_norm": 0.07861328125, "learning_rate": 0.001993121464048638, "loss": 0.2405, "step": 40340 }, { "epoch": 0.2863971977790909, "grad_norm": 0.07763671875, "learning_rate": 0.001993117971100343, "loss": 0.2507, "step": 40350 }, { "epoch": 0.286468176018937, "grad_norm": 0.09375, "learning_rate": 0.0019931144772688102, "loss": 0.2473, "step": 40360 }, { "epoch": 0.2865391542587831, "grad_norm": 0.3515625, "learning_rate": 0.0019931109825540435, "loss": 0.2799, "step": 40370 }, { "epoch": 0.2866101324986292, "grad_norm": 0.10986328125, "learning_rate": 0.001993107486956046, "loss": 0.2495, "step": 40380 }, { "epoch": 0.28668111073847535, "grad_norm": 0.1923828125, "learning_rate": 0.001993103990474821, "loss": 0.273, "step": 40390 }, { "epoch": 0.2867520889783215, "grad_norm": 0.08740234375, "learning_rate": 0.0019931004931103722, "loss": 0.2578, "step": 40400 }, { "epoch": 0.2868230672181676, "grad_norm": 0.072265625, "learning_rate": 0.001993096994862703, "loss": 0.2619, "step": 40410 }, { "epoch": 0.2868940454580137, "grad_norm": 0.1416015625, "learning_rate": 0.0019930934957318167, "loss": 0.2534, "step": 40420 }, { "epoch": 0.2869650236978598, "grad_norm": 0.103515625, "learning_rate": 0.001993089995717717, "loss": 0.2538, "step": 40430 }, { "epoch": 0.28703600193770595, "grad_norm": 0.1904296875, "learning_rate": 0.001993086494820407, "loss": 0.254, "step": 40440 }, { "epoch": 0.28710698017755204, "grad_norm": 0.173828125, "learning_rate": 0.001993082993039891, "loss": 0.2606, "step": 40450 }, { "epoch": 0.2871779584173982, "grad_norm": 0.0771484375, "learning_rate": 0.0019930794903761716, "loss": 0.2525, "step": 40460 }, { "epoch": 0.2872489366572443, "grad_norm": 0.08935546875, "learning_rate": 0.0019930759868292524, "loss": 0.2467, "step": 40470 }, { "epoch": 0.2873199148970904, "grad_norm": 0.1337890625, "learning_rate": 0.0019930724823991374, "loss": 0.2588, "step": 40480 }, { "epoch": 0.28739089313693655, "grad_norm": 0.10107421875, "learning_rate": 0.0019930689770858295, "loss": 0.2565, "step": 40490 }, { "epoch": 0.28746187137678264, "grad_norm": 0.150390625, "learning_rate": 0.0019930654708893323, "loss": 0.2622, "step": 40500 }, { "epoch": 0.2875328496166288, "grad_norm": 0.072265625, "learning_rate": 0.0019930619638096492, "loss": 0.2426, "step": 40510 }, { "epoch": 0.2876038278564749, "grad_norm": 0.08447265625, "learning_rate": 0.001993058455846784, "loss": 0.2583, "step": 40520 }, { "epoch": 0.287674806096321, "grad_norm": 0.1494140625, "learning_rate": 0.00199305494700074, "loss": 0.2492, "step": 40530 }, { "epoch": 0.28774578433616715, "grad_norm": 0.1689453125, "learning_rate": 0.0019930514372715207, "loss": 0.2552, "step": 40540 }, { "epoch": 0.28781676257601324, "grad_norm": 0.09423828125, "learning_rate": 0.001993047926659129, "loss": 0.2625, "step": 40550 }, { "epoch": 0.2878877408158594, "grad_norm": 0.10107421875, "learning_rate": 0.0019930444151635696, "loss": 0.2752, "step": 40560 }, { "epoch": 0.28795871905570547, "grad_norm": 0.11181640625, "learning_rate": 0.0019930409027848445, "loss": 0.2342, "step": 40570 }, { "epoch": 0.2880296972955516, "grad_norm": 0.11083984375, "learning_rate": 0.0019930373895229586, "loss": 0.2801, "step": 40580 }, { "epoch": 0.28810067553539775, "grad_norm": 0.0859375, "learning_rate": 0.001993033875377914, "loss": 0.2515, "step": 40590 }, { "epoch": 0.28817165377524384, "grad_norm": 0.1396484375, "learning_rate": 0.001993030360349715, "loss": 0.2526, "step": 40600 }, { "epoch": 0.28824263201509, "grad_norm": 0.07275390625, "learning_rate": 0.0019930268444383653, "loss": 0.2642, "step": 40610 }, { "epoch": 0.28831361025493607, "grad_norm": 0.1455078125, "learning_rate": 0.001993023327643868, "loss": 0.2519, "step": 40620 }, { "epoch": 0.2883845884947822, "grad_norm": 0.099609375, "learning_rate": 0.001993019809966227, "loss": 0.251, "step": 40630 }, { "epoch": 0.28845556673462835, "grad_norm": 0.15625, "learning_rate": 0.0019930162914054443, "loss": 0.2736, "step": 40640 }, { "epoch": 0.28852654497447444, "grad_norm": 0.0751953125, "learning_rate": 0.0019930127719615253, "loss": 0.2695, "step": 40650 }, { "epoch": 0.2885975232143206, "grad_norm": 0.109375, "learning_rate": 0.0019930092516344724, "loss": 0.2624, "step": 40660 }, { "epoch": 0.28866850145416667, "grad_norm": 0.146484375, "learning_rate": 0.001993005730424289, "loss": 0.2495, "step": 40670 }, { "epoch": 0.2887394796940128, "grad_norm": 0.1220703125, "learning_rate": 0.0019930022083309794, "loss": 0.2613, "step": 40680 }, { "epoch": 0.2888104579338589, "grad_norm": 0.09033203125, "learning_rate": 0.0019929986853545467, "loss": 0.2374, "step": 40690 }, { "epoch": 0.28888143617370504, "grad_norm": 0.10009765625, "learning_rate": 0.0019929951614949936, "loss": 0.2543, "step": 40700 }, { "epoch": 0.2889524144135512, "grad_norm": 0.1044921875, "learning_rate": 0.001992991636752325, "loss": 0.2437, "step": 40710 }, { "epoch": 0.28902339265339727, "grad_norm": 0.09619140625, "learning_rate": 0.001992988111126543, "loss": 0.254, "step": 40720 }, { "epoch": 0.2890943708932434, "grad_norm": 0.12255859375, "learning_rate": 0.0019929845846176518, "loss": 0.2635, "step": 40730 }, { "epoch": 0.2891653491330895, "grad_norm": 0.12353515625, "learning_rate": 0.001992981057225655, "loss": 0.2724, "step": 40740 }, { "epoch": 0.28923632737293564, "grad_norm": 0.12060546875, "learning_rate": 0.001992977528950556, "loss": 0.2454, "step": 40750 }, { "epoch": 0.2893073056127818, "grad_norm": 0.142578125, "learning_rate": 0.0019929739997923577, "loss": 0.2544, "step": 40760 }, { "epoch": 0.28937828385262787, "grad_norm": 0.0849609375, "learning_rate": 0.0019929704697510646, "loss": 0.2535, "step": 40770 }, { "epoch": 0.289449262092474, "grad_norm": 0.07861328125, "learning_rate": 0.0019929669388266797, "loss": 0.2813, "step": 40780 }, { "epoch": 0.2895202403323201, "grad_norm": 0.0869140625, "learning_rate": 0.001992963407019206, "loss": 0.2561, "step": 40790 }, { "epoch": 0.28959121857216624, "grad_norm": 0.1044921875, "learning_rate": 0.0019929598743286476, "loss": 0.26, "step": 40800 }, { "epoch": 0.2896621968120124, "grad_norm": 0.1318359375, "learning_rate": 0.001992956340755008, "loss": 0.2564, "step": 40810 }, { "epoch": 0.28973317505185847, "grad_norm": 0.2041015625, "learning_rate": 0.001992952806298291, "loss": 0.2501, "step": 40820 }, { "epoch": 0.2898041532917046, "grad_norm": 0.0771484375, "learning_rate": 0.001992949270958499, "loss": 0.2561, "step": 40830 }, { "epoch": 0.2898751315315507, "grad_norm": 0.08837890625, "learning_rate": 0.001992945734735636, "loss": 0.2382, "step": 40840 }, { "epoch": 0.28994610977139684, "grad_norm": 0.109375, "learning_rate": 0.001992942197629706, "loss": 0.2518, "step": 40850 }, { "epoch": 0.29001708801124293, "grad_norm": 0.109375, "learning_rate": 0.0019929386596407124, "loss": 0.2913, "step": 40860 }, { "epoch": 0.29008806625108907, "grad_norm": 0.095703125, "learning_rate": 0.0019929351207686576, "loss": 0.2428, "step": 40870 }, { "epoch": 0.2901590444909352, "grad_norm": 0.07568359375, "learning_rate": 0.0019929315810135466, "loss": 0.255, "step": 40880 }, { "epoch": 0.2902300227307813, "grad_norm": 0.09814453125, "learning_rate": 0.001992928040375382, "loss": 0.2439, "step": 40890 }, { "epoch": 0.29030100097062744, "grad_norm": 0.1220703125, "learning_rate": 0.0019929244988541677, "loss": 0.257, "step": 40900 }, { "epoch": 0.29037197921047353, "grad_norm": 0.10986328125, "learning_rate": 0.0019929209564499067, "loss": 0.2589, "step": 40910 }, { "epoch": 0.29044295745031967, "grad_norm": 0.08642578125, "learning_rate": 0.0019929174131626034, "loss": 0.2599, "step": 40920 }, { "epoch": 0.2905139356901658, "grad_norm": 0.181640625, "learning_rate": 0.0019929138689922603, "loss": 0.2376, "step": 40930 }, { "epoch": 0.2905849139300119, "grad_norm": 0.07177734375, "learning_rate": 0.0019929103239388814, "loss": 0.2755, "step": 40940 }, { "epoch": 0.29065589216985804, "grad_norm": 0.130859375, "learning_rate": 0.00199290677800247, "loss": 0.2732, "step": 40950 }, { "epoch": 0.29072687040970413, "grad_norm": 0.0849609375, "learning_rate": 0.00199290323118303, "loss": 0.2559, "step": 40960 }, { "epoch": 0.2907978486495503, "grad_norm": 0.142578125, "learning_rate": 0.001992899683480565, "loss": 0.2601, "step": 40970 }, { "epoch": 0.29086882688939636, "grad_norm": 0.1279296875, "learning_rate": 0.0019928961348950774, "loss": 0.2613, "step": 40980 }, { "epoch": 0.2909398051292425, "grad_norm": 0.10498046875, "learning_rate": 0.001992892585426572, "loss": 0.2586, "step": 40990 }, { "epoch": 0.29101078336908864, "grad_norm": 0.19921875, "learning_rate": 0.0019928890350750517, "loss": 0.2612, "step": 41000 }, { "epoch": 0.29108176160893473, "grad_norm": 0.09765625, "learning_rate": 0.00199288548384052, "loss": 0.2798, "step": 41010 }, { "epoch": 0.2911527398487809, "grad_norm": 0.123046875, "learning_rate": 0.0019928819317229806, "loss": 0.2586, "step": 41020 }, { "epoch": 0.29122371808862696, "grad_norm": 0.095703125, "learning_rate": 0.001992878378722437, "loss": 0.2607, "step": 41030 }, { "epoch": 0.2912946963284731, "grad_norm": 0.1640625, "learning_rate": 0.0019928748248388924, "loss": 0.2672, "step": 41040 }, { "epoch": 0.29136567456831924, "grad_norm": 0.10107421875, "learning_rate": 0.001992871270072351, "loss": 0.2471, "step": 41050 }, { "epoch": 0.29143665280816533, "grad_norm": 0.162109375, "learning_rate": 0.0019928677144228153, "loss": 0.2597, "step": 41060 }, { "epoch": 0.2915076310480115, "grad_norm": 0.07861328125, "learning_rate": 0.00199286415789029, "loss": 0.2599, "step": 41070 }, { "epoch": 0.29157860928785756, "grad_norm": 0.09765625, "learning_rate": 0.0019928606004747775, "loss": 0.2733, "step": 41080 }, { "epoch": 0.2916495875277037, "grad_norm": 0.0986328125, "learning_rate": 0.0019928570421762822, "loss": 0.2581, "step": 41090 }, { "epoch": 0.2917205657675498, "grad_norm": 0.061279296875, "learning_rate": 0.0019928534829948066, "loss": 0.2602, "step": 41100 }, { "epoch": 0.29179154400739593, "grad_norm": 0.08154296875, "learning_rate": 0.0019928499229303555, "loss": 0.2653, "step": 41110 }, { "epoch": 0.2918625222472421, "grad_norm": 0.1171875, "learning_rate": 0.001992846361982932, "loss": 0.2483, "step": 41120 }, { "epoch": 0.29193350048708816, "grad_norm": 0.1279296875, "learning_rate": 0.0019928428001525387, "loss": 0.2427, "step": 41130 }, { "epoch": 0.2920044787269343, "grad_norm": 0.1484375, "learning_rate": 0.00199283923743918, "loss": 0.2719, "step": 41140 }, { "epoch": 0.2920754569667804, "grad_norm": 0.09765625, "learning_rate": 0.0019928356738428596, "loss": 0.2514, "step": 41150 }, { "epoch": 0.29214643520662653, "grad_norm": 0.1005859375, "learning_rate": 0.00199283210936358, "loss": 0.2617, "step": 41160 }, { "epoch": 0.2922174134464727, "grad_norm": 0.1259765625, "learning_rate": 0.0019928285440013464, "loss": 0.2375, "step": 41170 }, { "epoch": 0.29228839168631876, "grad_norm": 0.09375, "learning_rate": 0.001992824977756161, "loss": 0.2502, "step": 41180 }, { "epoch": 0.2923593699261649, "grad_norm": 0.10400390625, "learning_rate": 0.0019928214106280273, "loss": 0.2637, "step": 41190 }, { "epoch": 0.292430348166011, "grad_norm": 0.162109375, "learning_rate": 0.0019928178426169493, "loss": 0.2499, "step": 41200 }, { "epoch": 0.29250132640585713, "grad_norm": 0.1259765625, "learning_rate": 0.0019928142737229305, "loss": 0.2513, "step": 41210 }, { "epoch": 0.2925723046457032, "grad_norm": 0.107421875, "learning_rate": 0.0019928107039459742, "loss": 0.2782, "step": 41220 }, { "epoch": 0.29264328288554936, "grad_norm": 0.109375, "learning_rate": 0.001992807133286084, "loss": 0.2519, "step": 41230 }, { "epoch": 0.2927142611253955, "grad_norm": 0.130859375, "learning_rate": 0.001992803561743264, "loss": 0.2801, "step": 41240 }, { "epoch": 0.2927852393652416, "grad_norm": 0.09130859375, "learning_rate": 0.0019927999893175168, "loss": 0.2555, "step": 41250 }, { "epoch": 0.29285621760508773, "grad_norm": 0.10302734375, "learning_rate": 0.0019927964160088465, "loss": 0.2556, "step": 41260 }, { "epoch": 0.2929271958449338, "grad_norm": 0.1455078125, "learning_rate": 0.0019927928418172567, "loss": 0.2551, "step": 41270 }, { "epoch": 0.29299817408477996, "grad_norm": 0.2333984375, "learning_rate": 0.0019927892667427503, "loss": 0.2664, "step": 41280 }, { "epoch": 0.2930691523246261, "grad_norm": 0.169921875, "learning_rate": 0.0019927856907853312, "loss": 0.2757, "step": 41290 }, { "epoch": 0.2931401305644722, "grad_norm": 0.1337890625, "learning_rate": 0.0019927821139450034, "loss": 0.2357, "step": 41300 }, { "epoch": 0.29321110880431833, "grad_norm": 0.10205078125, "learning_rate": 0.0019927785362217703, "loss": 0.2445, "step": 41310 }, { "epoch": 0.2932820870441644, "grad_norm": 0.1484375, "learning_rate": 0.0019927749576156345, "loss": 0.2774, "step": 41320 }, { "epoch": 0.29335306528401056, "grad_norm": 0.1005859375, "learning_rate": 0.0019927713781266007, "loss": 0.2604, "step": 41330 }, { "epoch": 0.29342404352385665, "grad_norm": 0.1005859375, "learning_rate": 0.0019927677977546717, "loss": 0.2499, "step": 41340 }, { "epoch": 0.2934950217637028, "grad_norm": 0.0712890625, "learning_rate": 0.001992764216499851, "loss": 0.2469, "step": 41350 }, { "epoch": 0.29356600000354893, "grad_norm": 0.16796875, "learning_rate": 0.001992760634362143, "loss": 0.268, "step": 41360 }, { "epoch": 0.293636978243395, "grad_norm": 0.0888671875, "learning_rate": 0.0019927570513415508, "loss": 0.2472, "step": 41370 }, { "epoch": 0.29370795648324116, "grad_norm": 0.1083984375, "learning_rate": 0.0019927534674380777, "loss": 0.251, "step": 41380 }, { "epoch": 0.29377893472308725, "grad_norm": 0.09716796875, "learning_rate": 0.001992749882651727, "loss": 0.2405, "step": 41390 }, { "epoch": 0.2938499129629334, "grad_norm": 0.0947265625, "learning_rate": 0.001992746296982503, "loss": 0.2534, "step": 41400 }, { "epoch": 0.29392089120277953, "grad_norm": 0.1103515625, "learning_rate": 0.0019927427104304085, "loss": 0.2655, "step": 41410 }, { "epoch": 0.2939918694426256, "grad_norm": 0.1064453125, "learning_rate": 0.0019927391229954475, "loss": 0.2591, "step": 41420 }, { "epoch": 0.29406284768247176, "grad_norm": 0.1875, "learning_rate": 0.0019927355346776236, "loss": 0.2603, "step": 41430 }, { "epoch": 0.29413382592231785, "grad_norm": 0.138671875, "learning_rate": 0.0019927319454769404, "loss": 0.2326, "step": 41440 }, { "epoch": 0.294204804162164, "grad_norm": 0.12060546875, "learning_rate": 0.001992728355393401, "loss": 0.2704, "step": 41450 }, { "epoch": 0.2942757824020101, "grad_norm": 0.083984375, "learning_rate": 0.001992724764427009, "loss": 0.2586, "step": 41460 }, { "epoch": 0.2943467606418562, "grad_norm": 0.07763671875, "learning_rate": 0.0019927211725777687, "loss": 0.2419, "step": 41470 }, { "epoch": 0.29441773888170236, "grad_norm": 0.15234375, "learning_rate": 0.0019927175798456825, "loss": 0.2608, "step": 41480 }, { "epoch": 0.29448871712154845, "grad_norm": 0.08154296875, "learning_rate": 0.0019927139862307547, "loss": 0.2789, "step": 41490 }, { "epoch": 0.2945596953613946, "grad_norm": 0.12353515625, "learning_rate": 0.0019927103917329893, "loss": 0.2422, "step": 41500 }, { "epoch": 0.2946306736012407, "grad_norm": 0.203125, "learning_rate": 0.001992706796352389, "loss": 0.2567, "step": 41510 }, { "epoch": 0.2947016518410868, "grad_norm": 0.1328125, "learning_rate": 0.0019927032000889573, "loss": 0.2739, "step": 41520 }, { "epoch": 0.29477263008093296, "grad_norm": 0.11279296875, "learning_rate": 0.001992699602942698, "loss": 0.2639, "step": 41530 }, { "epoch": 0.29484360832077905, "grad_norm": 0.1865234375, "learning_rate": 0.0019926960049136155, "loss": 0.2492, "step": 41540 }, { "epoch": 0.2949145865606252, "grad_norm": 0.11376953125, "learning_rate": 0.001992692406001712, "loss": 0.2563, "step": 41550 }, { "epoch": 0.2949855648004713, "grad_norm": 0.08984375, "learning_rate": 0.0019926888062069916, "loss": 0.2445, "step": 41560 }, { "epoch": 0.2950565430403174, "grad_norm": 0.1640625, "learning_rate": 0.0019926852055294586, "loss": 0.2529, "step": 41570 }, { "epoch": 0.2951275212801635, "grad_norm": 0.1279296875, "learning_rate": 0.001992681603969115, "loss": 0.2611, "step": 41580 }, { "epoch": 0.29519849952000965, "grad_norm": 0.26171875, "learning_rate": 0.0019926780015259654, "loss": 0.2368, "step": 41590 }, { "epoch": 0.2952694777598558, "grad_norm": 0.1259765625, "learning_rate": 0.0019926743982000136, "loss": 0.2519, "step": 41600 }, { "epoch": 0.2953404559997019, "grad_norm": 0.1904296875, "learning_rate": 0.0019926707939912627, "loss": 0.2674, "step": 41610 }, { "epoch": 0.295411434239548, "grad_norm": 0.0966796875, "learning_rate": 0.0019926671888997162, "loss": 0.2661, "step": 41620 }, { "epoch": 0.2954824124793941, "grad_norm": 0.08837890625, "learning_rate": 0.0019926635829253777, "loss": 0.264, "step": 41630 }, { "epoch": 0.29555339071924025, "grad_norm": 0.083984375, "learning_rate": 0.0019926599760682513, "loss": 0.2593, "step": 41640 }, { "epoch": 0.2956243689590864, "grad_norm": 0.10986328125, "learning_rate": 0.00199265636832834, "loss": 0.2499, "step": 41650 }, { "epoch": 0.2956953471989325, "grad_norm": 0.08642578125, "learning_rate": 0.001992652759705647, "loss": 0.2493, "step": 41660 }, { "epoch": 0.2957663254387786, "grad_norm": 0.1044921875, "learning_rate": 0.0019926491502001765, "loss": 0.2675, "step": 41670 }, { "epoch": 0.2958373036786247, "grad_norm": 0.11181640625, "learning_rate": 0.001992645539811932, "loss": 0.2468, "step": 41680 }, { "epoch": 0.29590828191847085, "grad_norm": 0.1220703125, "learning_rate": 0.0019926419285409176, "loss": 0.2519, "step": 41690 }, { "epoch": 0.29597926015831694, "grad_norm": 0.07177734375, "learning_rate": 0.0019926383163871354, "loss": 0.2603, "step": 41700 }, { "epoch": 0.2960502383981631, "grad_norm": 0.08837890625, "learning_rate": 0.00199263470335059, "loss": 0.2412, "step": 41710 }, { "epoch": 0.2961212166380092, "grad_norm": 0.0830078125, "learning_rate": 0.0019926310894312856, "loss": 0.2506, "step": 41720 }, { "epoch": 0.2961921948778553, "grad_norm": 0.259765625, "learning_rate": 0.0019926274746292242, "loss": 0.2641, "step": 41730 }, { "epoch": 0.29626317311770145, "grad_norm": 0.07421875, "learning_rate": 0.0019926238589444106, "loss": 0.2598, "step": 41740 }, { "epoch": 0.29633415135754754, "grad_norm": 0.1962890625, "learning_rate": 0.001992620242376848, "loss": 0.2642, "step": 41750 }, { "epoch": 0.2964051295973937, "grad_norm": 0.09326171875, "learning_rate": 0.0019926166249265398, "loss": 0.2692, "step": 41760 }, { "epoch": 0.2964761078372398, "grad_norm": 0.11083984375, "learning_rate": 0.0019926130065934895, "loss": 0.2389, "step": 41770 }, { "epoch": 0.2965470860770859, "grad_norm": 0.11669921875, "learning_rate": 0.001992609387377701, "loss": 0.2809, "step": 41780 }, { "epoch": 0.29661806431693205, "grad_norm": 0.1279296875, "learning_rate": 0.001992605767279178, "loss": 0.251, "step": 41790 }, { "epoch": 0.29668904255677814, "grad_norm": 0.09130859375, "learning_rate": 0.0019926021462979234, "loss": 0.252, "step": 41800 }, { "epoch": 0.2967600207966243, "grad_norm": 0.111328125, "learning_rate": 0.001992598524433941, "loss": 0.2398, "step": 41810 }, { "epoch": 0.29683099903647037, "grad_norm": 0.087890625, "learning_rate": 0.001992594901687235, "loss": 0.2621, "step": 41820 }, { "epoch": 0.2969019772763165, "grad_norm": 0.12109375, "learning_rate": 0.001992591278057809, "loss": 0.2669, "step": 41830 }, { "epoch": 0.29697295551616265, "grad_norm": 0.10595703125, "learning_rate": 0.0019925876535456656, "loss": 0.2486, "step": 41840 }, { "epoch": 0.29704393375600874, "grad_norm": 0.09423828125, "learning_rate": 0.0019925840281508093, "loss": 0.2627, "step": 41850 }, { "epoch": 0.2971149119958549, "grad_norm": 0.10791015625, "learning_rate": 0.0019925804018732433, "loss": 0.2548, "step": 41860 }, { "epoch": 0.29718589023570097, "grad_norm": 0.09716796875, "learning_rate": 0.0019925767747129707, "loss": 0.2519, "step": 41870 }, { "epoch": 0.2972568684755471, "grad_norm": 0.0712890625, "learning_rate": 0.0019925731466699965, "loss": 0.2527, "step": 41880 }, { "epoch": 0.29732784671539325, "grad_norm": 0.1748046875, "learning_rate": 0.0019925695177443227, "loss": 0.2361, "step": 41890 }, { "epoch": 0.29739882495523934, "grad_norm": 0.1103515625, "learning_rate": 0.0019925658879359535, "loss": 0.2404, "step": 41900 }, { "epoch": 0.2974698031950855, "grad_norm": 0.1669921875, "learning_rate": 0.001992562257244893, "loss": 0.2673, "step": 41910 }, { "epoch": 0.29754078143493157, "grad_norm": 0.1669921875, "learning_rate": 0.0019925586256711444, "loss": 0.2604, "step": 41920 }, { "epoch": 0.2976117596747777, "grad_norm": 0.099609375, "learning_rate": 0.001992554993214711, "loss": 0.2618, "step": 41930 }, { "epoch": 0.29768273791462385, "grad_norm": 0.1396484375, "learning_rate": 0.0019925513598755967, "loss": 0.2524, "step": 41940 }, { "epoch": 0.29775371615446994, "grad_norm": 0.1396484375, "learning_rate": 0.001992547725653805, "loss": 0.2429, "step": 41950 }, { "epoch": 0.2978246943943161, "grad_norm": 0.087890625, "learning_rate": 0.00199254409054934, "loss": 0.2562, "step": 41960 }, { "epoch": 0.29789567263416217, "grad_norm": 0.09619140625, "learning_rate": 0.001992540454562204, "loss": 0.2557, "step": 41970 }, { "epoch": 0.2979666508740083, "grad_norm": 0.11669921875, "learning_rate": 0.001992536817692402, "loss": 0.2716, "step": 41980 }, { "epoch": 0.2980376291138544, "grad_norm": 0.09326171875, "learning_rate": 0.0019925331799399374, "loss": 0.2576, "step": 41990 }, { "epoch": 0.29810860735370054, "grad_norm": 0.11279296875, "learning_rate": 0.001992529541304813, "loss": 0.2503, "step": 42000 }, { "epoch": 0.29810860735370054, "eval_covost2-zh-en_loss": 3.8793630599975586, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.531, "eval_covost2-zh-en_samples_per_second": 2.972, "eval_covost2-zh-en_steps_per_second": 0.186, "step": 42000 }, { "epoch": 0.29810860735370054, "eval_covost2-en-zh_loss": 3.187016487121582, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 23.0216, "eval_covost2-en-zh_samples_per_second": 2.78, "eval_covost2-en-zh_steps_per_second": 0.174, "step": 42000 }, { "epoch": 0.2981795855935467, "grad_norm": 0.072265625, "learning_rate": 0.0019925259017870324, "loss": 0.2526, "step": 42010 }, { "epoch": 0.29825056383339277, "grad_norm": 0.2216796875, "learning_rate": 0.0019925222613866, "loss": 0.2663, "step": 42020 }, { "epoch": 0.2983215420732389, "grad_norm": 0.0947265625, "learning_rate": 0.001992518620103519, "loss": 0.2517, "step": 42030 }, { "epoch": 0.298392520313085, "grad_norm": 0.1767578125, "learning_rate": 0.001992514977937793, "loss": 0.273, "step": 42040 }, { "epoch": 0.29846349855293114, "grad_norm": 0.1083984375, "learning_rate": 0.001992511334889426, "loss": 0.2666, "step": 42050 }, { "epoch": 0.2985344767927773, "grad_norm": 0.1376953125, "learning_rate": 0.0019925076909584204, "loss": 0.2488, "step": 42060 }, { "epoch": 0.29860545503262337, "grad_norm": 0.306640625, "learning_rate": 0.001992504046144781, "loss": 0.2746, "step": 42070 }, { "epoch": 0.2986764332724695, "grad_norm": 0.0908203125, "learning_rate": 0.0019925004004485115, "loss": 0.2656, "step": 42080 }, { "epoch": 0.2987474115123156, "grad_norm": 0.099609375, "learning_rate": 0.001992496753869615, "loss": 0.2459, "step": 42090 }, { "epoch": 0.29881838975216174, "grad_norm": 0.13671875, "learning_rate": 0.0019924931064080947, "loss": 0.2606, "step": 42100 }, { "epoch": 0.2988893679920078, "grad_norm": 0.10595703125, "learning_rate": 0.001992489458063955, "loss": 0.2613, "step": 42110 }, { "epoch": 0.29896034623185397, "grad_norm": 0.095703125, "learning_rate": 0.001992485808837199, "loss": 0.2494, "step": 42120 }, { "epoch": 0.2990313244717001, "grad_norm": 0.10546875, "learning_rate": 0.00199248215872783, "loss": 0.2528, "step": 42130 }, { "epoch": 0.2991023027115462, "grad_norm": 0.126953125, "learning_rate": 0.0019924785077358527, "loss": 0.2697, "step": 42140 }, { "epoch": 0.29917328095139234, "grad_norm": 0.1728515625, "learning_rate": 0.0019924748558612696, "loss": 0.2531, "step": 42150 }, { "epoch": 0.2992442591912384, "grad_norm": 0.08056640625, "learning_rate": 0.0019924712031040856, "loss": 0.2597, "step": 42160 }, { "epoch": 0.29931523743108457, "grad_norm": 0.18359375, "learning_rate": 0.0019924675494643026, "loss": 0.2682, "step": 42170 }, { "epoch": 0.2993862156709307, "grad_norm": 0.1240234375, "learning_rate": 0.0019924638949419256, "loss": 0.2708, "step": 42180 }, { "epoch": 0.2994571939107768, "grad_norm": 0.1240234375, "learning_rate": 0.0019924602395369577, "loss": 0.2428, "step": 42190 }, { "epoch": 0.29952817215062294, "grad_norm": 0.10400390625, "learning_rate": 0.0019924565832494024, "loss": 0.2556, "step": 42200 }, { "epoch": 0.299599150390469, "grad_norm": 0.1064453125, "learning_rate": 0.001992452926079264, "loss": 0.2511, "step": 42210 }, { "epoch": 0.29967012863031517, "grad_norm": 0.09765625, "learning_rate": 0.0019924492680265453, "loss": 0.2411, "step": 42220 }, { "epoch": 0.29974110687016126, "grad_norm": 0.181640625, "learning_rate": 0.00199244560909125, "loss": 0.2366, "step": 42230 }, { "epoch": 0.2998120851100074, "grad_norm": 0.09375, "learning_rate": 0.001992441949273382, "loss": 0.2504, "step": 42240 }, { "epoch": 0.29988306334985354, "grad_norm": 0.08984375, "learning_rate": 0.0019924382885729446, "loss": 0.25, "step": 42250 }, { "epoch": 0.2999540415896996, "grad_norm": 0.09130859375, "learning_rate": 0.001992434626989942, "loss": 0.2506, "step": 42260 }, { "epoch": 0.30002501982954577, "grad_norm": 0.1005859375, "learning_rate": 0.001992430964524378, "loss": 0.242, "step": 42270 }, { "epoch": 0.30009599806939186, "grad_norm": 0.0927734375, "learning_rate": 0.0019924273011762547, "loss": 0.2593, "step": 42280 }, { "epoch": 0.300166976309238, "grad_norm": 0.0830078125, "learning_rate": 0.0019924236369455776, "loss": 0.2515, "step": 42290 }, { "epoch": 0.30023795454908414, "grad_norm": 0.09765625, "learning_rate": 0.0019924199718323486, "loss": 0.2563, "step": 42300 }, { "epoch": 0.3003089327889302, "grad_norm": 0.1259765625, "learning_rate": 0.0019924163058365725, "loss": 0.2546, "step": 42310 }, { "epoch": 0.30037991102877637, "grad_norm": 0.0810546875, "learning_rate": 0.001992412638958253, "loss": 0.2778, "step": 42320 }, { "epoch": 0.30045088926862246, "grad_norm": 0.072265625, "learning_rate": 0.001992408971197393, "loss": 0.2533, "step": 42330 }, { "epoch": 0.3005218675084686, "grad_norm": 0.1318359375, "learning_rate": 0.0019924053025539967, "loss": 0.2905, "step": 42340 }, { "epoch": 0.3005928457483147, "grad_norm": 0.1005859375, "learning_rate": 0.001992401633028067, "loss": 0.2661, "step": 42350 }, { "epoch": 0.3006638239881608, "grad_norm": 0.130859375, "learning_rate": 0.0019923979626196085, "loss": 0.238, "step": 42360 }, { "epoch": 0.30073480222800697, "grad_norm": 0.2099609375, "learning_rate": 0.001992394291328624, "loss": 0.2609, "step": 42370 }, { "epoch": 0.30080578046785306, "grad_norm": 0.09521484375, "learning_rate": 0.001992390619155118, "loss": 0.2539, "step": 42380 }, { "epoch": 0.3008767587076992, "grad_norm": 0.1025390625, "learning_rate": 0.001992386946099093, "loss": 0.231, "step": 42390 }, { "epoch": 0.3009477369475453, "grad_norm": 0.08642578125, "learning_rate": 0.0019923832721605536, "loss": 0.2396, "step": 42400 }, { "epoch": 0.30101871518739143, "grad_norm": 0.09033203125, "learning_rate": 0.001992379597339503, "loss": 0.2608, "step": 42410 }, { "epoch": 0.30108969342723757, "grad_norm": 0.11328125, "learning_rate": 0.001992375921635945, "loss": 0.2435, "step": 42420 }, { "epoch": 0.30116067166708366, "grad_norm": 0.10205078125, "learning_rate": 0.001992372245049883, "loss": 0.2732, "step": 42430 }, { "epoch": 0.3012316499069298, "grad_norm": 0.08154296875, "learning_rate": 0.0019923685675813205, "loss": 0.2597, "step": 42440 }, { "epoch": 0.3013026281467759, "grad_norm": 0.10400390625, "learning_rate": 0.001992364889230262, "loss": 0.258, "step": 42450 }, { "epoch": 0.30137360638662203, "grad_norm": 0.1376953125, "learning_rate": 0.00199236120999671, "loss": 0.2674, "step": 42460 }, { "epoch": 0.3014445846264681, "grad_norm": 0.138671875, "learning_rate": 0.001992357529880669, "loss": 0.2636, "step": 42470 }, { "epoch": 0.30151556286631426, "grad_norm": 0.1376953125, "learning_rate": 0.001992353848882142, "loss": 0.2557, "step": 42480 }, { "epoch": 0.3015865411061604, "grad_norm": 0.1044921875, "learning_rate": 0.0019923501670011338, "loss": 0.2524, "step": 42490 }, { "epoch": 0.3016575193460065, "grad_norm": 0.1318359375, "learning_rate": 0.0019923464842376464, "loss": 0.2721, "step": 42500 }, { "epoch": 0.30172849758585263, "grad_norm": 0.09228515625, "learning_rate": 0.0019923428005916843, "loss": 0.254, "step": 42510 }, { "epoch": 0.3017994758256987, "grad_norm": 0.08935546875, "learning_rate": 0.0019923391160632515, "loss": 0.2689, "step": 42520 }, { "epoch": 0.30187045406554486, "grad_norm": 0.07666015625, "learning_rate": 0.001992335430652351, "loss": 0.251, "step": 42530 }, { "epoch": 0.301941432305391, "grad_norm": 0.07421875, "learning_rate": 0.0019923317443589867, "loss": 0.2698, "step": 42540 }, { "epoch": 0.3020124105452371, "grad_norm": 0.11572265625, "learning_rate": 0.0019923280571831625, "loss": 0.2483, "step": 42550 }, { "epoch": 0.30208338878508323, "grad_norm": 0.091796875, "learning_rate": 0.0019923243691248814, "loss": 0.2551, "step": 42560 }, { "epoch": 0.3021543670249293, "grad_norm": 0.0791015625, "learning_rate": 0.0019923206801841474, "loss": 0.2458, "step": 42570 }, { "epoch": 0.30222534526477546, "grad_norm": 0.1123046875, "learning_rate": 0.0019923169903609647, "loss": 0.2425, "step": 42580 }, { "epoch": 0.30229632350462154, "grad_norm": 0.09716796875, "learning_rate": 0.0019923132996553356, "loss": 0.2395, "step": 42590 }, { "epoch": 0.3023673017444677, "grad_norm": 0.109375, "learning_rate": 0.001992309608067265, "loss": 0.2639, "step": 42600 }, { "epoch": 0.30243827998431383, "grad_norm": 0.099609375, "learning_rate": 0.0019923059155967566, "loss": 0.2738, "step": 42610 }, { "epoch": 0.3025092582241599, "grad_norm": 0.09228515625, "learning_rate": 0.001992302222243813, "loss": 0.2647, "step": 42620 }, { "epoch": 0.30258023646400606, "grad_norm": 0.09033203125, "learning_rate": 0.0019922985280084386, "loss": 0.2389, "step": 42630 }, { "epoch": 0.30265121470385214, "grad_norm": 0.15625, "learning_rate": 0.0019922948328906366, "loss": 0.2769, "step": 42640 }, { "epoch": 0.3027221929436983, "grad_norm": 0.10009765625, "learning_rate": 0.001992291136890411, "loss": 0.2528, "step": 42650 }, { "epoch": 0.30279317118354443, "grad_norm": 0.1708984375, "learning_rate": 0.001992287440007766, "loss": 0.2581, "step": 42660 }, { "epoch": 0.3028641494233905, "grad_norm": 0.11328125, "learning_rate": 0.001992283742242704, "loss": 0.2434, "step": 42670 }, { "epoch": 0.30293512766323666, "grad_norm": 0.09521484375, "learning_rate": 0.0019922800435952298, "loss": 0.2549, "step": 42680 }, { "epoch": 0.30300610590308275, "grad_norm": 0.126953125, "learning_rate": 0.001992276344065346, "loss": 0.2694, "step": 42690 }, { "epoch": 0.3030770841429289, "grad_norm": 0.1279296875, "learning_rate": 0.0019922726436530573, "loss": 0.2588, "step": 42700 }, { "epoch": 0.303148062382775, "grad_norm": 0.06640625, "learning_rate": 0.001992268942358367, "loss": 0.2567, "step": 42710 }, { "epoch": 0.3032190406226211, "grad_norm": 0.126953125, "learning_rate": 0.0019922652401812777, "loss": 0.2727, "step": 42720 }, { "epoch": 0.30329001886246726, "grad_norm": 0.10791015625, "learning_rate": 0.001992261537121795, "loss": 0.2616, "step": 42730 }, { "epoch": 0.30336099710231335, "grad_norm": 0.1083984375, "learning_rate": 0.0019922578331799208, "loss": 0.256, "step": 42740 }, { "epoch": 0.3034319753421595, "grad_norm": 0.091796875, "learning_rate": 0.00199225412835566, "loss": 0.2444, "step": 42750 }, { "epoch": 0.3035029535820056, "grad_norm": 0.11376953125, "learning_rate": 0.0019922504226490157, "loss": 0.2545, "step": 42760 }, { "epoch": 0.3035739318218517, "grad_norm": 0.1171875, "learning_rate": 0.0019922467160599917, "loss": 0.2553, "step": 42770 }, { "epoch": 0.30364491006169786, "grad_norm": 0.1025390625, "learning_rate": 0.0019922430085885915, "loss": 0.2576, "step": 42780 }, { "epoch": 0.30371588830154395, "grad_norm": 0.08349609375, "learning_rate": 0.001992239300234819, "loss": 0.2656, "step": 42790 }, { "epoch": 0.3037868665413901, "grad_norm": 0.11083984375, "learning_rate": 0.0019922355909986777, "loss": 0.2453, "step": 42800 }, { "epoch": 0.3038578447812362, "grad_norm": 0.185546875, "learning_rate": 0.0019922318808801714, "loss": 0.2426, "step": 42810 }, { "epoch": 0.3039288230210823, "grad_norm": 0.1025390625, "learning_rate": 0.0019922281698793033, "loss": 0.2805, "step": 42820 }, { "epoch": 0.3039998012609284, "grad_norm": 0.09228515625, "learning_rate": 0.001992224457996078, "loss": 0.2461, "step": 42830 }, { "epoch": 0.30407077950077455, "grad_norm": 0.0849609375, "learning_rate": 0.0019922207452304982, "loss": 0.2531, "step": 42840 }, { "epoch": 0.3041417577406207, "grad_norm": 0.1025390625, "learning_rate": 0.0019922170315825683, "loss": 0.265, "step": 42850 }, { "epoch": 0.3042127359804668, "grad_norm": 0.09765625, "learning_rate": 0.0019922133170522916, "loss": 0.2689, "step": 42860 }, { "epoch": 0.3042837142203129, "grad_norm": 0.1015625, "learning_rate": 0.0019922096016396717, "loss": 0.2729, "step": 42870 }, { "epoch": 0.304354692460159, "grad_norm": 0.08837890625, "learning_rate": 0.001992205885344713, "loss": 0.2747, "step": 42880 }, { "epoch": 0.30442567070000515, "grad_norm": 0.10498046875, "learning_rate": 0.0019922021681674177, "loss": 0.2821, "step": 42890 }, { "epoch": 0.3044966489398513, "grad_norm": 0.09423828125, "learning_rate": 0.001992198450107791, "loss": 0.2432, "step": 42900 }, { "epoch": 0.3045676271796974, "grad_norm": 0.07275390625, "learning_rate": 0.0019921947311658354, "loss": 0.2485, "step": 42910 }, { "epoch": 0.3046386054195435, "grad_norm": 0.0947265625, "learning_rate": 0.0019921910113415557, "loss": 0.2539, "step": 42920 }, { "epoch": 0.3047095836593896, "grad_norm": 0.15625, "learning_rate": 0.0019921872906349548, "loss": 0.2605, "step": 42930 }, { "epoch": 0.30478056189923575, "grad_norm": 0.134765625, "learning_rate": 0.0019921835690460367, "loss": 0.2637, "step": 42940 }, { "epoch": 0.30485154013908183, "grad_norm": 0.30859375, "learning_rate": 0.001992179846574805, "loss": 0.2577, "step": 42950 }, { "epoch": 0.304922518378928, "grad_norm": 0.08154296875, "learning_rate": 0.001992176123221263, "loss": 0.2716, "step": 42960 }, { "epoch": 0.3049934966187741, "grad_norm": 0.1162109375, "learning_rate": 0.001992172398985415, "loss": 0.2512, "step": 42970 }, { "epoch": 0.3050644748586202, "grad_norm": 0.11767578125, "learning_rate": 0.0019921686738672644, "loss": 0.2781, "step": 42980 }, { "epoch": 0.30513545309846635, "grad_norm": 0.1083984375, "learning_rate": 0.001992164947866815, "loss": 0.2577, "step": 42990 }, { "epoch": 0.30520643133831243, "grad_norm": 0.1279296875, "learning_rate": 0.0019921612209840697, "loss": 0.2746, "step": 43000 }, { "epoch": 0.3052774095781586, "grad_norm": 0.08447265625, "learning_rate": 0.001992157493219034, "loss": 0.2557, "step": 43010 }, { "epoch": 0.3053483878180047, "grad_norm": 0.140625, "learning_rate": 0.0019921537645717093, "loss": 0.26, "step": 43020 }, { "epoch": 0.3054193660578508, "grad_norm": 0.09521484375, "learning_rate": 0.001992150035042101, "loss": 0.2451, "step": 43030 }, { "epoch": 0.30549034429769695, "grad_norm": 0.09765625, "learning_rate": 0.0019921463046302123, "loss": 0.2808, "step": 43040 }, { "epoch": 0.30556132253754303, "grad_norm": 0.0927734375, "learning_rate": 0.0019921425733360467, "loss": 0.2415, "step": 43050 }, { "epoch": 0.3056323007773892, "grad_norm": 0.07568359375, "learning_rate": 0.001992138841159608, "loss": 0.2614, "step": 43060 }, { "epoch": 0.3057032790172353, "grad_norm": 0.16796875, "learning_rate": 0.0019921351081009, "loss": 0.2621, "step": 43070 }, { "epoch": 0.3057742572570814, "grad_norm": 0.2119140625, "learning_rate": 0.0019921313741599264, "loss": 0.2605, "step": 43080 }, { "epoch": 0.30584523549692755, "grad_norm": 0.12158203125, "learning_rate": 0.0019921276393366906, "loss": 0.2471, "step": 43090 }, { "epoch": 0.30591621373677363, "grad_norm": 0.09375, "learning_rate": 0.0019921239036311965, "loss": 0.2449, "step": 43100 }, { "epoch": 0.3059871919766198, "grad_norm": 0.171875, "learning_rate": 0.001992120167043448, "loss": 0.2516, "step": 43110 }, { "epoch": 0.30605817021646586, "grad_norm": 0.220703125, "learning_rate": 0.001992116429573448, "loss": 0.255, "step": 43120 }, { "epoch": 0.306129148456312, "grad_norm": 0.09521484375, "learning_rate": 0.0019921126912212012, "loss": 0.2365, "step": 43130 }, { "epoch": 0.30620012669615815, "grad_norm": 0.10302734375, "learning_rate": 0.0019921089519867106, "loss": 0.2371, "step": 43140 }, { "epoch": 0.30627110493600423, "grad_norm": 0.126953125, "learning_rate": 0.0019921052118699804, "loss": 0.2532, "step": 43150 }, { "epoch": 0.3063420831758504, "grad_norm": 0.1513671875, "learning_rate": 0.001992101470871014, "loss": 0.2728, "step": 43160 }, { "epoch": 0.30641306141569646, "grad_norm": 0.0966796875, "learning_rate": 0.001992097728989815, "loss": 0.274, "step": 43170 }, { "epoch": 0.3064840396555426, "grad_norm": 0.08740234375, "learning_rate": 0.001992093986226388, "loss": 0.2587, "step": 43180 }, { "epoch": 0.30655501789538875, "grad_norm": 0.09033203125, "learning_rate": 0.0019920902425807348, "loss": 0.2562, "step": 43190 }, { "epoch": 0.30662599613523484, "grad_norm": 0.0888671875, "learning_rate": 0.0019920864980528612, "loss": 0.2443, "step": 43200 }, { "epoch": 0.306696974375081, "grad_norm": 0.1669921875, "learning_rate": 0.0019920827526427693, "loss": 0.2664, "step": 43210 }, { "epoch": 0.30676795261492706, "grad_norm": 0.123046875, "learning_rate": 0.001992079006350464, "loss": 0.2648, "step": 43220 }, { "epoch": 0.3068389308547732, "grad_norm": 0.0888671875, "learning_rate": 0.001992075259175948, "loss": 0.2486, "step": 43230 }, { "epoch": 0.3069099090946193, "grad_norm": 0.07177734375, "learning_rate": 0.0019920715111192257, "loss": 0.2259, "step": 43240 }, { "epoch": 0.30698088733446544, "grad_norm": 0.08740234375, "learning_rate": 0.0019920677621803004, "loss": 0.2559, "step": 43250 }, { "epoch": 0.3070518655743116, "grad_norm": 0.12890625, "learning_rate": 0.0019920640123591766, "loss": 0.2695, "step": 43260 }, { "epoch": 0.30712284381415766, "grad_norm": 0.1923828125, "learning_rate": 0.0019920602616558566, "loss": 0.2619, "step": 43270 }, { "epoch": 0.3071938220540038, "grad_norm": 0.07373046875, "learning_rate": 0.0019920565100703456, "loss": 0.2499, "step": 43280 }, { "epoch": 0.3072648002938499, "grad_norm": 0.11328125, "learning_rate": 0.001992052757602646, "loss": 0.2363, "step": 43290 }, { "epoch": 0.30733577853369604, "grad_norm": 0.125, "learning_rate": 0.0019920490042527627, "loss": 0.257, "step": 43300 }, { "epoch": 0.3074067567735422, "grad_norm": 0.1064453125, "learning_rate": 0.0019920452500206986, "loss": 0.2698, "step": 43310 }, { "epoch": 0.30747773501338826, "grad_norm": 0.10205078125, "learning_rate": 0.0019920414949064578, "loss": 0.2506, "step": 43320 }, { "epoch": 0.3075487132532344, "grad_norm": 0.146484375, "learning_rate": 0.0019920377389100433, "loss": 0.2711, "step": 43330 }, { "epoch": 0.3076196914930805, "grad_norm": 0.12353515625, "learning_rate": 0.00199203398203146, "loss": 0.2458, "step": 43340 }, { "epoch": 0.30769066973292664, "grad_norm": 0.0947265625, "learning_rate": 0.0019920302242707106, "loss": 0.2477, "step": 43350 }, { "epoch": 0.3077616479727727, "grad_norm": 0.130859375, "learning_rate": 0.0019920264656278, "loss": 0.2636, "step": 43360 }, { "epoch": 0.30783262621261887, "grad_norm": 0.09619140625, "learning_rate": 0.00199202270610273, "loss": 0.2661, "step": 43370 }, { "epoch": 0.307903604452465, "grad_norm": 0.11669921875, "learning_rate": 0.001992018945695506, "loss": 0.2813, "step": 43380 }, { "epoch": 0.3079745826923111, "grad_norm": 0.1494140625, "learning_rate": 0.001992015184406131, "loss": 0.2586, "step": 43390 }, { "epoch": 0.30804556093215724, "grad_norm": 0.1435546875, "learning_rate": 0.0019920114222346094, "loss": 0.2525, "step": 43400 }, { "epoch": 0.3081165391720033, "grad_norm": 0.294921875, "learning_rate": 0.001992007659180944, "loss": 0.2446, "step": 43410 }, { "epoch": 0.30818751741184947, "grad_norm": 0.1279296875, "learning_rate": 0.001992003895245139, "loss": 0.2574, "step": 43420 }, { "epoch": 0.3082584956516956, "grad_norm": 0.2197265625, "learning_rate": 0.001992000130427198, "loss": 0.2785, "step": 43430 }, { "epoch": 0.3083294738915417, "grad_norm": 0.10400390625, "learning_rate": 0.001991996364727125, "loss": 0.2506, "step": 43440 }, { "epoch": 0.30840045213138784, "grad_norm": 0.05615234375, "learning_rate": 0.001991992598144923, "loss": 0.2372, "step": 43450 }, { "epoch": 0.3084714303712339, "grad_norm": 0.138671875, "learning_rate": 0.0019919888306805964, "loss": 0.2645, "step": 43460 }, { "epoch": 0.30854240861108007, "grad_norm": 0.11962890625, "learning_rate": 0.0019919850623341495, "loss": 0.2517, "step": 43470 }, { "epoch": 0.30861338685092615, "grad_norm": 0.11767578125, "learning_rate": 0.0019919812931055844, "loss": 0.246, "step": 43480 }, { "epoch": 0.3086843650907723, "grad_norm": 0.1669921875, "learning_rate": 0.001991977522994906, "loss": 0.2401, "step": 43490 }, { "epoch": 0.30875534333061844, "grad_norm": 0.12353515625, "learning_rate": 0.0019919737520021174, "loss": 0.2532, "step": 43500 }, { "epoch": 0.3088263215704645, "grad_norm": 0.1005859375, "learning_rate": 0.001991969980127223, "loss": 0.2556, "step": 43510 }, { "epoch": 0.30889729981031067, "grad_norm": 0.1064453125, "learning_rate": 0.0019919662073702264, "loss": 0.2478, "step": 43520 }, { "epoch": 0.30896827805015675, "grad_norm": 0.10498046875, "learning_rate": 0.0019919624337311308, "loss": 0.2661, "step": 43530 }, { "epoch": 0.3090392562900029, "grad_norm": 0.255859375, "learning_rate": 0.0019919586592099404, "loss": 0.2501, "step": 43540 }, { "epoch": 0.30911023452984904, "grad_norm": 0.1416015625, "learning_rate": 0.0019919548838066587, "loss": 0.2615, "step": 43550 }, { "epoch": 0.3091812127696951, "grad_norm": 0.14453125, "learning_rate": 0.0019919511075212897, "loss": 0.2554, "step": 43560 }, { "epoch": 0.30925219100954127, "grad_norm": 0.255859375, "learning_rate": 0.0019919473303538367, "loss": 0.2654, "step": 43570 }, { "epoch": 0.30932316924938735, "grad_norm": 0.095703125, "learning_rate": 0.0019919435523043038, "loss": 0.2769, "step": 43580 }, { "epoch": 0.3093941474892335, "grad_norm": 0.10986328125, "learning_rate": 0.0019919397733726947, "loss": 0.2733, "step": 43590 }, { "epoch": 0.3094651257290796, "grad_norm": 0.12890625, "learning_rate": 0.001991935993559013, "loss": 0.253, "step": 43600 }, { "epoch": 0.3095361039689257, "grad_norm": 0.0966796875, "learning_rate": 0.0019919322128632627, "loss": 0.2799, "step": 43610 }, { "epoch": 0.30960708220877187, "grad_norm": 0.11865234375, "learning_rate": 0.001991928431285447, "loss": 0.2683, "step": 43620 }, { "epoch": 0.30967806044861795, "grad_norm": 0.1181640625, "learning_rate": 0.0019919246488255705, "loss": 0.2557, "step": 43630 }, { "epoch": 0.3097490386884641, "grad_norm": 0.1953125, "learning_rate": 0.001991920865483636, "loss": 0.2692, "step": 43640 }, { "epoch": 0.3098200169283102, "grad_norm": 0.09765625, "learning_rate": 0.0019919170812596476, "loss": 0.2627, "step": 43650 }, { "epoch": 0.3098909951681563, "grad_norm": 0.11865234375, "learning_rate": 0.001991913296153609, "loss": 0.2467, "step": 43660 }, { "epoch": 0.30996197340800247, "grad_norm": 0.07421875, "learning_rate": 0.0019919095101655244, "loss": 0.2666, "step": 43670 }, { "epoch": 0.31003295164784855, "grad_norm": 0.08642578125, "learning_rate": 0.001991905723295397, "loss": 0.2593, "step": 43680 }, { "epoch": 0.3101039298876947, "grad_norm": 0.09033203125, "learning_rate": 0.001991901935543231, "loss": 0.2428, "step": 43690 }, { "epoch": 0.3101749081275408, "grad_norm": 0.111328125, "learning_rate": 0.0019918981469090294, "loss": 0.2456, "step": 43700 }, { "epoch": 0.3102458863673869, "grad_norm": 0.11328125, "learning_rate": 0.001991894357392797, "loss": 0.2388, "step": 43710 }, { "epoch": 0.310316864607233, "grad_norm": 0.1982421875, "learning_rate": 0.0019918905669945365, "loss": 0.2613, "step": 43720 }, { "epoch": 0.31038784284707915, "grad_norm": 0.12451171875, "learning_rate": 0.0019918867757142524, "loss": 0.2704, "step": 43730 }, { "epoch": 0.3104588210869253, "grad_norm": 0.08935546875, "learning_rate": 0.001991882983551948, "loss": 0.2497, "step": 43740 }, { "epoch": 0.3105297993267714, "grad_norm": 0.08154296875, "learning_rate": 0.0019918791905076277, "loss": 0.2721, "step": 43750 }, { "epoch": 0.3106007775666175, "grad_norm": 0.091796875, "learning_rate": 0.0019918753965812944, "loss": 0.2497, "step": 43760 }, { "epoch": 0.3106717558064636, "grad_norm": 0.103515625, "learning_rate": 0.0019918716017729523, "loss": 0.2393, "step": 43770 }, { "epoch": 0.31074273404630975, "grad_norm": 0.0849609375, "learning_rate": 0.0019918678060826048, "loss": 0.2548, "step": 43780 }, { "epoch": 0.3108137122861559, "grad_norm": 0.09521484375, "learning_rate": 0.001991864009510256, "loss": 0.2489, "step": 43790 }, { "epoch": 0.310884690526002, "grad_norm": 0.1376953125, "learning_rate": 0.00199186021205591, "loss": 0.247, "step": 43800 }, { "epoch": 0.3109556687658481, "grad_norm": 0.0908203125, "learning_rate": 0.00199185641371957, "loss": 0.271, "step": 43810 }, { "epoch": 0.3110266470056942, "grad_norm": 0.11669921875, "learning_rate": 0.0019918526145012395, "loss": 0.2566, "step": 43820 }, { "epoch": 0.31109762524554035, "grad_norm": 0.09423828125, "learning_rate": 0.001991848814400923, "loss": 0.2458, "step": 43830 }, { "epoch": 0.31116860348538644, "grad_norm": 0.09619140625, "learning_rate": 0.001991845013418624, "loss": 0.2559, "step": 43840 }, { "epoch": 0.3112395817252326, "grad_norm": 0.2412109375, "learning_rate": 0.0019918412115543464, "loss": 0.2583, "step": 43850 }, { "epoch": 0.3113105599650787, "grad_norm": 0.08349609375, "learning_rate": 0.001991837408808093, "loss": 0.2599, "step": 43860 }, { "epoch": 0.3113815382049248, "grad_norm": 0.0966796875, "learning_rate": 0.001991833605179869, "loss": 0.2452, "step": 43870 }, { "epoch": 0.31145251644477095, "grad_norm": 0.1044921875, "learning_rate": 0.001991829800669677, "loss": 0.2372, "step": 43880 }, { "epoch": 0.31152349468461704, "grad_norm": 0.1181640625, "learning_rate": 0.001991825995277522, "loss": 0.2652, "step": 43890 }, { "epoch": 0.3115944729244632, "grad_norm": 0.078125, "learning_rate": 0.0019918221890034057, "loss": 0.2537, "step": 43900 }, { "epoch": 0.3116654511643093, "grad_norm": 0.099609375, "learning_rate": 0.001991818381847334, "loss": 0.2594, "step": 43910 }, { "epoch": 0.3117364294041554, "grad_norm": 0.08984375, "learning_rate": 0.0019918145738093097, "loss": 0.2525, "step": 43920 }, { "epoch": 0.31180740764400156, "grad_norm": 0.099609375, "learning_rate": 0.001991810764889337, "loss": 0.2559, "step": 43930 }, { "epoch": 0.31187838588384764, "grad_norm": 0.2314453125, "learning_rate": 0.001991806955087419, "loss": 0.248, "step": 43940 }, { "epoch": 0.3119493641236938, "grad_norm": 0.10595703125, "learning_rate": 0.0019918031444035593, "loss": 0.2621, "step": 43950 }, { "epoch": 0.31202034236353987, "grad_norm": 0.10107421875, "learning_rate": 0.001991799332837763, "loss": 0.2651, "step": 43960 }, { "epoch": 0.312091320603386, "grad_norm": 0.181640625, "learning_rate": 0.0019917955203900325, "loss": 0.2609, "step": 43970 }, { "epoch": 0.31216229884323216, "grad_norm": 0.140625, "learning_rate": 0.001991791707060372, "loss": 0.2678, "step": 43980 }, { "epoch": 0.31223327708307824, "grad_norm": 0.0634765625, "learning_rate": 0.001991787892848786, "loss": 0.2475, "step": 43990 }, { "epoch": 0.3123042553229244, "grad_norm": 0.07568359375, "learning_rate": 0.0019917840777552774, "loss": 0.22, "step": 44000 }, { "epoch": 0.3123042553229244, "eval_covost2-zh-en_loss": 3.9225471019744873, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.665, "eval_covost2-zh-en_samples_per_second": 3.097, "eval_covost2-zh-en_steps_per_second": 0.194, "step": 44000 }, { "epoch": 0.3123042553229244, "eval_covost2-en-zh_loss": 3.1867172718048096, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.3506, "eval_covost2-en-zh_samples_per_second": 3.145, "eval_covost2-en-zh_steps_per_second": 0.197, "step": 44000 }, { "epoch": 0.31237523356277047, "grad_norm": 0.0927734375, "learning_rate": 0.0019917802617798503, "loss": 0.2529, "step": 44010 }, { "epoch": 0.3124462118026166, "grad_norm": 0.1171875, "learning_rate": 0.0019917764449225083, "loss": 0.2664, "step": 44020 }, { "epoch": 0.31251719004246276, "grad_norm": 0.1376953125, "learning_rate": 0.0019917726271832556, "loss": 0.2797, "step": 44030 }, { "epoch": 0.31258816828230884, "grad_norm": 0.1201171875, "learning_rate": 0.001991768808562095, "loss": 0.2488, "step": 44040 }, { "epoch": 0.312659146522155, "grad_norm": 0.12255859375, "learning_rate": 0.0019917649890590313, "loss": 0.2451, "step": 44050 }, { "epoch": 0.31273012476200107, "grad_norm": 0.10595703125, "learning_rate": 0.001991761168674068, "loss": 0.2589, "step": 44060 }, { "epoch": 0.3128011030018472, "grad_norm": 0.2177734375, "learning_rate": 0.001991757347407209, "loss": 0.2524, "step": 44070 }, { "epoch": 0.3128720812416933, "grad_norm": 0.10888671875, "learning_rate": 0.0019917535252584576, "loss": 0.2384, "step": 44080 }, { "epoch": 0.31294305948153944, "grad_norm": 0.12353515625, "learning_rate": 0.001991749702227818, "loss": 0.235, "step": 44090 }, { "epoch": 0.3130140377213856, "grad_norm": 0.119140625, "learning_rate": 0.0019917458783152934, "loss": 0.249, "step": 44100 }, { "epoch": 0.31308501596123167, "grad_norm": 0.09130859375, "learning_rate": 0.001991742053520888, "loss": 0.2482, "step": 44110 }, { "epoch": 0.3131559942010778, "grad_norm": 0.12158203125, "learning_rate": 0.0019917382278446063, "loss": 0.2454, "step": 44120 }, { "epoch": 0.3132269724409239, "grad_norm": 0.11669921875, "learning_rate": 0.0019917344012864513, "loss": 0.2477, "step": 44130 }, { "epoch": 0.31329795068077004, "grad_norm": 0.080078125, "learning_rate": 0.0019917305738464263, "loss": 0.2452, "step": 44140 }, { "epoch": 0.3133689289206162, "grad_norm": 0.1494140625, "learning_rate": 0.001991726745524536, "loss": 0.2563, "step": 44150 }, { "epoch": 0.31343990716046227, "grad_norm": 0.06689453125, "learning_rate": 0.0019917229163207836, "loss": 0.2519, "step": 44160 }, { "epoch": 0.3135108854003084, "grad_norm": 0.1572265625, "learning_rate": 0.0019917190862351734, "loss": 0.247, "step": 44170 }, { "epoch": 0.3135818636401545, "grad_norm": 0.1376953125, "learning_rate": 0.0019917152552677087, "loss": 0.2393, "step": 44180 }, { "epoch": 0.31365284188000064, "grad_norm": 0.078125, "learning_rate": 0.001991711423418394, "loss": 0.259, "step": 44190 }, { "epoch": 0.31372382011984673, "grad_norm": 0.11328125, "learning_rate": 0.001991707590687232, "loss": 0.2429, "step": 44200 }, { "epoch": 0.3137947983596929, "grad_norm": 0.06494140625, "learning_rate": 0.001991703757074227, "loss": 0.2441, "step": 44210 }, { "epoch": 0.313865776599539, "grad_norm": 0.166015625, "learning_rate": 0.001991699922579383, "loss": 0.2248, "step": 44220 }, { "epoch": 0.3139367548393851, "grad_norm": 0.115234375, "learning_rate": 0.001991696087202704, "loss": 0.2502, "step": 44230 }, { "epoch": 0.31400773307923124, "grad_norm": 0.109375, "learning_rate": 0.0019916922509441936, "loss": 0.2453, "step": 44240 }, { "epoch": 0.31407871131907733, "grad_norm": 0.0849609375, "learning_rate": 0.0019916884138038546, "loss": 0.2538, "step": 44250 }, { "epoch": 0.3141496895589235, "grad_norm": 0.1484375, "learning_rate": 0.0019916845757816924, "loss": 0.2699, "step": 44260 }, { "epoch": 0.3142206677987696, "grad_norm": 0.07568359375, "learning_rate": 0.00199168073687771, "loss": 0.2557, "step": 44270 }, { "epoch": 0.3142916460386157, "grad_norm": 0.1103515625, "learning_rate": 0.0019916768970919107, "loss": 0.2782, "step": 44280 }, { "epoch": 0.31436262427846184, "grad_norm": 0.12353515625, "learning_rate": 0.0019916730564242995, "loss": 0.2377, "step": 44290 }, { "epoch": 0.31443360251830793, "grad_norm": 0.07470703125, "learning_rate": 0.001991669214874879, "loss": 0.2581, "step": 44300 }, { "epoch": 0.3145045807581541, "grad_norm": 0.103515625, "learning_rate": 0.0019916653724436537, "loss": 0.2795, "step": 44310 }, { "epoch": 0.3145755589980002, "grad_norm": 0.1376953125, "learning_rate": 0.0019916615291306274, "loss": 0.2516, "step": 44320 }, { "epoch": 0.3146465372378463, "grad_norm": 0.1318359375, "learning_rate": 0.0019916576849358034, "loss": 0.2502, "step": 44330 }, { "epoch": 0.31471751547769244, "grad_norm": 0.09716796875, "learning_rate": 0.001991653839859186, "loss": 0.2673, "step": 44340 }, { "epoch": 0.31478849371753853, "grad_norm": 0.08642578125, "learning_rate": 0.0019916499939007792, "loss": 0.2527, "step": 44350 }, { "epoch": 0.3148594719573847, "grad_norm": 0.111328125, "learning_rate": 0.0019916461470605856, "loss": 0.2414, "step": 44360 }, { "epoch": 0.31493045019723076, "grad_norm": 0.11328125, "learning_rate": 0.001991642299338611, "loss": 0.2656, "step": 44370 }, { "epoch": 0.3150014284370769, "grad_norm": 0.130859375, "learning_rate": 0.001991638450734857, "loss": 0.2842, "step": 44380 }, { "epoch": 0.31507240667692304, "grad_norm": 0.064453125, "learning_rate": 0.001991634601249329, "loss": 0.2507, "step": 44390 }, { "epoch": 0.31514338491676913, "grad_norm": 0.07958984375, "learning_rate": 0.00199163075088203, "loss": 0.2328, "step": 44400 }, { "epoch": 0.3152143631566153, "grad_norm": 0.10791015625, "learning_rate": 0.001991626899632964, "loss": 0.2883, "step": 44410 }, { "epoch": 0.31528534139646136, "grad_norm": 0.12890625, "learning_rate": 0.001991623047502135, "loss": 0.2676, "step": 44420 }, { "epoch": 0.3153563196363075, "grad_norm": 0.0947265625, "learning_rate": 0.001991619194489547, "loss": 0.2619, "step": 44430 }, { "epoch": 0.31542729787615365, "grad_norm": 0.2041015625, "learning_rate": 0.001991615340595203, "loss": 0.2508, "step": 44440 }, { "epoch": 0.31549827611599973, "grad_norm": 0.10546875, "learning_rate": 0.0019916114858191075, "loss": 0.2503, "step": 44450 }, { "epoch": 0.3155692543558459, "grad_norm": 0.072265625, "learning_rate": 0.0019916076301612646, "loss": 0.264, "step": 44460 }, { "epoch": 0.31564023259569196, "grad_norm": 0.119140625, "learning_rate": 0.001991603773621677, "loss": 0.2697, "step": 44470 }, { "epoch": 0.3157112108355381, "grad_norm": 0.0927734375, "learning_rate": 0.0019915999162003492, "loss": 0.2488, "step": 44480 }, { "epoch": 0.3157821890753842, "grad_norm": 0.07470703125, "learning_rate": 0.0019915960578972855, "loss": 0.2417, "step": 44490 }, { "epoch": 0.31585316731523033, "grad_norm": 0.130859375, "learning_rate": 0.0019915921987124887, "loss": 0.2627, "step": 44500 }, { "epoch": 0.3159241455550765, "grad_norm": 0.162109375, "learning_rate": 0.001991588338645963, "loss": 0.2541, "step": 44510 }, { "epoch": 0.31599512379492256, "grad_norm": 0.1650390625, "learning_rate": 0.0019915844776977124, "loss": 0.2643, "step": 44520 }, { "epoch": 0.3160661020347687, "grad_norm": 0.1083984375, "learning_rate": 0.0019915806158677407, "loss": 0.2541, "step": 44530 }, { "epoch": 0.3161370802746148, "grad_norm": 0.10107421875, "learning_rate": 0.001991576753156052, "loss": 0.2522, "step": 44540 }, { "epoch": 0.31620805851446093, "grad_norm": 0.09912109375, "learning_rate": 0.0019915728895626493, "loss": 0.2439, "step": 44550 }, { "epoch": 0.3162790367543071, "grad_norm": 0.07861328125, "learning_rate": 0.001991569025087537, "loss": 0.2543, "step": 44560 }, { "epoch": 0.31635001499415316, "grad_norm": 0.1455078125, "learning_rate": 0.001991565159730719, "loss": 0.2523, "step": 44570 }, { "epoch": 0.3164209932339993, "grad_norm": 0.1103515625, "learning_rate": 0.001991561293492199, "loss": 0.2399, "step": 44580 }, { "epoch": 0.3164919714738454, "grad_norm": 0.150390625, "learning_rate": 0.0019915574263719804, "loss": 0.2498, "step": 44590 }, { "epoch": 0.31656294971369153, "grad_norm": 0.1201171875, "learning_rate": 0.0019915535583700673, "loss": 0.2531, "step": 44600 }, { "epoch": 0.3166339279535376, "grad_norm": 0.1015625, "learning_rate": 0.001991549689486464, "loss": 0.2436, "step": 44610 }, { "epoch": 0.31670490619338376, "grad_norm": 0.115234375, "learning_rate": 0.001991545819721174, "loss": 0.2503, "step": 44620 }, { "epoch": 0.3167758844332299, "grad_norm": 0.177734375, "learning_rate": 0.0019915419490742006, "loss": 0.2591, "step": 44630 }, { "epoch": 0.316846862673076, "grad_norm": 0.1318359375, "learning_rate": 0.0019915380775455483, "loss": 0.258, "step": 44640 }, { "epoch": 0.31691784091292213, "grad_norm": 0.099609375, "learning_rate": 0.001991534205135221, "loss": 0.2446, "step": 44650 }, { "epoch": 0.3169888191527682, "grad_norm": 0.0751953125, "learning_rate": 0.0019915303318432218, "loss": 0.255, "step": 44660 }, { "epoch": 0.31705979739261436, "grad_norm": 0.1162109375, "learning_rate": 0.001991526457669555, "loss": 0.2387, "step": 44670 }, { "epoch": 0.3171307756324605, "grad_norm": 0.10595703125, "learning_rate": 0.001991522582614225, "loss": 0.2501, "step": 44680 }, { "epoch": 0.3172017538723066, "grad_norm": 0.10205078125, "learning_rate": 0.0019915187066772344, "loss": 0.2482, "step": 44690 }, { "epoch": 0.31727273211215273, "grad_norm": 0.10546875, "learning_rate": 0.001991514829858588, "loss": 0.2671, "step": 44700 }, { "epoch": 0.3173437103519988, "grad_norm": 0.080078125, "learning_rate": 0.001991510952158289, "loss": 0.2669, "step": 44710 }, { "epoch": 0.31741468859184496, "grad_norm": 0.10302734375, "learning_rate": 0.001991507073576342, "loss": 0.2335, "step": 44720 }, { "epoch": 0.31748566683169105, "grad_norm": 0.12890625, "learning_rate": 0.00199150319411275, "loss": 0.2357, "step": 44730 }, { "epoch": 0.3175566450715372, "grad_norm": 0.07958984375, "learning_rate": 0.0019914993137675175, "loss": 0.2406, "step": 44740 }, { "epoch": 0.31762762331138333, "grad_norm": 0.11181640625, "learning_rate": 0.0019914954325406476, "loss": 0.2508, "step": 44750 }, { "epoch": 0.3176986015512294, "grad_norm": 0.08837890625, "learning_rate": 0.001991491550432145, "loss": 0.2489, "step": 44760 }, { "epoch": 0.31776957979107556, "grad_norm": 0.078125, "learning_rate": 0.0019914876674420132, "loss": 0.2374, "step": 44770 }, { "epoch": 0.31784055803092165, "grad_norm": 0.10888671875, "learning_rate": 0.0019914837835702556, "loss": 0.2519, "step": 44780 }, { "epoch": 0.3179115362707678, "grad_norm": 0.1669921875, "learning_rate": 0.001991479898816877, "loss": 0.2689, "step": 44790 }, { "epoch": 0.31798251451061393, "grad_norm": 0.10888671875, "learning_rate": 0.00199147601318188, "loss": 0.2718, "step": 44800 }, { "epoch": 0.31805349275046, "grad_norm": 0.1171875, "learning_rate": 0.001991472126665269, "loss": 0.2437, "step": 44810 }, { "epoch": 0.31812447099030616, "grad_norm": 0.07470703125, "learning_rate": 0.0019914682392670486, "loss": 0.2318, "step": 44820 }, { "epoch": 0.31819544923015225, "grad_norm": 0.1572265625, "learning_rate": 0.0019914643509872216, "loss": 0.2725, "step": 44830 }, { "epoch": 0.3182664274699984, "grad_norm": 0.130859375, "learning_rate": 0.0019914604618257925, "loss": 0.263, "step": 44840 }, { "epoch": 0.3183374057098445, "grad_norm": 0.146484375, "learning_rate": 0.0019914565717827643, "loss": 0.2625, "step": 44850 }, { "epoch": 0.3184083839496906, "grad_norm": 0.09326171875, "learning_rate": 0.001991452680858142, "loss": 0.2485, "step": 44860 }, { "epoch": 0.31847936218953676, "grad_norm": 0.09716796875, "learning_rate": 0.0019914487890519287, "loss": 0.2536, "step": 44870 }, { "epoch": 0.31855034042938285, "grad_norm": 0.08984375, "learning_rate": 0.0019914448963641285, "loss": 0.2431, "step": 44880 }, { "epoch": 0.318621318669229, "grad_norm": 0.09228515625, "learning_rate": 0.0019914410027947445, "loss": 0.258, "step": 44890 }, { "epoch": 0.3186922969090751, "grad_norm": 0.1396484375, "learning_rate": 0.0019914371083437823, "loss": 0.2517, "step": 44900 }, { "epoch": 0.3187632751489212, "grad_norm": 0.126953125, "learning_rate": 0.001991433213011244, "loss": 0.2556, "step": 44910 }, { "epoch": 0.31883425338876736, "grad_norm": 0.1123046875, "learning_rate": 0.0019914293167971344, "loss": 0.2475, "step": 44920 }, { "epoch": 0.31890523162861345, "grad_norm": 0.08935546875, "learning_rate": 0.0019914254197014565, "loss": 0.2506, "step": 44930 }, { "epoch": 0.3189762098684596, "grad_norm": 0.1259765625, "learning_rate": 0.0019914215217242156, "loss": 0.2535, "step": 44940 }, { "epoch": 0.3190471881083057, "grad_norm": 0.169921875, "learning_rate": 0.0019914176228654143, "loss": 0.2355, "step": 44950 }, { "epoch": 0.3191181663481518, "grad_norm": 0.1279296875, "learning_rate": 0.001991413723125057, "loss": 0.2461, "step": 44960 }, { "epoch": 0.3191891445879979, "grad_norm": 0.08203125, "learning_rate": 0.001991409822503147, "loss": 0.2517, "step": 44970 }, { "epoch": 0.31926012282784405, "grad_norm": 0.0966796875, "learning_rate": 0.0019914059209996888, "loss": 0.247, "step": 44980 }, { "epoch": 0.3193311010676902, "grad_norm": 0.162109375, "learning_rate": 0.001991402018614686, "loss": 0.2457, "step": 44990 }, { "epoch": 0.3194020793075363, "grad_norm": 0.09423828125, "learning_rate": 0.001991398115348142, "loss": 0.2514, "step": 45000 }, { "epoch": 0.3194730575473824, "grad_norm": 0.1162109375, "learning_rate": 0.001991394211200062, "loss": 0.2733, "step": 45010 }, { "epoch": 0.3195440357872285, "grad_norm": 0.1201171875, "learning_rate": 0.001991390306170448, "loss": 0.2577, "step": 45020 }, { "epoch": 0.31961501402707465, "grad_norm": 0.1025390625, "learning_rate": 0.0019913864002593057, "loss": 0.2477, "step": 45030 }, { "epoch": 0.3196859922669208, "grad_norm": 0.119140625, "learning_rate": 0.0019913824934666377, "loss": 0.2448, "step": 45040 }, { "epoch": 0.3197569705067669, "grad_norm": 0.09716796875, "learning_rate": 0.0019913785857924486, "loss": 0.2507, "step": 45050 }, { "epoch": 0.319827948746613, "grad_norm": 0.1083984375, "learning_rate": 0.0019913746772367417, "loss": 0.2537, "step": 45060 }, { "epoch": 0.3198989269864591, "grad_norm": 0.09228515625, "learning_rate": 0.001991370767799521, "loss": 0.2307, "step": 45070 }, { "epoch": 0.31996990522630525, "grad_norm": 0.10546875, "learning_rate": 0.001991366857480791, "loss": 0.2424, "step": 45080 }, { "epoch": 0.32004088346615134, "grad_norm": 0.09228515625, "learning_rate": 0.0019913629462805543, "loss": 0.2451, "step": 45090 }, { "epoch": 0.3201118617059975, "grad_norm": 0.10888671875, "learning_rate": 0.001991359034198816, "loss": 0.2636, "step": 45100 }, { "epoch": 0.3201828399458436, "grad_norm": 0.140625, "learning_rate": 0.0019913551212355795, "loss": 0.2547, "step": 45110 }, { "epoch": 0.3202538181856897, "grad_norm": 0.1044921875, "learning_rate": 0.0019913512073908486, "loss": 0.2544, "step": 45120 }, { "epoch": 0.32032479642553585, "grad_norm": 0.1103515625, "learning_rate": 0.001991347292664627, "loss": 0.2443, "step": 45130 }, { "epoch": 0.32039577466538194, "grad_norm": 0.2578125, "learning_rate": 0.001991343377056919, "loss": 0.2515, "step": 45140 }, { "epoch": 0.3204667529052281, "grad_norm": 0.0908203125, "learning_rate": 0.001991339460567728, "loss": 0.2539, "step": 45150 }, { "epoch": 0.3205377311450742, "grad_norm": 0.1748046875, "learning_rate": 0.0019913355431970584, "loss": 0.2663, "step": 45160 }, { "epoch": 0.3206087093849203, "grad_norm": 0.09423828125, "learning_rate": 0.001991331624944914, "loss": 0.2403, "step": 45170 }, { "epoch": 0.32067968762476645, "grad_norm": 0.0859375, "learning_rate": 0.001991327705811298, "loss": 0.2587, "step": 45180 }, { "epoch": 0.32075066586461254, "grad_norm": 0.1103515625, "learning_rate": 0.001991323785796215, "loss": 0.2505, "step": 45190 }, { "epoch": 0.3208216441044587, "grad_norm": 0.1376953125, "learning_rate": 0.0019913198648996685, "loss": 0.2348, "step": 45200 }, { "epoch": 0.32089262234430477, "grad_norm": 0.10888671875, "learning_rate": 0.001991315943121663, "loss": 0.2428, "step": 45210 }, { "epoch": 0.3209636005841509, "grad_norm": 0.0947265625, "learning_rate": 0.0019913120204622017, "loss": 0.2316, "step": 45220 }, { "epoch": 0.32103457882399705, "grad_norm": 0.09375, "learning_rate": 0.001991308096921288, "loss": 0.2742, "step": 45230 }, { "epoch": 0.32110555706384314, "grad_norm": 0.07275390625, "learning_rate": 0.001991304172498927, "loss": 0.2352, "step": 45240 }, { "epoch": 0.3211765353036893, "grad_norm": 0.09912109375, "learning_rate": 0.001991300247195122, "loss": 0.2526, "step": 45250 }, { "epoch": 0.32124751354353537, "grad_norm": 0.1640625, "learning_rate": 0.001991296321009877, "loss": 0.2388, "step": 45260 }, { "epoch": 0.3213184917833815, "grad_norm": 0.1298828125, "learning_rate": 0.0019912923939431957, "loss": 0.2409, "step": 45270 }, { "epoch": 0.32138947002322765, "grad_norm": 0.10546875, "learning_rate": 0.0019912884659950823, "loss": 0.2473, "step": 45280 }, { "epoch": 0.32146044826307374, "grad_norm": 0.09716796875, "learning_rate": 0.00199128453716554, "loss": 0.2333, "step": 45290 }, { "epoch": 0.3215314265029199, "grad_norm": 0.12060546875, "learning_rate": 0.0019912806074545734, "loss": 0.2601, "step": 45300 }, { "epoch": 0.32160240474276597, "grad_norm": 0.134765625, "learning_rate": 0.0019912766768621862, "loss": 0.2604, "step": 45310 }, { "epoch": 0.3216733829826121, "grad_norm": 0.10302734375, "learning_rate": 0.001991272745388382, "loss": 0.2572, "step": 45320 }, { "epoch": 0.3217443612224582, "grad_norm": 0.17578125, "learning_rate": 0.0019912688130331656, "loss": 0.2748, "step": 45330 }, { "epoch": 0.32181533946230434, "grad_norm": 0.08740234375, "learning_rate": 0.0019912648797965394, "loss": 0.2394, "step": 45340 }, { "epoch": 0.3218863177021505, "grad_norm": 0.076171875, "learning_rate": 0.0019912609456785085, "loss": 0.2433, "step": 45350 }, { "epoch": 0.32195729594199657, "grad_norm": 0.1015625, "learning_rate": 0.0019912570106790763, "loss": 0.2506, "step": 45360 }, { "epoch": 0.3220282741818427, "grad_norm": 0.06689453125, "learning_rate": 0.0019912530747982467, "loss": 0.2441, "step": 45370 }, { "epoch": 0.3220992524216888, "grad_norm": 0.11865234375, "learning_rate": 0.001991249138036024, "loss": 0.2774, "step": 45380 }, { "epoch": 0.32217023066153494, "grad_norm": 0.1884765625, "learning_rate": 0.0019912452003924114, "loss": 0.2385, "step": 45390 }, { "epoch": 0.3222412089013811, "grad_norm": 0.130859375, "learning_rate": 0.0019912412618674135, "loss": 0.2519, "step": 45400 }, { "epoch": 0.32231218714122717, "grad_norm": 0.138671875, "learning_rate": 0.0019912373224610334, "loss": 0.2373, "step": 45410 }, { "epoch": 0.3223831653810733, "grad_norm": 0.146484375, "learning_rate": 0.0019912333821732755, "loss": 0.2435, "step": 45420 }, { "epoch": 0.3224541436209194, "grad_norm": 0.140625, "learning_rate": 0.001991229441004144, "loss": 0.25, "step": 45430 }, { "epoch": 0.32252512186076554, "grad_norm": 0.19140625, "learning_rate": 0.001991225498953642, "loss": 0.2592, "step": 45440 }, { "epoch": 0.3225961001006117, "grad_norm": 0.091796875, "learning_rate": 0.0019912215560217742, "loss": 0.2491, "step": 45450 }, { "epoch": 0.32266707834045777, "grad_norm": 0.08642578125, "learning_rate": 0.0019912176122085443, "loss": 0.2525, "step": 45460 }, { "epoch": 0.3227380565803039, "grad_norm": 0.09228515625, "learning_rate": 0.0019912136675139555, "loss": 0.2523, "step": 45470 }, { "epoch": 0.32280903482015, "grad_norm": 0.091796875, "learning_rate": 0.0019912097219380127, "loss": 0.2562, "step": 45480 }, { "epoch": 0.32288001305999614, "grad_norm": 0.095703125, "learning_rate": 0.001991205775480719, "loss": 0.2437, "step": 45490 }, { "epoch": 0.3229509912998422, "grad_norm": 0.08251953125, "learning_rate": 0.0019912018281420785, "loss": 0.2517, "step": 45500 }, { "epoch": 0.32302196953968837, "grad_norm": 0.12158203125, "learning_rate": 0.001991197879922096, "loss": 0.2596, "step": 45510 }, { "epoch": 0.3230929477795345, "grad_norm": 0.10302734375, "learning_rate": 0.0019911939308207742, "loss": 0.2538, "step": 45520 }, { "epoch": 0.3231639260193806, "grad_norm": 0.080078125, "learning_rate": 0.0019911899808381173, "loss": 0.2299, "step": 45530 }, { "epoch": 0.32323490425922674, "grad_norm": 0.1552734375, "learning_rate": 0.0019911860299741293, "loss": 0.2822, "step": 45540 }, { "epoch": 0.3233058824990728, "grad_norm": 0.10205078125, "learning_rate": 0.0019911820782288142, "loss": 0.2365, "step": 45550 }, { "epoch": 0.32337686073891897, "grad_norm": 0.078125, "learning_rate": 0.0019911781256021764, "loss": 0.2535, "step": 45560 }, { "epoch": 0.3234478389787651, "grad_norm": 0.11474609375, "learning_rate": 0.0019911741720942187, "loss": 0.2571, "step": 45570 }, { "epoch": 0.3235188172186112, "grad_norm": 0.2265625, "learning_rate": 0.0019911702177049457, "loss": 0.2574, "step": 45580 }, { "epoch": 0.32358979545845734, "grad_norm": 0.07763671875, "learning_rate": 0.0019911662624343615, "loss": 0.2392, "step": 45590 }, { "epoch": 0.32366077369830343, "grad_norm": 0.0966796875, "learning_rate": 0.0019911623062824693, "loss": 0.2356, "step": 45600 }, { "epoch": 0.32373175193814957, "grad_norm": 0.1103515625, "learning_rate": 0.001991158349249274, "loss": 0.2466, "step": 45610 }, { "epoch": 0.32380273017799566, "grad_norm": 0.11962890625, "learning_rate": 0.0019911543913347786, "loss": 0.267, "step": 45620 }, { "epoch": 0.3238737084178418, "grad_norm": 0.11181640625, "learning_rate": 0.0019911504325389874, "loss": 0.253, "step": 45630 }, { "epoch": 0.32394468665768794, "grad_norm": 0.10205078125, "learning_rate": 0.001991146472861904, "loss": 0.2426, "step": 45640 }, { "epoch": 0.32401566489753403, "grad_norm": 0.109375, "learning_rate": 0.0019911425123035334, "loss": 0.2491, "step": 45650 }, { "epoch": 0.32408664313738017, "grad_norm": 0.1044921875, "learning_rate": 0.001991138550863878, "loss": 0.28, "step": 45660 }, { "epoch": 0.32415762137722626, "grad_norm": 0.068359375, "learning_rate": 0.0019911345885429426, "loss": 0.2442, "step": 45670 }, { "epoch": 0.3242285996170724, "grad_norm": 0.115234375, "learning_rate": 0.001991130625340731, "loss": 0.256, "step": 45680 }, { "epoch": 0.32429957785691854, "grad_norm": 0.08984375, "learning_rate": 0.0019911266612572466, "loss": 0.2453, "step": 45690 }, { "epoch": 0.32437055609676463, "grad_norm": 0.115234375, "learning_rate": 0.0019911226962924943, "loss": 0.2496, "step": 45700 }, { "epoch": 0.32444153433661077, "grad_norm": 0.0712890625, "learning_rate": 0.0019911187304464777, "loss": 0.2505, "step": 45710 }, { "epoch": 0.32451251257645686, "grad_norm": 0.1796875, "learning_rate": 0.0019911147637192, "loss": 0.2461, "step": 45720 }, { "epoch": 0.324583490816303, "grad_norm": 0.16015625, "learning_rate": 0.0019911107961106656, "loss": 0.2616, "step": 45730 }, { "epoch": 0.3246544690561491, "grad_norm": 0.14453125, "learning_rate": 0.001991106827620879, "loss": 0.2438, "step": 45740 }, { "epoch": 0.32472544729599523, "grad_norm": 0.1455078125, "learning_rate": 0.0019911028582498434, "loss": 0.2454, "step": 45750 }, { "epoch": 0.32479642553584137, "grad_norm": 0.10693359375, "learning_rate": 0.001991098887997563, "loss": 0.2753, "step": 45760 }, { "epoch": 0.32486740377568746, "grad_norm": 0.138671875, "learning_rate": 0.001991094916864041, "loss": 0.2788, "step": 45770 }, { "epoch": 0.3249383820155336, "grad_norm": 0.095703125, "learning_rate": 0.0019910909448492826, "loss": 0.2409, "step": 45780 }, { "epoch": 0.3250093602553797, "grad_norm": 0.0966796875, "learning_rate": 0.0019910869719532913, "loss": 0.2531, "step": 45790 }, { "epoch": 0.32508033849522583, "grad_norm": 0.1298828125, "learning_rate": 0.00199108299817607, "loss": 0.2588, "step": 45800 }, { "epoch": 0.32515131673507197, "grad_norm": 0.11962890625, "learning_rate": 0.0019910790235176242, "loss": 0.253, "step": 45810 }, { "epoch": 0.32522229497491806, "grad_norm": 0.19921875, "learning_rate": 0.001991075047977957, "loss": 0.2516, "step": 45820 }, { "epoch": 0.3252932732147642, "grad_norm": 0.07666015625, "learning_rate": 0.001991071071557072, "loss": 0.2442, "step": 45830 }, { "epoch": 0.3253642514546103, "grad_norm": 0.07861328125, "learning_rate": 0.0019910670942549736, "loss": 0.2596, "step": 45840 }, { "epoch": 0.32543522969445643, "grad_norm": 0.1416015625, "learning_rate": 0.001991063116071666, "loss": 0.242, "step": 45850 }, { "epoch": 0.3255062079343025, "grad_norm": 0.1416015625, "learning_rate": 0.0019910591370071533, "loss": 0.2592, "step": 45860 }, { "epoch": 0.32557718617414866, "grad_norm": 0.158203125, "learning_rate": 0.0019910551570614383, "loss": 0.2649, "step": 45870 }, { "epoch": 0.3256481644139948, "grad_norm": 0.1650390625, "learning_rate": 0.0019910511762345254, "loss": 0.2374, "step": 45880 }, { "epoch": 0.3257191426538409, "grad_norm": 0.08984375, "learning_rate": 0.001991047194526419, "loss": 0.2739, "step": 45890 }, { "epoch": 0.32579012089368703, "grad_norm": 0.142578125, "learning_rate": 0.001991043211937123, "loss": 0.2717, "step": 45900 }, { "epoch": 0.3258610991335331, "grad_norm": 0.07373046875, "learning_rate": 0.001991039228466641, "loss": 0.2468, "step": 45910 }, { "epoch": 0.32593207737337926, "grad_norm": 0.10400390625, "learning_rate": 0.001991035244114977, "loss": 0.2652, "step": 45920 }, { "epoch": 0.3260030556132254, "grad_norm": 0.07861328125, "learning_rate": 0.001991031258882135, "loss": 0.2495, "step": 45930 }, { "epoch": 0.3260740338530715, "grad_norm": 0.0986328125, "learning_rate": 0.001991027272768119, "loss": 0.2599, "step": 45940 }, { "epoch": 0.32614501209291763, "grad_norm": 0.09912109375, "learning_rate": 0.0019910232857729324, "loss": 0.2696, "step": 45950 }, { "epoch": 0.3262159903327637, "grad_norm": 0.11572265625, "learning_rate": 0.0019910192978965804, "loss": 0.2481, "step": 45960 }, { "epoch": 0.32628696857260986, "grad_norm": 0.0791015625, "learning_rate": 0.0019910153091390655, "loss": 0.2426, "step": 45970 }, { "epoch": 0.32635794681245595, "grad_norm": 0.091796875, "learning_rate": 0.0019910113195003923, "loss": 0.2438, "step": 45980 }, { "epoch": 0.3264289250523021, "grad_norm": 0.12451171875, "learning_rate": 0.001991007328980565, "loss": 0.2503, "step": 45990 }, { "epoch": 0.32649990329214823, "grad_norm": 0.07080078125, "learning_rate": 0.0019910033375795874, "loss": 0.2421, "step": 46000 }, { "epoch": 0.32649990329214823, "eval_covost2-zh-en_loss": 3.8344364166259766, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.6793, "eval_covost2-zh-en_samples_per_second": 3.095, "eval_covost2-zh-en_steps_per_second": 0.193, "step": 46000 }, { "epoch": 0.32649990329214823, "eval_covost2-en-zh_loss": 3.101886749267578, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.7051, "eval_covost2-en-zh_samples_per_second": 3.091, "eval_covost2-en-zh_steps_per_second": 0.193, "step": 46000 }, { "epoch": 0.3265708815319943, "grad_norm": 0.12109375, "learning_rate": 0.001990999345297463, "loss": 0.2598, "step": 46010 }, { "epoch": 0.32664185977184046, "grad_norm": 0.1572265625, "learning_rate": 0.0019909953521341965, "loss": 0.2376, "step": 46020 }, { "epoch": 0.32671283801168655, "grad_norm": 0.1142578125, "learning_rate": 0.0019909913580897914, "loss": 0.2336, "step": 46030 }, { "epoch": 0.3267838162515327, "grad_norm": 0.1767578125, "learning_rate": 0.0019909873631642518, "loss": 0.2538, "step": 46040 }, { "epoch": 0.32685479449137883, "grad_norm": 0.0927734375, "learning_rate": 0.001990983367357581, "loss": 0.2513, "step": 46050 }, { "epoch": 0.3269257727312249, "grad_norm": 0.271484375, "learning_rate": 0.001990979370669784, "loss": 0.2431, "step": 46060 }, { "epoch": 0.32699675097107106, "grad_norm": 0.11474609375, "learning_rate": 0.001990975373100864, "loss": 0.2546, "step": 46070 }, { "epoch": 0.32706772921091715, "grad_norm": 0.08935546875, "learning_rate": 0.001990971374650825, "loss": 0.2599, "step": 46080 }, { "epoch": 0.3271387074507633, "grad_norm": 0.12109375, "learning_rate": 0.0019909673753196713, "loss": 0.2635, "step": 46090 }, { "epoch": 0.3272096856906094, "grad_norm": 0.10498046875, "learning_rate": 0.0019909633751074066, "loss": 0.247, "step": 46100 }, { "epoch": 0.3272806639304555, "grad_norm": 0.181640625, "learning_rate": 0.001990959374014035, "loss": 0.2549, "step": 46110 }, { "epoch": 0.32735164217030166, "grad_norm": 0.08349609375, "learning_rate": 0.001990955372039561, "loss": 0.2254, "step": 46120 }, { "epoch": 0.32742262041014775, "grad_norm": 0.10791015625, "learning_rate": 0.0019909513691839876, "loss": 0.2505, "step": 46130 }, { "epoch": 0.3274935986499939, "grad_norm": 0.1220703125, "learning_rate": 0.001990947365447319, "loss": 0.2447, "step": 46140 }, { "epoch": 0.32756457688984, "grad_norm": 0.2294921875, "learning_rate": 0.001990943360829559, "loss": 0.2563, "step": 46150 }, { "epoch": 0.3276355551296861, "grad_norm": 0.07958984375, "learning_rate": 0.0019909393553307125, "loss": 0.271, "step": 46160 }, { "epoch": 0.32770653336953226, "grad_norm": 0.205078125, "learning_rate": 0.0019909353489507824, "loss": 0.2679, "step": 46170 }, { "epoch": 0.32777751160937835, "grad_norm": 0.1494140625, "learning_rate": 0.0019909313416897733, "loss": 0.2484, "step": 46180 }, { "epoch": 0.3278484898492245, "grad_norm": 0.1806640625, "learning_rate": 0.0019909273335476886, "loss": 0.2461, "step": 46190 }, { "epoch": 0.3279194680890706, "grad_norm": 0.1787109375, "learning_rate": 0.001990923324524533, "loss": 0.2373, "step": 46200 }, { "epoch": 0.3279904463289167, "grad_norm": 0.1162109375, "learning_rate": 0.00199091931462031, "loss": 0.2475, "step": 46210 }, { "epoch": 0.3280614245687628, "grad_norm": 0.11767578125, "learning_rate": 0.0019909153038350238, "loss": 0.2578, "step": 46220 }, { "epoch": 0.32813240280860895, "grad_norm": 0.103515625, "learning_rate": 0.001990911292168678, "loss": 0.2467, "step": 46230 }, { "epoch": 0.3282033810484551, "grad_norm": 0.12353515625, "learning_rate": 0.0019909072796212767, "loss": 0.253, "step": 46240 }, { "epoch": 0.3282743592883012, "grad_norm": 0.08837890625, "learning_rate": 0.001990903266192824, "loss": 0.2708, "step": 46250 }, { "epoch": 0.3283453375281473, "grad_norm": 0.076171875, "learning_rate": 0.0019908992518833236, "loss": 0.2734, "step": 46260 }, { "epoch": 0.3284163157679934, "grad_norm": 0.1484375, "learning_rate": 0.00199089523669278, "loss": 0.2351, "step": 46270 }, { "epoch": 0.32848729400783955, "grad_norm": 0.11474609375, "learning_rate": 0.0019908912206211965, "loss": 0.2633, "step": 46280 }, { "epoch": 0.3285582722476857, "grad_norm": 0.1494140625, "learning_rate": 0.001990887203668578, "loss": 0.2825, "step": 46290 }, { "epoch": 0.3286292504875318, "grad_norm": 0.10107421875, "learning_rate": 0.001990883185834927, "loss": 0.2421, "step": 46300 }, { "epoch": 0.3287002287273779, "grad_norm": 0.2470703125, "learning_rate": 0.001990879167120249, "loss": 0.2813, "step": 46310 }, { "epoch": 0.328771206967224, "grad_norm": 0.08203125, "learning_rate": 0.0019908751475245473, "loss": 0.2381, "step": 46320 }, { "epoch": 0.32884218520707015, "grad_norm": 0.2197265625, "learning_rate": 0.001990871127047826, "loss": 0.2684, "step": 46330 }, { "epoch": 0.32891316344691623, "grad_norm": 0.21484375, "learning_rate": 0.001990867105690089, "loss": 0.2709, "step": 46340 }, { "epoch": 0.3289841416867624, "grad_norm": 0.109375, "learning_rate": 0.00199086308345134, "loss": 0.2496, "step": 46350 }, { "epoch": 0.3290551199266085, "grad_norm": 0.13671875, "learning_rate": 0.0019908590603315833, "loss": 0.2481, "step": 46360 }, { "epoch": 0.3291260981664546, "grad_norm": 0.11328125, "learning_rate": 0.001990855036330823, "loss": 0.2574, "step": 46370 }, { "epoch": 0.32919707640630075, "grad_norm": 0.1748046875, "learning_rate": 0.0019908510114490627, "loss": 0.2435, "step": 46380 }, { "epoch": 0.32926805464614683, "grad_norm": 0.1298828125, "learning_rate": 0.001990846985686306, "loss": 0.2426, "step": 46390 }, { "epoch": 0.329339032885993, "grad_norm": 0.1064453125, "learning_rate": 0.0019908429590425585, "loss": 0.2505, "step": 46400 }, { "epoch": 0.3294100111258391, "grad_norm": 0.115234375, "learning_rate": 0.001990838931517823, "loss": 0.2716, "step": 46410 }, { "epoch": 0.3294809893656852, "grad_norm": 0.16796875, "learning_rate": 0.0019908349031121032, "loss": 0.2573, "step": 46420 }, { "epoch": 0.32955196760553135, "grad_norm": 0.126953125, "learning_rate": 0.0019908308738254034, "loss": 0.2541, "step": 46430 }, { "epoch": 0.32962294584537744, "grad_norm": 0.12890625, "learning_rate": 0.0019908268436577285, "loss": 0.2507, "step": 46440 }, { "epoch": 0.3296939240852236, "grad_norm": 0.12158203125, "learning_rate": 0.001990822812609081, "loss": 0.2594, "step": 46450 }, { "epoch": 0.32976490232506966, "grad_norm": 0.125, "learning_rate": 0.0019908187806794656, "loss": 0.2433, "step": 46460 }, { "epoch": 0.3298358805649158, "grad_norm": 0.130859375, "learning_rate": 0.0019908147478688866, "loss": 0.2582, "step": 46470 }, { "epoch": 0.32990685880476195, "grad_norm": 0.0947265625, "learning_rate": 0.0019908107141773474, "loss": 0.244, "step": 46480 }, { "epoch": 0.32997783704460804, "grad_norm": 0.0849609375, "learning_rate": 0.001990806679604852, "loss": 0.2371, "step": 46490 }, { "epoch": 0.3300488152844542, "grad_norm": 0.1552734375, "learning_rate": 0.0019908026441514052, "loss": 0.2504, "step": 46500 }, { "epoch": 0.33011979352430026, "grad_norm": 0.1162109375, "learning_rate": 0.00199079860781701, "loss": 0.253, "step": 46510 }, { "epoch": 0.3301907717641464, "grad_norm": 0.267578125, "learning_rate": 0.001990794570601671, "loss": 0.2464, "step": 46520 }, { "epoch": 0.33026175000399255, "grad_norm": 0.09228515625, "learning_rate": 0.001990790532505392, "loss": 0.2627, "step": 46530 }, { "epoch": 0.33033272824383864, "grad_norm": 0.1494140625, "learning_rate": 0.0019907864935281767, "loss": 0.2427, "step": 46540 }, { "epoch": 0.3304037064836848, "grad_norm": 0.126953125, "learning_rate": 0.00199078245367003, "loss": 0.2582, "step": 46550 }, { "epoch": 0.33047468472353086, "grad_norm": 0.0859375, "learning_rate": 0.0019907784129309545, "loss": 0.2402, "step": 46560 }, { "epoch": 0.330545662963377, "grad_norm": 0.154296875, "learning_rate": 0.0019907743713109558, "loss": 0.2484, "step": 46570 }, { "epoch": 0.33061664120322315, "grad_norm": 0.103515625, "learning_rate": 0.0019907703288100363, "loss": 0.2525, "step": 46580 }, { "epoch": 0.33068761944306924, "grad_norm": 0.09765625, "learning_rate": 0.001990766285428201, "loss": 0.2686, "step": 46590 }, { "epoch": 0.3307585976829154, "grad_norm": 0.1064453125, "learning_rate": 0.001990762241165454, "loss": 0.2261, "step": 46600 }, { "epoch": 0.33082957592276147, "grad_norm": 0.09326171875, "learning_rate": 0.001990758196021799, "loss": 0.224, "step": 46610 }, { "epoch": 0.3309005541626076, "grad_norm": 0.2158203125, "learning_rate": 0.00199075414999724, "loss": 0.2639, "step": 46620 }, { "epoch": 0.3309715324024537, "grad_norm": 0.06787109375, "learning_rate": 0.0019907501030917802, "loss": 0.2458, "step": 46630 }, { "epoch": 0.33104251064229984, "grad_norm": 0.384765625, "learning_rate": 0.001990746055305425, "loss": 0.2436, "step": 46640 }, { "epoch": 0.331113488882146, "grad_norm": 0.10791015625, "learning_rate": 0.0019907420066381776, "loss": 0.2465, "step": 46650 }, { "epoch": 0.33118446712199207, "grad_norm": 0.1484375, "learning_rate": 0.001990737957090042, "loss": 0.252, "step": 46660 }, { "epoch": 0.3312554453618382, "grad_norm": 0.1298828125, "learning_rate": 0.001990733906661023, "loss": 0.2662, "step": 46670 }, { "epoch": 0.3313264236016843, "grad_norm": 0.11279296875, "learning_rate": 0.0019907298553511234, "loss": 0.2598, "step": 46680 }, { "epoch": 0.33139740184153044, "grad_norm": 0.1845703125, "learning_rate": 0.001990725803160348, "loss": 0.239, "step": 46690 }, { "epoch": 0.3314683800813766, "grad_norm": 0.09765625, "learning_rate": 0.0019907217500887008, "loss": 0.2465, "step": 46700 }, { "epoch": 0.33153935832122267, "grad_norm": 0.1943359375, "learning_rate": 0.0019907176961361855, "loss": 0.2449, "step": 46710 }, { "epoch": 0.3316103365610688, "grad_norm": 0.1005859375, "learning_rate": 0.001990713641302806, "loss": 0.2497, "step": 46720 }, { "epoch": 0.3316813148009149, "grad_norm": 0.0986328125, "learning_rate": 0.001990709585588567, "loss": 0.2651, "step": 46730 }, { "epoch": 0.33175229304076104, "grad_norm": 0.087890625, "learning_rate": 0.0019907055289934716, "loss": 0.2673, "step": 46740 }, { "epoch": 0.3318232712806071, "grad_norm": 0.09375, "learning_rate": 0.0019907014715175243, "loss": 0.2619, "step": 46750 }, { "epoch": 0.33189424952045327, "grad_norm": 0.11181640625, "learning_rate": 0.001990697413160729, "loss": 0.2528, "step": 46760 }, { "epoch": 0.3319652277602994, "grad_norm": 0.11572265625, "learning_rate": 0.0019906933539230905, "loss": 0.2342, "step": 46770 }, { "epoch": 0.3320362060001455, "grad_norm": 0.2451171875, "learning_rate": 0.001990689293804611, "loss": 0.2502, "step": 46780 }, { "epoch": 0.33210718423999164, "grad_norm": 0.1025390625, "learning_rate": 0.0019906852328052966, "loss": 0.2298, "step": 46790 }, { "epoch": 0.3321781624798377, "grad_norm": 0.1259765625, "learning_rate": 0.0019906811709251497, "loss": 0.2528, "step": 46800 }, { "epoch": 0.33224914071968387, "grad_norm": 0.0947265625, "learning_rate": 0.001990677108164175, "loss": 0.2582, "step": 46810 }, { "epoch": 0.33232011895953, "grad_norm": 0.1875, "learning_rate": 0.0019906730445223768, "loss": 0.2662, "step": 46820 }, { "epoch": 0.3323910971993761, "grad_norm": 0.11181640625, "learning_rate": 0.0019906689799997586, "loss": 0.2356, "step": 46830 }, { "epoch": 0.33246207543922224, "grad_norm": 0.189453125, "learning_rate": 0.0019906649145963245, "loss": 0.2606, "step": 46840 }, { "epoch": 0.3325330536790683, "grad_norm": 0.11865234375, "learning_rate": 0.0019906608483120786, "loss": 0.2642, "step": 46850 }, { "epoch": 0.33260403191891447, "grad_norm": 0.111328125, "learning_rate": 0.001990656781147025, "loss": 0.2558, "step": 46860 }, { "epoch": 0.33267501015876055, "grad_norm": 0.125, "learning_rate": 0.0019906527131011674, "loss": 0.2444, "step": 46870 }, { "epoch": 0.3327459883986067, "grad_norm": 0.07421875, "learning_rate": 0.001990648644174511, "loss": 0.2735, "step": 46880 }, { "epoch": 0.33281696663845284, "grad_norm": 0.11083984375, "learning_rate": 0.0019906445743670584, "loss": 0.2646, "step": 46890 }, { "epoch": 0.3328879448782989, "grad_norm": 0.09423828125, "learning_rate": 0.0019906405036788136, "loss": 0.2521, "step": 46900 }, { "epoch": 0.33295892311814507, "grad_norm": 0.10400390625, "learning_rate": 0.001990636432109782, "loss": 0.2491, "step": 46910 }, { "epoch": 0.33302990135799115, "grad_norm": 0.138671875, "learning_rate": 0.001990632359659966, "loss": 0.2574, "step": 46920 }, { "epoch": 0.3331008795978373, "grad_norm": 0.06689453125, "learning_rate": 0.001990628286329371, "loss": 0.2301, "step": 46930 }, { "epoch": 0.33317185783768344, "grad_norm": 0.212890625, "learning_rate": 0.001990624212118, "loss": 0.2512, "step": 46940 }, { "epoch": 0.3332428360775295, "grad_norm": 0.13671875, "learning_rate": 0.001990620137025858, "loss": 0.2585, "step": 46950 }, { "epoch": 0.33331381431737567, "grad_norm": 0.0986328125, "learning_rate": 0.001990616061052948, "loss": 0.2627, "step": 46960 }, { "epoch": 0.33338479255722175, "grad_norm": 0.11865234375, "learning_rate": 0.001990611984199275, "loss": 0.2487, "step": 46970 }, { "epoch": 0.3334557707970679, "grad_norm": 0.06884765625, "learning_rate": 0.001990607906464842, "loss": 0.2303, "step": 46980 }, { "epoch": 0.333526749036914, "grad_norm": 0.115234375, "learning_rate": 0.0019906038278496544, "loss": 0.2248, "step": 46990 }, { "epoch": 0.3335977272767601, "grad_norm": 0.1162109375, "learning_rate": 0.001990599748353715, "loss": 0.2453, "step": 47000 }, { "epoch": 0.33366870551660627, "grad_norm": 0.107421875, "learning_rate": 0.0019905956679770282, "loss": 0.2576, "step": 47010 }, { "epoch": 0.33373968375645235, "grad_norm": 0.1728515625, "learning_rate": 0.001990591586719598, "loss": 0.261, "step": 47020 }, { "epoch": 0.3338106619962985, "grad_norm": 0.1962890625, "learning_rate": 0.0019905875045814285, "loss": 0.2496, "step": 47030 }, { "epoch": 0.3338816402361446, "grad_norm": 0.10498046875, "learning_rate": 0.0019905834215625242, "loss": 0.2505, "step": 47040 }, { "epoch": 0.3339526184759907, "grad_norm": 0.10693359375, "learning_rate": 0.0019905793376628882, "loss": 0.2611, "step": 47050 }, { "epoch": 0.33402359671583687, "grad_norm": 0.08447265625, "learning_rate": 0.0019905752528825253, "loss": 0.2485, "step": 47060 }, { "epoch": 0.33409457495568295, "grad_norm": 0.07763671875, "learning_rate": 0.0019905711672214394, "loss": 0.2427, "step": 47070 }, { "epoch": 0.3341655531955291, "grad_norm": 0.125, "learning_rate": 0.0019905670806796344, "loss": 0.2524, "step": 47080 }, { "epoch": 0.3342365314353752, "grad_norm": 0.20703125, "learning_rate": 0.001990562993257114, "loss": 0.2527, "step": 47090 }, { "epoch": 0.3343075096752213, "grad_norm": 0.09423828125, "learning_rate": 0.0019905589049538834, "loss": 0.2482, "step": 47100 }, { "epoch": 0.3343784879150674, "grad_norm": 0.1201171875, "learning_rate": 0.001990554815769945, "loss": 0.2553, "step": 47110 }, { "epoch": 0.33444946615491356, "grad_norm": 0.318359375, "learning_rate": 0.0019905507257053042, "loss": 0.2561, "step": 47120 }, { "epoch": 0.3345204443947597, "grad_norm": 0.11181640625, "learning_rate": 0.0019905466347599644, "loss": 0.2644, "step": 47130 }, { "epoch": 0.3345914226346058, "grad_norm": 0.12109375, "learning_rate": 0.0019905425429339297, "loss": 0.2546, "step": 47140 }, { "epoch": 0.3346624008744519, "grad_norm": 0.109375, "learning_rate": 0.0019905384502272045, "loss": 0.2482, "step": 47150 }, { "epoch": 0.334733379114298, "grad_norm": 0.1396484375, "learning_rate": 0.0019905343566397923, "loss": 0.26, "step": 47160 }, { "epoch": 0.33480435735414416, "grad_norm": 0.1044921875, "learning_rate": 0.0019905302621716974, "loss": 0.2538, "step": 47170 }, { "epoch": 0.3348753355939903, "grad_norm": 0.109375, "learning_rate": 0.0019905261668229242, "loss": 0.2486, "step": 47180 }, { "epoch": 0.3349463138338364, "grad_norm": 0.095703125, "learning_rate": 0.0019905220705934766, "loss": 0.2353, "step": 47190 }, { "epoch": 0.3350172920736825, "grad_norm": 0.1103515625, "learning_rate": 0.0019905179734833584, "loss": 0.2499, "step": 47200 }, { "epoch": 0.3350882703135286, "grad_norm": 0.0859375, "learning_rate": 0.001990513875492573, "loss": 0.2445, "step": 47210 }, { "epoch": 0.33515924855337476, "grad_norm": 0.09619140625, "learning_rate": 0.001990509776621126, "loss": 0.2325, "step": 47220 }, { "epoch": 0.33523022679322084, "grad_norm": 0.384765625, "learning_rate": 0.00199050567686902, "loss": 0.2739, "step": 47230 }, { "epoch": 0.335301205033067, "grad_norm": 0.177734375, "learning_rate": 0.00199050157623626, "loss": 0.2501, "step": 47240 }, { "epoch": 0.3353721832729131, "grad_norm": 0.1435546875, "learning_rate": 0.00199049747472285, "loss": 0.2386, "step": 47250 }, { "epoch": 0.3354431615127592, "grad_norm": 0.125, "learning_rate": 0.0019904933723287935, "loss": 0.2746, "step": 47260 }, { "epoch": 0.33551413975260536, "grad_norm": 0.1025390625, "learning_rate": 0.001990489269054095, "loss": 0.2478, "step": 47270 }, { "epoch": 0.33558511799245144, "grad_norm": 0.12890625, "learning_rate": 0.001990485164898759, "loss": 0.2534, "step": 47280 }, { "epoch": 0.3356560962322976, "grad_norm": 0.1982421875, "learning_rate": 0.001990481059862788, "loss": 0.2433, "step": 47290 }, { "epoch": 0.3357270744721437, "grad_norm": 0.087890625, "learning_rate": 0.0019904769539461874, "loss": 0.2451, "step": 47300 }, { "epoch": 0.3357980527119898, "grad_norm": 0.1328125, "learning_rate": 0.001990472847148961, "loss": 0.2445, "step": 47310 }, { "epoch": 0.33586903095183596, "grad_norm": 0.130859375, "learning_rate": 0.001990468739471113, "loss": 0.2573, "step": 47320 }, { "epoch": 0.33594000919168204, "grad_norm": 0.109375, "learning_rate": 0.0019904646309126463, "loss": 0.2466, "step": 47330 }, { "epoch": 0.3360109874315282, "grad_norm": 0.09716796875, "learning_rate": 0.001990460521473567, "loss": 0.2461, "step": 47340 }, { "epoch": 0.33608196567137427, "grad_norm": 0.10693359375, "learning_rate": 0.0019904564111538773, "loss": 0.2661, "step": 47350 }, { "epoch": 0.3361529439112204, "grad_norm": 0.11865234375, "learning_rate": 0.001990452299953582, "loss": 0.2509, "step": 47360 }, { "epoch": 0.33622392215106656, "grad_norm": 0.0908203125, "learning_rate": 0.001990448187872686, "loss": 0.2471, "step": 47370 }, { "epoch": 0.33629490039091264, "grad_norm": 0.08935546875, "learning_rate": 0.001990444074911192, "loss": 0.2488, "step": 47380 }, { "epoch": 0.3363658786307588, "grad_norm": 0.09130859375, "learning_rate": 0.0019904399610691046, "loss": 0.2351, "step": 47390 }, { "epoch": 0.3364368568706049, "grad_norm": 0.1396484375, "learning_rate": 0.0019904358463464274, "loss": 0.2393, "step": 47400 }, { "epoch": 0.336507835110451, "grad_norm": 0.236328125, "learning_rate": 0.0019904317307431656, "loss": 0.259, "step": 47410 }, { "epoch": 0.33657881335029716, "grad_norm": 0.267578125, "learning_rate": 0.0019904276142593226, "loss": 0.248, "step": 47420 }, { "epoch": 0.33664979159014324, "grad_norm": 0.07958984375, "learning_rate": 0.0019904234968949022, "loss": 0.2452, "step": 47430 }, { "epoch": 0.3367207698299894, "grad_norm": 0.142578125, "learning_rate": 0.0019904193786499094, "loss": 0.2628, "step": 47440 }, { "epoch": 0.3367917480698355, "grad_norm": 0.095703125, "learning_rate": 0.001990415259524347, "loss": 0.2465, "step": 47450 }, { "epoch": 0.3368627263096816, "grad_norm": 0.1005859375, "learning_rate": 0.00199041113951822, "loss": 0.2401, "step": 47460 }, { "epoch": 0.3369337045495277, "grad_norm": 0.1513671875, "learning_rate": 0.001990407018631532, "loss": 0.2411, "step": 47470 }, { "epoch": 0.33700468278937384, "grad_norm": 0.09765625, "learning_rate": 0.0019904028968642873, "loss": 0.246, "step": 47480 }, { "epoch": 0.33707566102922, "grad_norm": 0.1142578125, "learning_rate": 0.00199039877421649, "loss": 0.2632, "step": 47490 }, { "epoch": 0.3371466392690661, "grad_norm": 0.109375, "learning_rate": 0.0019903946506881444, "loss": 0.2455, "step": 47500 }, { "epoch": 0.3372176175089122, "grad_norm": 0.091796875, "learning_rate": 0.001990390526279254, "loss": 0.2501, "step": 47510 }, { "epoch": 0.3372885957487583, "grad_norm": 0.09326171875, "learning_rate": 0.001990386400989823, "loss": 0.2399, "step": 47520 }, { "epoch": 0.33735957398860444, "grad_norm": 0.130859375, "learning_rate": 0.001990382274819856, "loss": 0.2508, "step": 47530 }, { "epoch": 0.3374305522284506, "grad_norm": 0.09716796875, "learning_rate": 0.0019903781477693568, "loss": 0.2354, "step": 47540 }, { "epoch": 0.3375015304682967, "grad_norm": 0.181640625, "learning_rate": 0.001990374019838329, "loss": 0.2404, "step": 47550 }, { "epoch": 0.3375725087081428, "grad_norm": 0.1552734375, "learning_rate": 0.001990369891026777, "loss": 0.2553, "step": 47560 }, { "epoch": 0.3376434869479889, "grad_norm": 0.06396484375, "learning_rate": 0.0019903657613347054, "loss": 0.2565, "step": 47570 }, { "epoch": 0.33771446518783504, "grad_norm": 0.1298828125, "learning_rate": 0.001990361630762118, "loss": 0.2425, "step": 47580 }, { "epoch": 0.33778544342768113, "grad_norm": 0.07861328125, "learning_rate": 0.001990357499309018, "loss": 0.2576, "step": 47590 }, { "epoch": 0.3378564216675273, "grad_norm": 0.0751953125, "learning_rate": 0.001990353366975411, "loss": 0.2613, "step": 47600 }, { "epoch": 0.3379273999073734, "grad_norm": 0.1259765625, "learning_rate": 0.0019903492337613, "loss": 0.2579, "step": 47610 }, { "epoch": 0.3379983781472195, "grad_norm": 0.07421875, "learning_rate": 0.0019903450996666895, "loss": 0.2464, "step": 47620 }, { "epoch": 0.33806935638706564, "grad_norm": 0.146484375, "learning_rate": 0.0019903409646915837, "loss": 0.2527, "step": 47630 }, { "epoch": 0.33814033462691173, "grad_norm": 0.08740234375, "learning_rate": 0.001990336828835986, "loss": 0.2543, "step": 47640 }, { "epoch": 0.3382113128667579, "grad_norm": 0.0908203125, "learning_rate": 0.0019903326920999013, "loss": 0.2541, "step": 47650 }, { "epoch": 0.338282291106604, "grad_norm": 0.103515625, "learning_rate": 0.0019903285544833334, "loss": 0.2438, "step": 47660 }, { "epoch": 0.3383532693464501, "grad_norm": 0.0927734375, "learning_rate": 0.001990324415986286, "loss": 0.2531, "step": 47670 }, { "epoch": 0.33842424758629625, "grad_norm": 0.1142578125, "learning_rate": 0.001990320276608764, "loss": 0.2631, "step": 47680 }, { "epoch": 0.33849522582614233, "grad_norm": 0.11474609375, "learning_rate": 0.0019903161363507707, "loss": 0.2494, "step": 47690 }, { "epoch": 0.3385662040659885, "grad_norm": 0.1591796875, "learning_rate": 0.001990311995212311, "loss": 0.2628, "step": 47700 }, { "epoch": 0.3386371823058346, "grad_norm": 0.0830078125, "learning_rate": 0.001990307853193388, "loss": 0.2521, "step": 47710 }, { "epoch": 0.3387081605456807, "grad_norm": 0.1640625, "learning_rate": 0.0019903037102940065, "loss": 0.2556, "step": 47720 }, { "epoch": 0.33877913878552685, "grad_norm": 0.09814453125, "learning_rate": 0.0019902995665141703, "loss": 0.2578, "step": 47730 }, { "epoch": 0.33885011702537293, "grad_norm": 0.125, "learning_rate": 0.001990295421853884, "loss": 0.2531, "step": 47740 }, { "epoch": 0.3389210952652191, "grad_norm": 0.0966796875, "learning_rate": 0.001990291276313151, "loss": 0.2374, "step": 47750 }, { "epoch": 0.33899207350506516, "grad_norm": 0.12109375, "learning_rate": 0.001990287129891976, "loss": 0.2695, "step": 47760 }, { "epoch": 0.3390630517449113, "grad_norm": 0.1357421875, "learning_rate": 0.0019902829825903627, "loss": 0.2602, "step": 47770 }, { "epoch": 0.33913402998475745, "grad_norm": 0.0888671875, "learning_rate": 0.0019902788344083157, "loss": 0.2581, "step": 47780 }, { "epoch": 0.33920500822460353, "grad_norm": 0.09228515625, "learning_rate": 0.001990274685345838, "loss": 0.2578, "step": 47790 }, { "epoch": 0.3392759864644497, "grad_norm": 0.083984375, "learning_rate": 0.001990270535402935, "loss": 0.2394, "step": 47800 }, { "epoch": 0.33934696470429576, "grad_norm": 0.08251953125, "learning_rate": 0.00199026638457961, "loss": 0.2387, "step": 47810 }, { "epoch": 0.3394179429441419, "grad_norm": 0.09912109375, "learning_rate": 0.0019902622328758675, "loss": 0.2439, "step": 47820 }, { "epoch": 0.33948892118398805, "grad_norm": 0.119140625, "learning_rate": 0.001990258080291711, "loss": 0.2441, "step": 47830 }, { "epoch": 0.33955989942383413, "grad_norm": 0.1845703125, "learning_rate": 0.0019902539268271454, "loss": 0.2597, "step": 47840 }, { "epoch": 0.3396308776636803, "grad_norm": 0.06982421875, "learning_rate": 0.0019902497724821745, "loss": 0.2344, "step": 47850 }, { "epoch": 0.33970185590352636, "grad_norm": 0.07861328125, "learning_rate": 0.0019902456172568024, "loss": 0.2546, "step": 47860 }, { "epoch": 0.3397728341433725, "grad_norm": 0.271484375, "learning_rate": 0.001990241461151033, "loss": 0.2594, "step": 47870 }, { "epoch": 0.3398438123832186, "grad_norm": 0.173828125, "learning_rate": 0.0019902373041648705, "loss": 0.2663, "step": 47880 }, { "epoch": 0.33991479062306473, "grad_norm": 0.1044921875, "learning_rate": 0.0019902331462983195, "loss": 0.2436, "step": 47890 }, { "epoch": 0.3399857688629109, "grad_norm": 0.09130859375, "learning_rate": 0.0019902289875513837, "loss": 0.2642, "step": 47900 }, { "epoch": 0.34005674710275696, "grad_norm": 0.1015625, "learning_rate": 0.001990224827924067, "loss": 0.2462, "step": 47910 }, { "epoch": 0.3401277253426031, "grad_norm": 0.10302734375, "learning_rate": 0.0019902206674163735, "loss": 0.25, "step": 47920 }, { "epoch": 0.3401987035824492, "grad_norm": 0.126953125, "learning_rate": 0.0019902165060283077, "loss": 0.2447, "step": 47930 }, { "epoch": 0.34026968182229533, "grad_norm": 0.1171875, "learning_rate": 0.0019902123437598737, "loss": 0.2297, "step": 47940 }, { "epoch": 0.3403406600621415, "grad_norm": 0.08837890625, "learning_rate": 0.0019902081806110754, "loss": 0.2453, "step": 47950 }, { "epoch": 0.34041163830198756, "grad_norm": 0.09033203125, "learning_rate": 0.0019902040165819175, "loss": 0.242, "step": 47960 }, { "epoch": 0.3404826165418337, "grad_norm": 0.1298828125, "learning_rate": 0.0019901998516724025, "loss": 0.2493, "step": 47970 }, { "epoch": 0.3405535947816798, "grad_norm": 0.10595703125, "learning_rate": 0.0019901956858825367, "loss": 0.2287, "step": 47980 }, { "epoch": 0.34062457302152593, "grad_norm": 0.1142578125, "learning_rate": 0.001990191519212323, "loss": 0.2538, "step": 47990 }, { "epoch": 0.340695551261372, "grad_norm": 0.11279296875, "learning_rate": 0.0019901873516617653, "loss": 0.248, "step": 48000 }, { "epoch": 0.340695551261372, "eval_covost2-zh-en_loss": 3.836580991744995, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.6484, "eval_covost2-zh-en_samples_per_second": 3.1, "eval_covost2-zh-en_steps_per_second": 0.194, "step": 48000 }, { "epoch": 0.340695551261372, "eval_covost2-en-zh_loss": 3.1125426292419434, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.4353, "eval_covost2-en-zh_samples_per_second": 3.293, "eval_covost2-en-zh_steps_per_second": 0.206, "step": 48000 }, { "epoch": 0.34076652950121816, "grad_norm": 0.1298828125, "learning_rate": 0.001990183183230868, "loss": 0.2428, "step": 48010 }, { "epoch": 0.3408375077410643, "grad_norm": 0.12060546875, "learning_rate": 0.0019901790139196357, "loss": 0.2555, "step": 48020 }, { "epoch": 0.3409084859809104, "grad_norm": 0.1376953125, "learning_rate": 0.001990174843728072, "loss": 0.2637, "step": 48030 }, { "epoch": 0.34097946422075653, "grad_norm": 0.11962890625, "learning_rate": 0.001990170672656181, "loss": 0.2445, "step": 48040 }, { "epoch": 0.3410504424606026, "grad_norm": 0.107421875, "learning_rate": 0.0019901665007039677, "loss": 0.2622, "step": 48050 }, { "epoch": 0.34112142070044876, "grad_norm": 0.119140625, "learning_rate": 0.001990162327871435, "loss": 0.257, "step": 48060 }, { "epoch": 0.3411923989402949, "grad_norm": 0.103515625, "learning_rate": 0.0019901581541585875, "loss": 0.2712, "step": 48070 }, { "epoch": 0.341263377180141, "grad_norm": 0.130859375, "learning_rate": 0.0019901539795654294, "loss": 0.253, "step": 48080 }, { "epoch": 0.34133435541998713, "grad_norm": 0.10302734375, "learning_rate": 0.001990149804091965, "loss": 0.2339, "step": 48090 }, { "epoch": 0.3414053336598332, "grad_norm": 0.1533203125, "learning_rate": 0.001990145627738198, "loss": 0.2485, "step": 48100 }, { "epoch": 0.34147631189967936, "grad_norm": 0.1640625, "learning_rate": 0.0019901414505041327, "loss": 0.234, "step": 48110 }, { "epoch": 0.34154729013952545, "grad_norm": 0.10205078125, "learning_rate": 0.0019901372723897734, "loss": 0.2628, "step": 48120 }, { "epoch": 0.3416182683793716, "grad_norm": 0.1201171875, "learning_rate": 0.0019901330933951244, "loss": 0.2477, "step": 48130 }, { "epoch": 0.34168924661921773, "grad_norm": 0.208984375, "learning_rate": 0.001990128913520189, "loss": 0.2544, "step": 48140 }, { "epoch": 0.3417602248590638, "grad_norm": 0.06396484375, "learning_rate": 0.001990124732764972, "loss": 0.2454, "step": 48150 }, { "epoch": 0.34183120309890996, "grad_norm": 0.12890625, "learning_rate": 0.001990120551129478, "loss": 0.2606, "step": 48160 }, { "epoch": 0.34190218133875605, "grad_norm": 0.140625, "learning_rate": 0.0019901163686137104, "loss": 0.2592, "step": 48170 }, { "epoch": 0.3419731595786022, "grad_norm": 0.11962890625, "learning_rate": 0.0019901121852176733, "loss": 0.2428, "step": 48180 }, { "epoch": 0.34204413781844833, "grad_norm": 0.150390625, "learning_rate": 0.001990108000941371, "loss": 0.2515, "step": 48190 }, { "epoch": 0.3421151160582944, "grad_norm": 0.07666015625, "learning_rate": 0.0019901038157848078, "loss": 0.2377, "step": 48200 }, { "epoch": 0.34218609429814056, "grad_norm": 0.130859375, "learning_rate": 0.0019900996297479876, "loss": 0.2689, "step": 48210 }, { "epoch": 0.34225707253798665, "grad_norm": 0.09228515625, "learning_rate": 0.0019900954428309146, "loss": 0.235, "step": 48220 }, { "epoch": 0.3423280507778328, "grad_norm": 0.07666015625, "learning_rate": 0.001990091255033593, "loss": 0.246, "step": 48230 }, { "epoch": 0.3423990290176789, "grad_norm": 0.0986328125, "learning_rate": 0.001990087066356027, "loss": 0.2375, "step": 48240 }, { "epoch": 0.342470007257525, "grad_norm": 0.146484375, "learning_rate": 0.001990082876798221, "loss": 0.2548, "step": 48250 }, { "epoch": 0.34254098549737116, "grad_norm": 0.130859375, "learning_rate": 0.0019900786863601785, "loss": 0.234, "step": 48260 }, { "epoch": 0.34261196373721725, "grad_norm": 0.0869140625, "learning_rate": 0.001990074495041904, "loss": 0.2539, "step": 48270 }, { "epoch": 0.3426829419770634, "grad_norm": 0.11474609375, "learning_rate": 0.0019900703028434014, "loss": 0.241, "step": 48280 }, { "epoch": 0.3427539202169095, "grad_norm": 0.12451171875, "learning_rate": 0.001990066109764675, "loss": 0.2525, "step": 48290 }, { "epoch": 0.3428248984567556, "grad_norm": 0.099609375, "learning_rate": 0.0019900619158057294, "loss": 0.255, "step": 48300 }, { "epoch": 0.34289587669660176, "grad_norm": 0.11669921875, "learning_rate": 0.001990057720966568, "loss": 0.2722, "step": 48310 }, { "epoch": 0.34296685493644785, "grad_norm": 0.11376953125, "learning_rate": 0.001990053525247196, "loss": 0.2514, "step": 48320 }, { "epoch": 0.343037833176294, "grad_norm": 0.1640625, "learning_rate": 0.001990049328647616, "loss": 0.2637, "step": 48330 }, { "epoch": 0.3431088114161401, "grad_norm": 0.07861328125, "learning_rate": 0.0019900451311678333, "loss": 0.2721, "step": 48340 }, { "epoch": 0.3431797896559862, "grad_norm": 0.1396484375, "learning_rate": 0.001990040932807852, "loss": 0.2629, "step": 48350 }, { "epoch": 0.3432507678958323, "grad_norm": 0.0849609375, "learning_rate": 0.0019900367335676755, "loss": 0.2493, "step": 48360 }, { "epoch": 0.34332174613567845, "grad_norm": 0.08740234375, "learning_rate": 0.001990032533447309, "loss": 0.2372, "step": 48370 }, { "epoch": 0.3433927243755246, "grad_norm": 0.14453125, "learning_rate": 0.0019900283324467555, "loss": 0.2536, "step": 48380 }, { "epoch": 0.3434637026153707, "grad_norm": 0.09716796875, "learning_rate": 0.00199002413056602, "loss": 0.2487, "step": 48390 }, { "epoch": 0.3435346808552168, "grad_norm": 0.1103515625, "learning_rate": 0.001990019927805107, "loss": 0.2423, "step": 48400 }, { "epoch": 0.3436056590950629, "grad_norm": 0.099609375, "learning_rate": 0.0019900157241640195, "loss": 0.2457, "step": 48410 }, { "epoch": 0.34367663733490905, "grad_norm": 0.150390625, "learning_rate": 0.0019900115196427623, "loss": 0.2448, "step": 48420 }, { "epoch": 0.3437476155747552, "grad_norm": 0.1015625, "learning_rate": 0.0019900073142413393, "loss": 0.2558, "step": 48430 }, { "epoch": 0.3438185938146013, "grad_norm": 0.1201171875, "learning_rate": 0.001990003107959755, "loss": 0.2534, "step": 48440 }, { "epoch": 0.3438895720544474, "grad_norm": 0.107421875, "learning_rate": 0.0019899989007980136, "loss": 0.2528, "step": 48450 }, { "epoch": 0.3439605502942935, "grad_norm": 0.0947265625, "learning_rate": 0.001989994692756119, "loss": 0.2436, "step": 48460 }, { "epoch": 0.34403152853413965, "grad_norm": 0.154296875, "learning_rate": 0.0019899904838340753, "loss": 0.2361, "step": 48470 }, { "epoch": 0.34410250677398574, "grad_norm": 0.1044921875, "learning_rate": 0.0019899862740318866, "loss": 0.2516, "step": 48480 }, { "epoch": 0.3441734850138319, "grad_norm": 0.12158203125, "learning_rate": 0.0019899820633495578, "loss": 0.247, "step": 48490 }, { "epoch": 0.344244463253678, "grad_norm": 0.1171875, "learning_rate": 0.0019899778517870922, "loss": 0.2443, "step": 48500 }, { "epoch": 0.3443154414935241, "grad_norm": 0.11962890625, "learning_rate": 0.0019899736393444944, "loss": 0.2448, "step": 48510 }, { "epoch": 0.34438641973337025, "grad_norm": 0.0986328125, "learning_rate": 0.001989969426021768, "loss": 0.2647, "step": 48520 }, { "epoch": 0.34445739797321634, "grad_norm": 0.09033203125, "learning_rate": 0.001989965211818918, "loss": 0.2439, "step": 48530 }, { "epoch": 0.3445283762130625, "grad_norm": 0.09130859375, "learning_rate": 0.0019899609967359484, "loss": 0.2471, "step": 48540 }, { "epoch": 0.3445993544529086, "grad_norm": 0.10693359375, "learning_rate": 0.001989956780772863, "loss": 0.2352, "step": 48550 }, { "epoch": 0.3446703326927547, "grad_norm": 0.09130859375, "learning_rate": 0.0019899525639296657, "loss": 0.2503, "step": 48560 }, { "epoch": 0.34474131093260085, "grad_norm": 0.12255859375, "learning_rate": 0.001989948346206362, "loss": 0.266, "step": 48570 }, { "epoch": 0.34481228917244694, "grad_norm": 0.11962890625, "learning_rate": 0.0019899441276029542, "loss": 0.2452, "step": 48580 }, { "epoch": 0.3448832674122931, "grad_norm": 0.09814453125, "learning_rate": 0.0019899399081194477, "loss": 0.243, "step": 48590 }, { "epoch": 0.34495424565213917, "grad_norm": 0.09716796875, "learning_rate": 0.0019899356877558466, "loss": 0.2664, "step": 48600 }, { "epoch": 0.3450252238919853, "grad_norm": 0.08642578125, "learning_rate": 0.0019899314665121548, "loss": 0.2321, "step": 48610 }, { "epoch": 0.34509620213183145, "grad_norm": 0.1513671875, "learning_rate": 0.0019899272443883766, "loss": 0.2463, "step": 48620 }, { "epoch": 0.34516718037167754, "grad_norm": 0.1435546875, "learning_rate": 0.001989923021384516, "loss": 0.2555, "step": 48630 }, { "epoch": 0.3452381586115237, "grad_norm": 0.0947265625, "learning_rate": 0.0019899187975005775, "loss": 0.2494, "step": 48640 }, { "epoch": 0.34530913685136977, "grad_norm": 0.140625, "learning_rate": 0.001989914572736565, "loss": 0.2486, "step": 48650 }, { "epoch": 0.3453801150912159, "grad_norm": 0.08544921875, "learning_rate": 0.0019899103470924825, "loss": 0.2552, "step": 48660 }, { "epoch": 0.34545109333106205, "grad_norm": 0.150390625, "learning_rate": 0.001989906120568335, "loss": 0.2518, "step": 48670 }, { "epoch": 0.34552207157090814, "grad_norm": 0.078125, "learning_rate": 0.0019899018931641256, "loss": 0.2686, "step": 48680 }, { "epoch": 0.3455930498107543, "grad_norm": 0.1162109375, "learning_rate": 0.0019898976648798595, "loss": 0.2286, "step": 48690 }, { "epoch": 0.34566402805060037, "grad_norm": 0.1328125, "learning_rate": 0.00198989343571554, "loss": 0.2376, "step": 48700 }, { "epoch": 0.3457350062904465, "grad_norm": 0.09130859375, "learning_rate": 0.0019898892056711714, "loss": 0.2513, "step": 48710 }, { "epoch": 0.3458059845302926, "grad_norm": 0.1162109375, "learning_rate": 0.0019898849747467585, "loss": 0.255, "step": 48720 }, { "epoch": 0.34587696277013874, "grad_norm": 0.2314453125, "learning_rate": 0.001989880742942305, "loss": 0.2436, "step": 48730 }, { "epoch": 0.3459479410099849, "grad_norm": 0.1171875, "learning_rate": 0.0019898765102578154, "loss": 0.2367, "step": 48740 }, { "epoch": 0.34601891924983097, "grad_norm": 0.1201171875, "learning_rate": 0.001989872276693294, "loss": 0.2371, "step": 48750 }, { "epoch": 0.3460898974896771, "grad_norm": 0.154296875, "learning_rate": 0.001989868042248744, "loss": 0.249, "step": 48760 }, { "epoch": 0.3461608757295232, "grad_norm": 0.1376953125, "learning_rate": 0.001989863806924171, "loss": 0.2356, "step": 48770 }, { "epoch": 0.34623185396936934, "grad_norm": 0.1572265625, "learning_rate": 0.001989859570719578, "loss": 0.2499, "step": 48780 }, { "epoch": 0.3463028322092155, "grad_norm": 0.11865234375, "learning_rate": 0.0019898553336349695, "loss": 0.2616, "step": 48790 }, { "epoch": 0.34637381044906157, "grad_norm": 0.1259765625, "learning_rate": 0.0019898510956703505, "loss": 0.2483, "step": 48800 }, { "epoch": 0.3464447886889077, "grad_norm": 0.1171875, "learning_rate": 0.0019898468568257236, "loss": 0.2477, "step": 48810 }, { "epoch": 0.3465157669287538, "grad_norm": 0.1103515625, "learning_rate": 0.001989842617101095, "loss": 0.255, "step": 48820 }, { "epoch": 0.34658674516859994, "grad_norm": 0.2138671875, "learning_rate": 0.001989838376496467, "loss": 0.255, "step": 48830 }, { "epoch": 0.34665772340844603, "grad_norm": 0.10498046875, "learning_rate": 0.0019898341350118452, "loss": 0.2508, "step": 48840 }, { "epoch": 0.34672870164829217, "grad_norm": 0.115234375, "learning_rate": 0.001989829892647233, "loss": 0.2572, "step": 48850 }, { "epoch": 0.3467996798881383, "grad_norm": 0.125, "learning_rate": 0.0019898256494026343, "loss": 0.2428, "step": 48860 }, { "epoch": 0.3468706581279844, "grad_norm": 0.08642578125, "learning_rate": 0.0019898214052780543, "loss": 0.2458, "step": 48870 }, { "epoch": 0.34694163636783054, "grad_norm": 0.1171875, "learning_rate": 0.0019898171602734964, "loss": 0.2368, "step": 48880 }, { "epoch": 0.34701261460767663, "grad_norm": 0.10205078125, "learning_rate": 0.0019898129143889652, "loss": 0.2618, "step": 48890 }, { "epoch": 0.34708359284752277, "grad_norm": 0.08837890625, "learning_rate": 0.0019898086676244653, "loss": 0.2491, "step": 48900 }, { "epoch": 0.3471545710873689, "grad_norm": 0.08447265625, "learning_rate": 0.00198980441998, "loss": 0.2193, "step": 48910 }, { "epoch": 0.347225549327215, "grad_norm": 0.12451171875, "learning_rate": 0.0019898001714555736, "loss": 0.2495, "step": 48920 }, { "epoch": 0.34729652756706114, "grad_norm": 0.1435546875, "learning_rate": 0.0019897959220511914, "loss": 0.2393, "step": 48930 }, { "epoch": 0.34736750580690723, "grad_norm": 0.10986328125, "learning_rate": 0.001989791671766856, "loss": 0.2465, "step": 48940 }, { "epoch": 0.34743848404675337, "grad_norm": 0.1103515625, "learning_rate": 0.0019897874206025725, "loss": 0.2506, "step": 48950 }, { "epoch": 0.3475094622865995, "grad_norm": 0.2041015625, "learning_rate": 0.0019897831685583454, "loss": 0.2612, "step": 48960 }, { "epoch": 0.3475804405264456, "grad_norm": 0.1435546875, "learning_rate": 0.0019897789156341785, "loss": 0.2587, "step": 48970 }, { "epoch": 0.34765141876629174, "grad_norm": 0.09814453125, "learning_rate": 0.0019897746618300757, "loss": 0.2309, "step": 48980 }, { "epoch": 0.34772239700613783, "grad_norm": 0.1630859375, "learning_rate": 0.001989770407146042, "loss": 0.2343, "step": 48990 }, { "epoch": 0.34779337524598397, "grad_norm": 0.1845703125, "learning_rate": 0.0019897661515820802, "loss": 0.2537, "step": 49000 }, { "epoch": 0.34786435348583006, "grad_norm": 0.119140625, "learning_rate": 0.0019897618951381963, "loss": 0.2319, "step": 49010 }, { "epoch": 0.3479353317256762, "grad_norm": 0.0849609375, "learning_rate": 0.0019897576378143933, "loss": 0.2372, "step": 49020 }, { "epoch": 0.34800630996552234, "grad_norm": 0.0947265625, "learning_rate": 0.0019897533796106757, "loss": 0.2518, "step": 49030 }, { "epoch": 0.34807728820536843, "grad_norm": 0.0966796875, "learning_rate": 0.0019897491205270483, "loss": 0.2517, "step": 49040 }, { "epoch": 0.34814826644521457, "grad_norm": 0.1494140625, "learning_rate": 0.0019897448605635144, "loss": 0.2428, "step": 49050 }, { "epoch": 0.34821924468506066, "grad_norm": 0.130859375, "learning_rate": 0.0019897405997200785, "loss": 0.2354, "step": 49060 }, { "epoch": 0.3482902229249068, "grad_norm": 0.126953125, "learning_rate": 0.001989736337996745, "loss": 0.2508, "step": 49070 }, { "epoch": 0.34836120116475294, "grad_norm": 0.185546875, "learning_rate": 0.0019897320753935184, "loss": 0.2473, "step": 49080 }, { "epoch": 0.34843217940459903, "grad_norm": 0.162109375, "learning_rate": 0.001989727811910402, "loss": 0.2406, "step": 49090 }, { "epoch": 0.34850315764444517, "grad_norm": 0.09228515625, "learning_rate": 0.0019897235475474003, "loss": 0.2552, "step": 49100 }, { "epoch": 0.34857413588429126, "grad_norm": 0.10595703125, "learning_rate": 0.0019897192823045183, "loss": 0.2407, "step": 49110 }, { "epoch": 0.3486451141241374, "grad_norm": 0.1396484375, "learning_rate": 0.0019897150161817595, "loss": 0.2605, "step": 49120 }, { "epoch": 0.3487160923639835, "grad_norm": 0.109375, "learning_rate": 0.0019897107491791285, "loss": 0.245, "step": 49130 }, { "epoch": 0.34878707060382963, "grad_norm": 0.11865234375, "learning_rate": 0.0019897064812966297, "loss": 0.2527, "step": 49140 }, { "epoch": 0.34885804884367577, "grad_norm": 0.11962890625, "learning_rate": 0.001989702212534266, "loss": 0.2496, "step": 49150 }, { "epoch": 0.34892902708352186, "grad_norm": 0.09521484375, "learning_rate": 0.0019896979428920434, "loss": 0.2603, "step": 49160 }, { "epoch": 0.349000005323368, "grad_norm": 0.1416015625, "learning_rate": 0.001989693672369965, "loss": 0.2306, "step": 49170 }, { "epoch": 0.3490709835632141, "grad_norm": 0.126953125, "learning_rate": 0.0019896894009680355, "loss": 0.2706, "step": 49180 }, { "epoch": 0.34914196180306023, "grad_norm": 0.193359375, "learning_rate": 0.0019896851286862584, "loss": 0.2495, "step": 49190 }, { "epoch": 0.3492129400429064, "grad_norm": 0.103515625, "learning_rate": 0.0019896808555246387, "loss": 0.2548, "step": 49200 }, { "epoch": 0.34928391828275246, "grad_norm": 0.0986328125, "learning_rate": 0.0019896765814831807, "loss": 0.2641, "step": 49210 }, { "epoch": 0.3493548965225986, "grad_norm": 0.076171875, "learning_rate": 0.001989672306561888, "loss": 0.2306, "step": 49220 }, { "epoch": 0.3494258747624447, "grad_norm": 0.1328125, "learning_rate": 0.0019896680307607655, "loss": 0.2457, "step": 49230 }, { "epoch": 0.34949685300229083, "grad_norm": 0.08203125, "learning_rate": 0.0019896637540798164, "loss": 0.2398, "step": 49240 }, { "epoch": 0.3495678312421369, "grad_norm": 0.158203125, "learning_rate": 0.0019896594765190464, "loss": 0.2551, "step": 49250 }, { "epoch": 0.34963880948198306, "grad_norm": 0.099609375, "learning_rate": 0.0019896551980784585, "loss": 0.2459, "step": 49260 }, { "epoch": 0.3497097877218292, "grad_norm": 0.095703125, "learning_rate": 0.0019896509187580574, "loss": 0.2752, "step": 49270 }, { "epoch": 0.3497807659616753, "grad_norm": 0.095703125, "learning_rate": 0.001989646638557847, "loss": 0.259, "step": 49280 }, { "epoch": 0.34985174420152143, "grad_norm": 0.08642578125, "learning_rate": 0.0019896423574778323, "loss": 0.2501, "step": 49290 }, { "epoch": 0.3499227224413675, "grad_norm": 0.1162109375, "learning_rate": 0.0019896380755180174, "loss": 0.252, "step": 49300 }, { "epoch": 0.34999370068121366, "grad_norm": 0.07958984375, "learning_rate": 0.0019896337926784053, "loss": 0.2485, "step": 49310 }, { "epoch": 0.3500646789210598, "grad_norm": 0.0947265625, "learning_rate": 0.001989629508959002, "loss": 0.2659, "step": 49320 }, { "epoch": 0.3501356571609059, "grad_norm": 0.09375, "learning_rate": 0.0019896252243598103, "loss": 0.2685, "step": 49330 }, { "epoch": 0.35020663540075203, "grad_norm": 0.14453125, "learning_rate": 0.001989620938880835, "loss": 0.2536, "step": 49340 }, { "epoch": 0.3502776136405981, "grad_norm": 0.12060546875, "learning_rate": 0.0019896166525220807, "loss": 0.2512, "step": 49350 }, { "epoch": 0.35034859188044426, "grad_norm": 0.10888671875, "learning_rate": 0.0019896123652835508, "loss": 0.2438, "step": 49360 }, { "epoch": 0.35041957012029035, "grad_norm": 0.12109375, "learning_rate": 0.0019896080771652507, "loss": 0.24, "step": 49370 }, { "epoch": 0.3504905483601365, "grad_norm": 0.1328125, "learning_rate": 0.0019896037881671834, "loss": 0.2477, "step": 49380 }, { "epoch": 0.35056152659998263, "grad_norm": 0.1376953125, "learning_rate": 0.001989599498289354, "loss": 0.2552, "step": 49390 }, { "epoch": 0.3506325048398287, "grad_norm": 0.08056640625, "learning_rate": 0.0019895952075317663, "loss": 0.2535, "step": 49400 }, { "epoch": 0.35070348307967486, "grad_norm": 0.09619140625, "learning_rate": 0.001989590915894425, "loss": 0.2571, "step": 49410 }, { "epoch": 0.35077446131952095, "grad_norm": 0.1298828125, "learning_rate": 0.0019895866233773335, "loss": 0.2747, "step": 49420 }, { "epoch": 0.3508454395593671, "grad_norm": 0.08544921875, "learning_rate": 0.001989582329980497, "loss": 0.2417, "step": 49430 }, { "epoch": 0.35091641779921323, "grad_norm": 0.09619140625, "learning_rate": 0.001989578035703919, "loss": 0.2617, "step": 49440 }, { "epoch": 0.3509873960390593, "grad_norm": 0.1357421875, "learning_rate": 0.0019895737405476043, "loss": 0.2534, "step": 49450 }, { "epoch": 0.35105837427890546, "grad_norm": 0.1416015625, "learning_rate": 0.001989569444511557, "loss": 0.2523, "step": 49460 }, { "epoch": 0.35112935251875155, "grad_norm": 0.1083984375, "learning_rate": 0.001989565147595781, "loss": 0.2291, "step": 49470 }, { "epoch": 0.3512003307585977, "grad_norm": 0.1083984375, "learning_rate": 0.001989560849800281, "loss": 0.2579, "step": 49480 }, { "epoch": 0.3512713089984438, "grad_norm": 0.11865234375, "learning_rate": 0.001989556551125061, "loss": 0.2859, "step": 49490 }, { "epoch": 0.3513422872382899, "grad_norm": 0.11572265625, "learning_rate": 0.0019895522515701253, "loss": 0.2625, "step": 49500 }, { "epoch": 0.35141326547813606, "grad_norm": 0.10205078125, "learning_rate": 0.0019895479511354785, "loss": 0.252, "step": 49510 }, { "epoch": 0.35148424371798215, "grad_norm": 0.0986328125, "learning_rate": 0.0019895436498211243, "loss": 0.2356, "step": 49520 }, { "epoch": 0.3515552219578283, "grad_norm": 0.1298828125, "learning_rate": 0.001989539347627067, "loss": 0.2794, "step": 49530 }, { "epoch": 0.3516262001976744, "grad_norm": 0.08544921875, "learning_rate": 0.0019895350445533113, "loss": 0.2476, "step": 49540 }, { "epoch": 0.3516971784375205, "grad_norm": 0.1298828125, "learning_rate": 0.001989530740599861, "loss": 0.2549, "step": 49550 }, { "epoch": 0.35176815667736666, "grad_norm": 0.1494140625, "learning_rate": 0.001989526435766721, "loss": 0.2467, "step": 49560 }, { "epoch": 0.35183913491721275, "grad_norm": 0.171875, "learning_rate": 0.0019895221300538944, "loss": 0.2741, "step": 49570 }, { "epoch": 0.3519101131570589, "grad_norm": 0.0966796875, "learning_rate": 0.001989517823461387, "loss": 0.2621, "step": 49580 }, { "epoch": 0.351981091396905, "grad_norm": 0.08935546875, "learning_rate": 0.0019895135159892015, "loss": 0.2522, "step": 49590 }, { "epoch": 0.3520520696367511, "grad_norm": 0.09228515625, "learning_rate": 0.001989509207637343, "loss": 0.2447, "step": 49600 }, { "epoch": 0.3521230478765972, "grad_norm": 0.10595703125, "learning_rate": 0.001989504898405816, "loss": 0.2356, "step": 49610 }, { "epoch": 0.35219402611644335, "grad_norm": 0.07568359375, "learning_rate": 0.001989500588294624, "loss": 0.2461, "step": 49620 }, { "epoch": 0.3522650043562895, "grad_norm": 0.1005859375, "learning_rate": 0.001989496277303772, "loss": 0.2456, "step": 49630 }, { "epoch": 0.3523359825961356, "grad_norm": 0.10888671875, "learning_rate": 0.0019894919654332636, "loss": 0.2556, "step": 49640 }, { "epoch": 0.3524069608359817, "grad_norm": 0.095703125, "learning_rate": 0.0019894876526831036, "loss": 0.2727, "step": 49650 }, { "epoch": 0.3524779390758278, "grad_norm": 0.09619140625, "learning_rate": 0.001989483339053296, "loss": 0.2407, "step": 49660 }, { "epoch": 0.35254891731567395, "grad_norm": 0.10205078125, "learning_rate": 0.0019894790245438454, "loss": 0.2383, "step": 49670 }, { "epoch": 0.3526198955555201, "grad_norm": 0.0751953125, "learning_rate": 0.0019894747091547555, "loss": 0.2707, "step": 49680 }, { "epoch": 0.3526908737953662, "grad_norm": 0.07080078125, "learning_rate": 0.0019894703928860307, "loss": 0.2557, "step": 49690 }, { "epoch": 0.3527618520352123, "grad_norm": 0.146484375, "learning_rate": 0.0019894660757376753, "loss": 0.2444, "step": 49700 }, { "epoch": 0.3528328302750584, "grad_norm": 0.099609375, "learning_rate": 0.001989461757709694, "loss": 0.2674, "step": 49710 }, { "epoch": 0.35290380851490455, "grad_norm": 0.1533203125, "learning_rate": 0.001989457438802091, "loss": 0.2598, "step": 49720 }, { "epoch": 0.35297478675475064, "grad_norm": 0.267578125, "learning_rate": 0.0019894531190148704, "loss": 0.2605, "step": 49730 }, { "epoch": 0.3530457649945968, "grad_norm": 0.10693359375, "learning_rate": 0.001989448798348036, "loss": 0.2504, "step": 49740 }, { "epoch": 0.3531167432344429, "grad_norm": 0.0869140625, "learning_rate": 0.0019894444768015925, "loss": 0.2599, "step": 49750 }, { "epoch": 0.353187721474289, "grad_norm": 0.283203125, "learning_rate": 0.0019894401543755443, "loss": 0.239, "step": 49760 }, { "epoch": 0.35325869971413515, "grad_norm": 0.11669921875, "learning_rate": 0.0019894358310698955, "loss": 0.2359, "step": 49770 }, { "epoch": 0.35332967795398124, "grad_norm": 0.10498046875, "learning_rate": 0.001989431506884651, "loss": 0.2283, "step": 49780 }, { "epoch": 0.3534006561938274, "grad_norm": 0.1181640625, "learning_rate": 0.0019894271818198133, "loss": 0.2577, "step": 49790 }, { "epoch": 0.3534716344336735, "grad_norm": 0.1181640625, "learning_rate": 0.001989422855875389, "loss": 0.2406, "step": 49800 }, { "epoch": 0.3535426126735196, "grad_norm": 0.134765625, "learning_rate": 0.0019894185290513806, "loss": 0.2339, "step": 49810 }, { "epoch": 0.35361359091336575, "grad_norm": 0.09765625, "learning_rate": 0.001989414201347793, "loss": 0.2607, "step": 49820 }, { "epoch": 0.35368456915321184, "grad_norm": 0.0888671875, "learning_rate": 0.0019894098727646307, "loss": 0.2343, "step": 49830 }, { "epoch": 0.353755547393058, "grad_norm": 0.0810546875, "learning_rate": 0.0019894055433018975, "loss": 0.2397, "step": 49840 }, { "epoch": 0.35382652563290407, "grad_norm": 0.138671875, "learning_rate": 0.0019894012129595983, "loss": 0.2288, "step": 49850 }, { "epoch": 0.3538975038727502, "grad_norm": 0.1455078125, "learning_rate": 0.001989396881737737, "loss": 0.259, "step": 49860 }, { "epoch": 0.35396848211259635, "grad_norm": 0.1083984375, "learning_rate": 0.0019893925496363178, "loss": 0.2425, "step": 49870 }, { "epoch": 0.35403946035244244, "grad_norm": 0.10009765625, "learning_rate": 0.001989388216655345, "loss": 0.2545, "step": 49880 }, { "epoch": 0.3541104385922886, "grad_norm": 0.09912109375, "learning_rate": 0.0019893838827948233, "loss": 0.2568, "step": 49890 }, { "epoch": 0.35418141683213467, "grad_norm": 0.08837890625, "learning_rate": 0.0019893795480547564, "loss": 0.2432, "step": 49900 }, { "epoch": 0.3542523950719808, "grad_norm": 0.16015625, "learning_rate": 0.001989375212435149, "loss": 0.2488, "step": 49910 }, { "epoch": 0.35432337331182695, "grad_norm": 0.08984375, "learning_rate": 0.001989370875936005, "loss": 0.2396, "step": 49920 }, { "epoch": 0.35439435155167304, "grad_norm": 0.1318359375, "learning_rate": 0.0019893665385573296, "loss": 0.2507, "step": 49930 }, { "epoch": 0.3544653297915192, "grad_norm": 0.09716796875, "learning_rate": 0.001989362200299126, "loss": 0.2264, "step": 49940 }, { "epoch": 0.35453630803136527, "grad_norm": 0.12109375, "learning_rate": 0.001989357861161399, "loss": 0.259, "step": 49950 }, { "epoch": 0.3546072862712114, "grad_norm": 0.11474609375, "learning_rate": 0.0019893535211441523, "loss": 0.2509, "step": 49960 }, { "epoch": 0.3546782645110575, "grad_norm": 0.1123046875, "learning_rate": 0.0019893491802473913, "loss": 0.2489, "step": 49970 }, { "epoch": 0.35474924275090364, "grad_norm": 0.1103515625, "learning_rate": 0.00198934483847112, "loss": 0.2562, "step": 49980 }, { "epoch": 0.3548202209907498, "grad_norm": 0.09423828125, "learning_rate": 0.001989340495815342, "loss": 0.2582, "step": 49990 }, { "epoch": 0.35489119923059587, "grad_norm": 0.1923828125, "learning_rate": 0.001989336152280062, "loss": 0.2569, "step": 50000 }, { "epoch": 0.35489119923059587, "eval_covost2-zh-en_loss": 3.84857177734375, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.238, "eval_covost2-zh-en_samples_per_second": 3.013, "eval_covost2-zh-en_steps_per_second": 0.188, "step": 50000 }, { "epoch": 0.35489119923059587, "eval_covost2-en-zh_loss": 3.1444897651672363, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.842, "eval_covost2-en-zh_samples_per_second": 3.071, "eval_covost2-en-zh_steps_per_second": 0.192, "step": 50000 }, { "epoch": 0.354962177470442, "grad_norm": 0.2490234375, "learning_rate": 0.001989331807865284, "loss": 0.2568, "step": 50010 }, { "epoch": 0.3550331557102881, "grad_norm": 0.13671875, "learning_rate": 0.0019893274625710127, "loss": 0.2443, "step": 50020 }, { "epoch": 0.35510413395013424, "grad_norm": 0.13671875, "learning_rate": 0.0019893231163972524, "loss": 0.2404, "step": 50030 }, { "epoch": 0.3551751121899804, "grad_norm": 0.0927734375, "learning_rate": 0.0019893187693440072, "loss": 0.2474, "step": 50040 }, { "epoch": 0.35524609042982647, "grad_norm": 0.1484375, "learning_rate": 0.0019893144214112816, "loss": 0.2322, "step": 50050 }, { "epoch": 0.3553170686696726, "grad_norm": 0.06787109375, "learning_rate": 0.0019893100725990797, "loss": 0.2469, "step": 50060 }, { "epoch": 0.3553880469095187, "grad_norm": 0.1064453125, "learning_rate": 0.001989305722907406, "loss": 0.2496, "step": 50070 }, { "epoch": 0.35545902514936484, "grad_norm": 0.0869140625, "learning_rate": 0.0019893013723362644, "loss": 0.2349, "step": 50080 }, { "epoch": 0.355530003389211, "grad_norm": 0.12451171875, "learning_rate": 0.0019892970208856596, "loss": 0.2523, "step": 50090 }, { "epoch": 0.35560098162905707, "grad_norm": 0.1943359375, "learning_rate": 0.001989292668555596, "loss": 0.2543, "step": 50100 }, { "epoch": 0.3556719598689032, "grad_norm": 0.1259765625, "learning_rate": 0.0019892883153460775, "loss": 0.2334, "step": 50110 }, { "epoch": 0.3557429381087493, "grad_norm": 0.130859375, "learning_rate": 0.0019892839612571087, "loss": 0.2736, "step": 50120 }, { "epoch": 0.35581391634859544, "grad_norm": 0.1923828125, "learning_rate": 0.0019892796062886938, "loss": 0.2442, "step": 50130 }, { "epoch": 0.3558848945884415, "grad_norm": 0.09130859375, "learning_rate": 0.001989275250440837, "loss": 0.2265, "step": 50140 }, { "epoch": 0.35595587282828767, "grad_norm": 0.08984375, "learning_rate": 0.0019892708937135424, "loss": 0.2474, "step": 50150 }, { "epoch": 0.3560268510681338, "grad_norm": 0.11865234375, "learning_rate": 0.001989266536106815, "loss": 0.2469, "step": 50160 }, { "epoch": 0.3560978293079799, "grad_norm": 0.158203125, "learning_rate": 0.001989262177620659, "loss": 0.2498, "step": 50170 }, { "epoch": 0.35616880754782604, "grad_norm": 0.2041015625, "learning_rate": 0.0019892578182550778, "loss": 0.2457, "step": 50180 }, { "epoch": 0.3562397857876721, "grad_norm": 0.099609375, "learning_rate": 0.0019892534580100768, "loss": 0.2372, "step": 50190 }, { "epoch": 0.35631076402751827, "grad_norm": 0.12109375, "learning_rate": 0.00198924909688566, "loss": 0.2385, "step": 50200 }, { "epoch": 0.3563817422673644, "grad_norm": 0.08984375, "learning_rate": 0.001989244734881831, "loss": 0.2431, "step": 50210 }, { "epoch": 0.3564527205072105, "grad_norm": 0.091796875, "learning_rate": 0.001989240371998595, "loss": 0.2826, "step": 50220 }, { "epoch": 0.35652369874705664, "grad_norm": 0.09423828125, "learning_rate": 0.0019892360082359563, "loss": 0.2638, "step": 50230 }, { "epoch": 0.3565946769869027, "grad_norm": 0.130859375, "learning_rate": 0.0019892316435939183, "loss": 0.2562, "step": 50240 }, { "epoch": 0.35666565522674887, "grad_norm": 0.12158203125, "learning_rate": 0.0019892272780724866, "loss": 0.2421, "step": 50250 }, { "epoch": 0.35673663346659495, "grad_norm": 0.09521484375, "learning_rate": 0.001989222911671664, "loss": 0.2242, "step": 50260 }, { "epoch": 0.3568076117064411, "grad_norm": 0.091796875, "learning_rate": 0.0019892185443914565, "loss": 0.2335, "step": 50270 }, { "epoch": 0.35687858994628724, "grad_norm": 0.28125, "learning_rate": 0.001989214176231867, "loss": 0.2519, "step": 50280 }, { "epoch": 0.3569495681861333, "grad_norm": 0.1142578125, "learning_rate": 0.001989209807192901, "loss": 0.2519, "step": 50290 }, { "epoch": 0.35702054642597947, "grad_norm": 0.1904296875, "learning_rate": 0.001989205437274562, "loss": 0.2248, "step": 50300 }, { "epoch": 0.35709152466582555, "grad_norm": 0.1474609375, "learning_rate": 0.0019892010664768543, "loss": 0.2508, "step": 50310 }, { "epoch": 0.3571625029056717, "grad_norm": 0.1171875, "learning_rate": 0.0019891966947997827, "loss": 0.2509, "step": 50320 }, { "epoch": 0.35723348114551784, "grad_norm": 0.11572265625, "learning_rate": 0.0019891923222433512, "loss": 0.2648, "step": 50330 }, { "epoch": 0.3573044593853639, "grad_norm": 0.099609375, "learning_rate": 0.001989187948807564, "loss": 0.251, "step": 50340 }, { "epoch": 0.35737543762521007, "grad_norm": 0.1572265625, "learning_rate": 0.001989183574492426, "loss": 0.2653, "step": 50350 }, { "epoch": 0.35744641586505616, "grad_norm": 0.08837890625, "learning_rate": 0.001989179199297941, "loss": 0.2582, "step": 50360 }, { "epoch": 0.3575173941049023, "grad_norm": 0.091796875, "learning_rate": 0.001989174823224114, "loss": 0.2558, "step": 50370 }, { "epoch": 0.3575883723447484, "grad_norm": 0.12890625, "learning_rate": 0.0019891704462709483, "loss": 0.2393, "step": 50380 }, { "epoch": 0.3576593505845945, "grad_norm": 0.10400390625, "learning_rate": 0.0019891660684384486, "loss": 0.2591, "step": 50390 }, { "epoch": 0.35773032882444067, "grad_norm": 0.1025390625, "learning_rate": 0.00198916168972662, "loss": 0.2568, "step": 50400 }, { "epoch": 0.35780130706428676, "grad_norm": 0.11767578125, "learning_rate": 0.001989157310135466, "loss": 0.2583, "step": 50410 }, { "epoch": 0.3578722853041329, "grad_norm": 0.0771484375, "learning_rate": 0.0019891529296649905, "loss": 0.2515, "step": 50420 }, { "epoch": 0.357943263543979, "grad_norm": 0.115234375, "learning_rate": 0.0019891485483151993, "loss": 0.2451, "step": 50430 }, { "epoch": 0.3580142417838251, "grad_norm": 0.08203125, "learning_rate": 0.001989144166086096, "loss": 0.2592, "step": 50440 }, { "epoch": 0.35808522002367127, "grad_norm": 0.10888671875, "learning_rate": 0.001989139782977684, "loss": 0.2491, "step": 50450 }, { "epoch": 0.35815619826351736, "grad_norm": 0.10498046875, "learning_rate": 0.001989135398989969, "loss": 0.2431, "step": 50460 }, { "epoch": 0.3582271765033635, "grad_norm": 0.1142578125, "learning_rate": 0.001989131014122955, "loss": 0.2534, "step": 50470 }, { "epoch": 0.3582981547432096, "grad_norm": 0.142578125, "learning_rate": 0.0019891266283766458, "loss": 0.2424, "step": 50480 }, { "epoch": 0.3583691329830557, "grad_norm": 0.07666015625, "learning_rate": 0.001989122241751046, "loss": 0.2438, "step": 50490 }, { "epoch": 0.3584401112229018, "grad_norm": 0.09912109375, "learning_rate": 0.0019891178542461602, "loss": 0.2581, "step": 50500 }, { "epoch": 0.35851108946274796, "grad_norm": 0.09765625, "learning_rate": 0.001989113465861993, "loss": 0.227, "step": 50510 }, { "epoch": 0.3585820677025941, "grad_norm": 0.11669921875, "learning_rate": 0.0019891090765985476, "loss": 0.2478, "step": 50520 }, { "epoch": 0.3586530459424402, "grad_norm": 0.11669921875, "learning_rate": 0.001989104686455829, "loss": 0.2261, "step": 50530 }, { "epoch": 0.3587240241822863, "grad_norm": 0.11181640625, "learning_rate": 0.0019891002954338425, "loss": 0.255, "step": 50540 }, { "epoch": 0.3587950024221324, "grad_norm": 0.1845703125, "learning_rate": 0.0019890959035325905, "loss": 0.2437, "step": 50550 }, { "epoch": 0.35886598066197856, "grad_norm": 0.08935546875, "learning_rate": 0.001989091510752079, "loss": 0.2493, "step": 50560 }, { "epoch": 0.3589369589018247, "grad_norm": 0.08837890625, "learning_rate": 0.0019890871170923116, "loss": 0.2455, "step": 50570 }, { "epoch": 0.3590079371416708, "grad_norm": 0.10009765625, "learning_rate": 0.001989082722553293, "loss": 0.2419, "step": 50580 }, { "epoch": 0.3590789153815169, "grad_norm": 0.1884765625, "learning_rate": 0.001989078327135027, "loss": 0.2629, "step": 50590 }, { "epoch": 0.359149893621363, "grad_norm": 0.08740234375, "learning_rate": 0.0019890739308375182, "loss": 0.2343, "step": 50600 }, { "epoch": 0.35922087186120916, "grad_norm": 0.2333984375, "learning_rate": 0.001989069533660771, "loss": 0.2447, "step": 50610 }, { "epoch": 0.35929185010105524, "grad_norm": 0.11767578125, "learning_rate": 0.00198906513560479, "loss": 0.2685, "step": 50620 }, { "epoch": 0.3593628283409014, "grad_norm": 0.08544921875, "learning_rate": 0.001989060736669579, "loss": 0.2502, "step": 50630 }, { "epoch": 0.35943380658074753, "grad_norm": 0.1572265625, "learning_rate": 0.001989056336855143, "loss": 0.2517, "step": 50640 }, { "epoch": 0.3595047848205936, "grad_norm": 0.1533203125, "learning_rate": 0.001989051936161486, "loss": 0.2467, "step": 50650 }, { "epoch": 0.35957576306043976, "grad_norm": 0.076171875, "learning_rate": 0.001989047534588612, "loss": 0.2512, "step": 50660 }, { "epoch": 0.35964674130028584, "grad_norm": 0.10546875, "learning_rate": 0.0019890431321365255, "loss": 0.2557, "step": 50670 }, { "epoch": 0.359717719540132, "grad_norm": 0.08251953125, "learning_rate": 0.0019890387288052317, "loss": 0.2508, "step": 50680 }, { "epoch": 0.35978869777997813, "grad_norm": 0.10693359375, "learning_rate": 0.001989034324594734, "loss": 0.2494, "step": 50690 }, { "epoch": 0.3598596760198242, "grad_norm": 0.10205078125, "learning_rate": 0.001989029919505037, "loss": 0.2514, "step": 50700 }, { "epoch": 0.35993065425967036, "grad_norm": 0.103515625, "learning_rate": 0.001989025513536145, "loss": 0.248, "step": 50710 }, { "epoch": 0.36000163249951644, "grad_norm": 0.1689453125, "learning_rate": 0.0019890211066880634, "loss": 0.247, "step": 50720 }, { "epoch": 0.3600726107393626, "grad_norm": 0.08837890625, "learning_rate": 0.0019890166989607948, "loss": 0.2493, "step": 50730 }, { "epoch": 0.3601435889792087, "grad_norm": 0.09228515625, "learning_rate": 0.001989012290354345, "loss": 0.2518, "step": 50740 }, { "epoch": 0.3602145672190548, "grad_norm": 0.1162109375, "learning_rate": 0.0019890078808687166, "loss": 0.2272, "step": 50750 }, { "epoch": 0.36028554545890096, "grad_norm": 0.1533203125, "learning_rate": 0.0019890034705039162, "loss": 0.2342, "step": 50760 }, { "epoch": 0.36035652369874704, "grad_norm": 0.1787109375, "learning_rate": 0.0019889990592599468, "loss": 0.2554, "step": 50770 }, { "epoch": 0.3604275019385932, "grad_norm": 0.06591796875, "learning_rate": 0.001988994647136813, "loss": 0.2405, "step": 50780 }, { "epoch": 0.3604984801784393, "grad_norm": 0.1015625, "learning_rate": 0.001988990234134519, "loss": 0.24, "step": 50790 }, { "epoch": 0.3605694584182854, "grad_norm": 0.06591796875, "learning_rate": 0.00198898582025307, "loss": 0.2273, "step": 50800 }, { "epoch": 0.36064043665813156, "grad_norm": 0.1796875, "learning_rate": 0.001988981405492469, "loss": 0.2241, "step": 50810 }, { "epoch": 0.36071141489797764, "grad_norm": 0.0810546875, "learning_rate": 0.0019889769898527214, "loss": 0.24, "step": 50820 }, { "epoch": 0.3607823931378238, "grad_norm": 0.103515625, "learning_rate": 0.0019889725733338312, "loss": 0.2322, "step": 50830 }, { "epoch": 0.3608533713776699, "grad_norm": 0.11279296875, "learning_rate": 0.001988968155935803, "loss": 0.2546, "step": 50840 }, { "epoch": 0.360924349617516, "grad_norm": 0.09033203125, "learning_rate": 0.001988963737658641, "loss": 0.2266, "step": 50850 }, { "epoch": 0.3609953278573621, "grad_norm": 0.07421875, "learning_rate": 0.0019889593185023497, "loss": 0.2292, "step": 50860 }, { "epoch": 0.36106630609720824, "grad_norm": 0.1025390625, "learning_rate": 0.001988954898466933, "loss": 0.2439, "step": 50870 }, { "epoch": 0.3611372843370544, "grad_norm": 0.125, "learning_rate": 0.0019889504775523962, "loss": 0.2467, "step": 50880 }, { "epoch": 0.3612082625769005, "grad_norm": 0.10400390625, "learning_rate": 0.0019889460557587424, "loss": 0.2468, "step": 50890 }, { "epoch": 0.3612792408167466, "grad_norm": 0.10791015625, "learning_rate": 0.001988941633085977, "loss": 0.2347, "step": 50900 }, { "epoch": 0.3613502190565927, "grad_norm": 0.44921875, "learning_rate": 0.001988937209534104, "loss": 0.2395, "step": 50910 }, { "epoch": 0.36142119729643885, "grad_norm": 0.11962890625, "learning_rate": 0.001988932785103128, "loss": 0.2562, "step": 50920 }, { "epoch": 0.361492175536285, "grad_norm": 0.08447265625, "learning_rate": 0.0019889283597930526, "loss": 0.2351, "step": 50930 }, { "epoch": 0.3615631537761311, "grad_norm": 0.0966796875, "learning_rate": 0.0019889239336038835, "loss": 0.2527, "step": 50940 }, { "epoch": 0.3616341320159772, "grad_norm": 0.1494140625, "learning_rate": 0.0019889195065356242, "loss": 0.2772, "step": 50950 }, { "epoch": 0.3617051102558233, "grad_norm": 0.1044921875, "learning_rate": 0.0019889150785882786, "loss": 0.2417, "step": 50960 }, { "epoch": 0.36177608849566945, "grad_norm": 0.09228515625, "learning_rate": 0.0019889106497618524, "loss": 0.2369, "step": 50970 }, { "epoch": 0.36184706673551553, "grad_norm": 0.11669921875, "learning_rate": 0.001988906220056349, "loss": 0.2296, "step": 50980 }, { "epoch": 0.3619180449753617, "grad_norm": 0.109375, "learning_rate": 0.001988901789471773, "loss": 0.246, "step": 50990 }, { "epoch": 0.3619890232152078, "grad_norm": 0.0966796875, "learning_rate": 0.0019888973580081287, "loss": 0.2435, "step": 51000 }, { "epoch": 0.3620600014550539, "grad_norm": 0.1103515625, "learning_rate": 0.001988892925665421, "loss": 0.2627, "step": 51010 }, { "epoch": 0.36213097969490005, "grad_norm": 0.0869140625, "learning_rate": 0.001988888492443654, "loss": 0.2546, "step": 51020 }, { "epoch": 0.36220195793474613, "grad_norm": 0.10986328125, "learning_rate": 0.0019888840583428317, "loss": 0.219, "step": 51030 }, { "epoch": 0.3622729361745923, "grad_norm": 0.09423828125, "learning_rate": 0.0019888796233629587, "loss": 0.2376, "step": 51040 }, { "epoch": 0.3623439144144384, "grad_norm": 0.1025390625, "learning_rate": 0.00198887518750404, "loss": 0.2369, "step": 51050 }, { "epoch": 0.3624148926542845, "grad_norm": 0.1572265625, "learning_rate": 0.001988870750766079, "loss": 0.2647, "step": 51060 }, { "epoch": 0.36248587089413065, "grad_norm": 0.2109375, "learning_rate": 0.00198886631314908, "loss": 0.2516, "step": 51070 }, { "epoch": 0.36255684913397673, "grad_norm": 0.169921875, "learning_rate": 0.0019888618746530485, "loss": 0.2566, "step": 51080 }, { "epoch": 0.3626278273738229, "grad_norm": 0.0810546875, "learning_rate": 0.0019888574352779887, "loss": 0.2448, "step": 51090 }, { "epoch": 0.36269880561366896, "grad_norm": 0.1494140625, "learning_rate": 0.001988852995023904, "loss": 0.2538, "step": 51100 }, { "epoch": 0.3627697838535151, "grad_norm": 0.126953125, "learning_rate": 0.0019888485538907996, "loss": 0.225, "step": 51110 }, { "epoch": 0.36284076209336125, "grad_norm": 0.1318359375, "learning_rate": 0.0019888441118786795, "loss": 0.2477, "step": 51120 }, { "epoch": 0.36291174033320733, "grad_norm": 0.0849609375, "learning_rate": 0.0019888396689875486, "loss": 0.2416, "step": 51130 }, { "epoch": 0.3629827185730535, "grad_norm": 0.12109375, "learning_rate": 0.0019888352252174107, "loss": 0.2617, "step": 51140 }, { "epoch": 0.36305369681289956, "grad_norm": 0.06884765625, "learning_rate": 0.0019888307805682706, "loss": 0.2523, "step": 51150 }, { "epoch": 0.3631246750527457, "grad_norm": 0.1171875, "learning_rate": 0.0019888263350401328, "loss": 0.2386, "step": 51160 }, { "epoch": 0.36319565329259185, "grad_norm": 0.0869140625, "learning_rate": 0.0019888218886330014, "loss": 0.2431, "step": 51170 }, { "epoch": 0.36326663153243793, "grad_norm": 0.072265625, "learning_rate": 0.0019888174413468804, "loss": 0.2637, "step": 51180 }, { "epoch": 0.3633376097722841, "grad_norm": 0.12060546875, "learning_rate": 0.001988812993181775, "loss": 0.2356, "step": 51190 }, { "epoch": 0.36340858801213016, "grad_norm": 0.111328125, "learning_rate": 0.0019888085441376892, "loss": 0.23, "step": 51200 }, { "epoch": 0.3634795662519763, "grad_norm": 0.083984375, "learning_rate": 0.0019888040942146277, "loss": 0.2448, "step": 51210 }, { "epoch": 0.36355054449182245, "grad_norm": 0.080078125, "learning_rate": 0.0019887996434125943, "loss": 0.2372, "step": 51220 }, { "epoch": 0.36362152273166853, "grad_norm": 0.19140625, "learning_rate": 0.001988795191731594, "loss": 0.2635, "step": 51230 }, { "epoch": 0.3636925009715147, "grad_norm": 0.1474609375, "learning_rate": 0.001988790739171631, "loss": 0.2424, "step": 51240 }, { "epoch": 0.36376347921136076, "grad_norm": 0.1328125, "learning_rate": 0.0019887862857327094, "loss": 0.2521, "step": 51250 }, { "epoch": 0.3638344574512069, "grad_norm": 0.123046875, "learning_rate": 0.001988781831414834, "loss": 0.2519, "step": 51260 }, { "epoch": 0.363905435691053, "grad_norm": 0.11669921875, "learning_rate": 0.001988777376218009, "loss": 0.2482, "step": 51270 }, { "epoch": 0.36397641393089913, "grad_norm": 0.09716796875, "learning_rate": 0.001988772920142239, "loss": 0.2481, "step": 51280 }, { "epoch": 0.3640473921707453, "grad_norm": 0.10986328125, "learning_rate": 0.001988768463187528, "loss": 0.2452, "step": 51290 }, { "epoch": 0.36411837041059136, "grad_norm": 0.11328125, "learning_rate": 0.001988764005353881, "loss": 0.2347, "step": 51300 }, { "epoch": 0.3641893486504375, "grad_norm": 0.0966796875, "learning_rate": 0.001988759546641302, "loss": 0.2512, "step": 51310 }, { "epoch": 0.3642603268902836, "grad_norm": 0.11376953125, "learning_rate": 0.0019887550870497954, "loss": 0.2618, "step": 51320 }, { "epoch": 0.36433130513012973, "grad_norm": 0.09814453125, "learning_rate": 0.0019887506265793657, "loss": 0.2344, "step": 51330 }, { "epoch": 0.3644022833699759, "grad_norm": 0.078125, "learning_rate": 0.0019887461652300176, "loss": 0.2357, "step": 51340 }, { "epoch": 0.36447326160982196, "grad_norm": 0.10009765625, "learning_rate": 0.0019887417030017554, "loss": 0.2296, "step": 51350 }, { "epoch": 0.3645442398496681, "grad_norm": 0.08984375, "learning_rate": 0.001988737239894583, "loss": 0.2319, "step": 51360 }, { "epoch": 0.3646152180895142, "grad_norm": 0.1962890625, "learning_rate": 0.0019887327759085054, "loss": 0.2344, "step": 51370 }, { "epoch": 0.36468619632936033, "grad_norm": 0.11474609375, "learning_rate": 0.0019887283110435264, "loss": 0.2597, "step": 51380 }, { "epoch": 0.3647571745692064, "grad_norm": 0.1123046875, "learning_rate": 0.001988723845299651, "loss": 0.2585, "step": 51390 }, { "epoch": 0.36482815280905256, "grad_norm": 0.103515625, "learning_rate": 0.0019887193786768836, "loss": 0.2334, "step": 51400 }, { "epoch": 0.3648991310488987, "grad_norm": 0.1142578125, "learning_rate": 0.001988714911175228, "loss": 0.2592, "step": 51410 }, { "epoch": 0.3649701092887448, "grad_norm": 0.095703125, "learning_rate": 0.0019887104427946892, "loss": 0.2427, "step": 51420 }, { "epoch": 0.36504108752859094, "grad_norm": 0.1123046875, "learning_rate": 0.001988705973535272, "loss": 0.2526, "step": 51430 }, { "epoch": 0.365112065768437, "grad_norm": 0.134765625, "learning_rate": 0.0019887015033969798, "loss": 0.2436, "step": 51440 }, { "epoch": 0.36518304400828316, "grad_norm": 0.08544921875, "learning_rate": 0.0019886970323798174, "loss": 0.2703, "step": 51450 }, { "epoch": 0.3652540222481293, "grad_norm": 0.09521484375, "learning_rate": 0.00198869256048379, "loss": 0.2304, "step": 51460 }, { "epoch": 0.3653250004879754, "grad_norm": 0.12353515625, "learning_rate": 0.0019886880877089006, "loss": 0.251, "step": 51470 }, { "epoch": 0.36539597872782154, "grad_norm": 0.1513671875, "learning_rate": 0.0019886836140551546, "loss": 0.2509, "step": 51480 }, { "epoch": 0.3654669569676676, "grad_norm": 0.109375, "learning_rate": 0.0019886791395225564, "loss": 0.253, "step": 51490 }, { "epoch": 0.36553793520751376, "grad_norm": 0.1298828125, "learning_rate": 0.00198867466411111, "loss": 0.2302, "step": 51500 }, { "epoch": 0.36560891344735985, "grad_norm": 0.10107421875, "learning_rate": 0.00198867018782082, "loss": 0.2546, "step": 51510 }, { "epoch": 0.365679891687206, "grad_norm": 0.154296875, "learning_rate": 0.0019886657106516913, "loss": 0.2562, "step": 51520 }, { "epoch": 0.36575086992705214, "grad_norm": 0.09326171875, "learning_rate": 0.0019886612326037276, "loss": 0.2241, "step": 51530 }, { "epoch": 0.3658218481668982, "grad_norm": 0.12890625, "learning_rate": 0.001988656753676934, "loss": 0.2521, "step": 51540 }, { "epoch": 0.36589282640674436, "grad_norm": 0.14453125, "learning_rate": 0.001988652273871314, "loss": 0.2378, "step": 51550 }, { "epoch": 0.36596380464659045, "grad_norm": 0.083984375, "learning_rate": 0.0019886477931868726, "loss": 0.2527, "step": 51560 }, { "epoch": 0.3660347828864366, "grad_norm": 0.103515625, "learning_rate": 0.0019886433116236145, "loss": 0.2219, "step": 51570 }, { "epoch": 0.36610576112628274, "grad_norm": 0.099609375, "learning_rate": 0.001988638829181544, "loss": 0.2575, "step": 51580 }, { "epoch": 0.3661767393661288, "grad_norm": 0.09423828125, "learning_rate": 0.001988634345860665, "loss": 0.2412, "step": 51590 }, { "epoch": 0.36624771760597497, "grad_norm": 0.11962890625, "learning_rate": 0.001988629861660983, "loss": 0.23, "step": 51600 }, { "epoch": 0.36631869584582105, "grad_norm": 0.08740234375, "learning_rate": 0.0019886253765825012, "loss": 0.2471, "step": 51610 }, { "epoch": 0.3663896740856672, "grad_norm": 0.1318359375, "learning_rate": 0.0019886208906252246, "loss": 0.249, "step": 51620 }, { "epoch": 0.3664606523255133, "grad_norm": 0.09326171875, "learning_rate": 0.001988616403789158, "loss": 0.2639, "step": 51630 }, { "epoch": 0.3665316305653594, "grad_norm": 0.09814453125, "learning_rate": 0.001988611916074305, "loss": 0.2394, "step": 51640 }, { "epoch": 0.36660260880520557, "grad_norm": 0.0927734375, "learning_rate": 0.0019886074274806705, "loss": 0.2418, "step": 51650 }, { "epoch": 0.36667358704505165, "grad_norm": 0.126953125, "learning_rate": 0.0019886029380082595, "loss": 0.2534, "step": 51660 }, { "epoch": 0.3667445652848978, "grad_norm": 0.1015625, "learning_rate": 0.0019885984476570757, "loss": 0.247, "step": 51670 }, { "epoch": 0.3668155435247439, "grad_norm": 0.08203125, "learning_rate": 0.0019885939564271236, "loss": 0.2393, "step": 51680 }, { "epoch": 0.36688652176459, "grad_norm": 0.1728515625, "learning_rate": 0.0019885894643184073, "loss": 0.2592, "step": 51690 }, { "epoch": 0.36695750000443617, "grad_norm": 0.1005859375, "learning_rate": 0.0019885849713309326, "loss": 0.265, "step": 51700 }, { "epoch": 0.36702847824428225, "grad_norm": 0.07763671875, "learning_rate": 0.0019885804774647025, "loss": 0.2296, "step": 51710 }, { "epoch": 0.3670994564841284, "grad_norm": 0.087890625, "learning_rate": 0.001988575982719722, "loss": 0.2567, "step": 51720 }, { "epoch": 0.3671704347239745, "grad_norm": 0.10498046875, "learning_rate": 0.001988571487095996, "loss": 0.2334, "step": 51730 }, { "epoch": 0.3672414129638206, "grad_norm": 0.146484375, "learning_rate": 0.001988566990593528, "loss": 0.2466, "step": 51740 }, { "epoch": 0.3673123912036667, "grad_norm": 0.0986328125, "learning_rate": 0.0019885624932123227, "loss": 0.234, "step": 51750 }, { "epoch": 0.36738336944351285, "grad_norm": 0.11279296875, "learning_rate": 0.0019885579949523854, "loss": 0.2614, "step": 51760 }, { "epoch": 0.367454347683359, "grad_norm": 0.1064453125, "learning_rate": 0.00198855349581372, "loss": 0.2497, "step": 51770 }, { "epoch": 0.3675253259232051, "grad_norm": 0.09228515625, "learning_rate": 0.0019885489957963305, "loss": 0.2306, "step": 51780 }, { "epoch": 0.3675963041630512, "grad_norm": 0.087890625, "learning_rate": 0.001988544494900222, "loss": 0.2431, "step": 51790 }, { "epoch": 0.3676672824028973, "grad_norm": 0.1220703125, "learning_rate": 0.001988539993125398, "loss": 0.2265, "step": 51800 }, { "epoch": 0.36773826064274345, "grad_norm": 0.140625, "learning_rate": 0.0019885354904718646, "loss": 0.2658, "step": 51810 }, { "epoch": 0.3678092388825896, "grad_norm": 0.10693359375, "learning_rate": 0.0019885309869396243, "loss": 0.2375, "step": 51820 }, { "epoch": 0.3678802171224357, "grad_norm": 0.1279296875, "learning_rate": 0.0019885264825286837, "loss": 0.2337, "step": 51830 }, { "epoch": 0.3679511953622818, "grad_norm": 0.10498046875, "learning_rate": 0.001988521977239045, "loss": 0.2495, "step": 51840 }, { "epoch": 0.3680221736021279, "grad_norm": 0.119140625, "learning_rate": 0.0019885174710707145, "loss": 0.2515, "step": 51850 }, { "epoch": 0.36809315184197405, "grad_norm": 0.1015625, "learning_rate": 0.0019885129640236955, "loss": 0.2613, "step": 51860 }, { "epoch": 0.36816413008182014, "grad_norm": 0.1201171875, "learning_rate": 0.001988508456097993, "loss": 0.2394, "step": 51870 }, { "epoch": 0.3682351083216663, "grad_norm": 0.111328125, "learning_rate": 0.001988503947293611, "loss": 0.2266, "step": 51880 }, { "epoch": 0.3683060865615124, "grad_norm": 0.1337890625, "learning_rate": 0.001988499437610555, "loss": 0.2422, "step": 51890 }, { "epoch": 0.3683770648013585, "grad_norm": 0.11328125, "learning_rate": 0.0019884949270488283, "loss": 0.2534, "step": 51900 }, { "epoch": 0.36844804304120465, "grad_norm": 0.10693359375, "learning_rate": 0.001988490415608436, "loss": 0.2665, "step": 51910 }, { "epoch": 0.36851902128105074, "grad_norm": 0.08935546875, "learning_rate": 0.001988485903289382, "loss": 0.24, "step": 51920 }, { "epoch": 0.3685899995208969, "grad_norm": 0.125, "learning_rate": 0.001988481390091671, "loss": 0.2425, "step": 51930 }, { "epoch": 0.368660977760743, "grad_norm": 0.0859375, "learning_rate": 0.0019884768760153083, "loss": 0.2445, "step": 51940 }, { "epoch": 0.3687319560005891, "grad_norm": 0.126953125, "learning_rate": 0.0019884723610602974, "loss": 0.2553, "step": 51950 }, { "epoch": 0.36880293424043525, "grad_norm": 0.06982421875, "learning_rate": 0.001988467845226643, "loss": 0.233, "step": 51960 }, { "epoch": 0.36887391248028134, "grad_norm": 0.0927734375, "learning_rate": 0.001988463328514349, "loss": 0.2335, "step": 51970 }, { "epoch": 0.3689448907201275, "grad_norm": 0.07177734375, "learning_rate": 0.001988458810923421, "loss": 0.2325, "step": 51980 }, { "epoch": 0.36901586895997357, "grad_norm": 0.07421875, "learning_rate": 0.001988454292453863, "loss": 0.2283, "step": 51990 }, { "epoch": 0.3690868471998197, "grad_norm": 0.08740234375, "learning_rate": 0.001988449773105679, "loss": 0.2431, "step": 52000 }, { "epoch": 0.3690868471998197, "eval_covost2-zh-en_loss": 3.8909382820129395, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.0503, "eval_covost2-zh-en_samples_per_second": 3.04, "eval_covost2-zh-en_steps_per_second": 0.19, "step": 52000 }, { "epoch": 0.3690868471998197, "eval_covost2-en-zh_loss": 3.161593437194824, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 22.305, "eval_covost2-en-zh_samples_per_second": 2.869, "eval_covost2-en-zh_steps_per_second": 0.179, "step": 52000 }, { "epoch": 0.36915782543966585, "grad_norm": 0.09033203125, "learning_rate": 0.0019884452528788742, "loss": 0.2457, "step": 52010 }, { "epoch": 0.36922880367951194, "grad_norm": 0.0947265625, "learning_rate": 0.0019884407317734528, "loss": 0.2579, "step": 52020 }, { "epoch": 0.3692997819193581, "grad_norm": 0.064453125, "learning_rate": 0.001988436209789419, "loss": 0.2315, "step": 52030 }, { "epoch": 0.36937076015920417, "grad_norm": 0.10791015625, "learning_rate": 0.0019884316869267773, "loss": 0.2694, "step": 52040 }, { "epoch": 0.3694417383990503, "grad_norm": 0.083984375, "learning_rate": 0.001988427163185533, "loss": 0.2374, "step": 52050 }, { "epoch": 0.36951271663889645, "grad_norm": 0.08251953125, "learning_rate": 0.001988422638565689, "loss": 0.2366, "step": 52060 }, { "epoch": 0.36958369487874254, "grad_norm": 0.103515625, "learning_rate": 0.0019884181130672514, "loss": 0.2783, "step": 52070 }, { "epoch": 0.3696546731185887, "grad_norm": 0.09814453125, "learning_rate": 0.001988413586690224, "loss": 0.2455, "step": 52080 }, { "epoch": 0.36972565135843477, "grad_norm": 0.1669921875, "learning_rate": 0.0019884090594346105, "loss": 0.2504, "step": 52090 }, { "epoch": 0.3697966295982809, "grad_norm": 0.087890625, "learning_rate": 0.0019884045313004165, "loss": 0.2483, "step": 52100 }, { "epoch": 0.369867607838127, "grad_norm": 0.07763671875, "learning_rate": 0.0019884000022876466, "loss": 0.2422, "step": 52110 }, { "epoch": 0.36993858607797314, "grad_norm": 0.09912109375, "learning_rate": 0.001988395472396304, "loss": 0.2502, "step": 52120 }, { "epoch": 0.3700095643178193, "grad_norm": 0.08447265625, "learning_rate": 0.0019883909416263945, "loss": 0.2502, "step": 52130 }, { "epoch": 0.37008054255766537, "grad_norm": 0.15234375, "learning_rate": 0.001988386409977922, "loss": 0.2506, "step": 52140 }, { "epoch": 0.3701515207975115, "grad_norm": 0.1201171875, "learning_rate": 0.0019883818774508906, "loss": 0.246, "step": 52150 }, { "epoch": 0.3702224990373576, "grad_norm": 0.15625, "learning_rate": 0.0019883773440453056, "loss": 0.2249, "step": 52160 }, { "epoch": 0.37029347727720374, "grad_norm": 0.08642578125, "learning_rate": 0.001988372809761171, "loss": 0.241, "step": 52170 }, { "epoch": 0.3703644555170499, "grad_norm": 0.1171875, "learning_rate": 0.0019883682745984914, "loss": 0.2677, "step": 52180 }, { "epoch": 0.37043543375689597, "grad_norm": 0.11181640625, "learning_rate": 0.001988363738557271, "loss": 0.2278, "step": 52190 }, { "epoch": 0.3705064119967421, "grad_norm": 0.11376953125, "learning_rate": 0.0019883592016375147, "loss": 0.256, "step": 52200 }, { "epoch": 0.3705773902365882, "grad_norm": 0.111328125, "learning_rate": 0.001988354663839227, "loss": 0.2589, "step": 52210 }, { "epoch": 0.37064836847643434, "grad_norm": 0.09619140625, "learning_rate": 0.001988350125162412, "loss": 0.2452, "step": 52220 }, { "epoch": 0.37071934671628043, "grad_norm": 0.1171875, "learning_rate": 0.0019883455856070744, "loss": 0.2413, "step": 52230 }, { "epoch": 0.37079032495612657, "grad_norm": 0.107421875, "learning_rate": 0.0019883410451732187, "loss": 0.2428, "step": 52240 }, { "epoch": 0.3708613031959727, "grad_norm": 0.1025390625, "learning_rate": 0.0019883365038608496, "loss": 0.2658, "step": 52250 }, { "epoch": 0.3709322814358188, "grad_norm": 0.146484375, "learning_rate": 0.001988331961669971, "loss": 0.237, "step": 52260 }, { "epoch": 0.37100325967566494, "grad_norm": 0.10693359375, "learning_rate": 0.001988327418600588, "loss": 0.2453, "step": 52270 }, { "epoch": 0.37107423791551103, "grad_norm": 0.09765625, "learning_rate": 0.0019883228746527047, "loss": 0.239, "step": 52280 }, { "epoch": 0.37114521615535717, "grad_norm": 0.072265625, "learning_rate": 0.001988318329826326, "loss": 0.2366, "step": 52290 }, { "epoch": 0.3712161943952033, "grad_norm": 0.07666015625, "learning_rate": 0.0019883137841214564, "loss": 0.2334, "step": 52300 }, { "epoch": 0.3712871726350494, "grad_norm": 0.1005859375, "learning_rate": 0.0019883092375380996, "loss": 0.2504, "step": 52310 }, { "epoch": 0.37135815087489554, "grad_norm": 0.154296875, "learning_rate": 0.0019883046900762607, "loss": 0.2463, "step": 52320 }, { "epoch": 0.37142912911474163, "grad_norm": 0.09619140625, "learning_rate": 0.0019883001417359443, "loss": 0.2568, "step": 52330 }, { "epoch": 0.37150010735458777, "grad_norm": 0.1328125, "learning_rate": 0.0019882955925171544, "loss": 0.2552, "step": 52340 }, { "epoch": 0.3715710855944339, "grad_norm": 0.1064453125, "learning_rate": 0.0019882910424198967, "loss": 0.2555, "step": 52350 }, { "epoch": 0.37164206383428, "grad_norm": 0.09423828125, "learning_rate": 0.001988286491444174, "loss": 0.2468, "step": 52360 }, { "epoch": 0.37171304207412614, "grad_norm": 0.11328125, "learning_rate": 0.001988281939589992, "loss": 0.241, "step": 52370 }, { "epoch": 0.37178402031397223, "grad_norm": 0.0869140625, "learning_rate": 0.0019882773868573544, "loss": 0.2526, "step": 52380 }, { "epoch": 0.37185499855381837, "grad_norm": 0.09326171875, "learning_rate": 0.001988272833246267, "loss": 0.2255, "step": 52390 }, { "epoch": 0.37192597679366446, "grad_norm": 0.09130859375, "learning_rate": 0.001988268278756732, "loss": 0.2343, "step": 52400 }, { "epoch": 0.3719969550335106, "grad_norm": 0.2177734375, "learning_rate": 0.001988263723388757, "loss": 0.2344, "step": 52410 }, { "epoch": 0.37206793327335674, "grad_norm": 0.09130859375, "learning_rate": 0.001988259167142344, "loss": 0.2464, "step": 52420 }, { "epoch": 0.37213891151320283, "grad_norm": 0.1201171875, "learning_rate": 0.0019882546100174984, "loss": 0.2356, "step": 52430 }, { "epoch": 0.372209889753049, "grad_norm": 0.08447265625, "learning_rate": 0.001988250052014225, "loss": 0.2518, "step": 52440 }, { "epoch": 0.37228086799289506, "grad_norm": 0.099609375, "learning_rate": 0.0019882454931325275, "loss": 0.2514, "step": 52450 }, { "epoch": 0.3723518462327412, "grad_norm": 0.06396484375, "learning_rate": 0.001988240933372411, "loss": 0.2648, "step": 52460 }, { "epoch": 0.37242282447258734, "grad_norm": 0.0810546875, "learning_rate": 0.0019882363727338798, "loss": 0.2506, "step": 52470 }, { "epoch": 0.37249380271243343, "grad_norm": 0.0927734375, "learning_rate": 0.0019882318112169383, "loss": 0.2406, "step": 52480 }, { "epoch": 0.3725647809522796, "grad_norm": 0.08154296875, "learning_rate": 0.0019882272488215918, "loss": 0.2438, "step": 52490 }, { "epoch": 0.37263575919212566, "grad_norm": 0.08642578125, "learning_rate": 0.001988222685547844, "loss": 0.2418, "step": 52500 }, { "epoch": 0.3727067374319718, "grad_norm": 0.12255859375, "learning_rate": 0.0019882181213956994, "loss": 0.2589, "step": 52510 }, { "epoch": 0.3727777156718179, "grad_norm": 0.09814453125, "learning_rate": 0.001988213556365163, "loss": 0.2321, "step": 52520 }, { "epoch": 0.37284869391166403, "grad_norm": 0.09326171875, "learning_rate": 0.001988208990456239, "loss": 0.2334, "step": 52530 }, { "epoch": 0.3729196721515102, "grad_norm": 0.1201171875, "learning_rate": 0.0019882044236689323, "loss": 0.2354, "step": 52540 }, { "epoch": 0.37299065039135626, "grad_norm": 0.08642578125, "learning_rate": 0.0019881998560032465, "loss": 0.242, "step": 52550 }, { "epoch": 0.3730616286312024, "grad_norm": 0.07568359375, "learning_rate": 0.001988195287459187, "loss": 0.2471, "step": 52560 }, { "epoch": 0.3731326068710485, "grad_norm": 0.0927734375, "learning_rate": 0.001988190718036758, "loss": 0.2538, "step": 52570 }, { "epoch": 0.37320358511089463, "grad_norm": 0.10888671875, "learning_rate": 0.001988186147735964, "loss": 0.2444, "step": 52580 }, { "epoch": 0.3732745633507408, "grad_norm": 0.06396484375, "learning_rate": 0.0019881815765568096, "loss": 0.2286, "step": 52590 }, { "epoch": 0.37334554159058686, "grad_norm": 0.09130859375, "learning_rate": 0.0019881770044992996, "loss": 0.2431, "step": 52600 }, { "epoch": 0.373416519830433, "grad_norm": 0.13671875, "learning_rate": 0.001988172431563438, "loss": 0.2232, "step": 52610 }, { "epoch": 0.3734874980702791, "grad_norm": 0.236328125, "learning_rate": 0.001988167857749229, "loss": 0.2358, "step": 52620 }, { "epoch": 0.37355847631012523, "grad_norm": 0.1318359375, "learning_rate": 0.001988163283056678, "loss": 0.2504, "step": 52630 }, { "epoch": 0.3736294545499713, "grad_norm": 0.130859375, "learning_rate": 0.00198815870748579, "loss": 0.2422, "step": 52640 }, { "epoch": 0.37370043278981746, "grad_norm": 0.15234375, "learning_rate": 0.0019881541310365677, "loss": 0.2386, "step": 52650 }, { "epoch": 0.3737714110296636, "grad_norm": 0.2109375, "learning_rate": 0.001988149553709017, "loss": 0.2542, "step": 52660 }, { "epoch": 0.3738423892695097, "grad_norm": 0.1298828125, "learning_rate": 0.0019881449755031417, "loss": 0.2544, "step": 52670 }, { "epoch": 0.37391336750935583, "grad_norm": 0.07861328125, "learning_rate": 0.001988140396418947, "loss": 0.2335, "step": 52680 }, { "epoch": 0.3739843457492019, "grad_norm": 0.0986328125, "learning_rate": 0.001988135816456437, "loss": 0.2323, "step": 52690 }, { "epoch": 0.37405532398904806, "grad_norm": 0.062255859375, "learning_rate": 0.0019881312356156166, "loss": 0.2407, "step": 52700 }, { "epoch": 0.3741263022288942, "grad_norm": 0.07958984375, "learning_rate": 0.0019881266538964897, "loss": 0.2348, "step": 52710 }, { "epoch": 0.3741972804687403, "grad_norm": 0.1396484375, "learning_rate": 0.0019881220712990614, "loss": 0.2587, "step": 52720 }, { "epoch": 0.37426825870858643, "grad_norm": 0.1005859375, "learning_rate": 0.001988117487823336, "loss": 0.2366, "step": 52730 }, { "epoch": 0.3743392369484325, "grad_norm": 0.12353515625, "learning_rate": 0.0019881129034693182, "loss": 0.2412, "step": 52740 }, { "epoch": 0.37441021518827866, "grad_norm": 0.1669921875, "learning_rate": 0.001988108318237012, "loss": 0.2454, "step": 52750 }, { "epoch": 0.37448119342812475, "grad_norm": 0.08203125, "learning_rate": 0.0019881037321264226, "loss": 0.2562, "step": 52760 }, { "epoch": 0.3745521716679709, "grad_norm": 0.2490234375, "learning_rate": 0.0019880991451375544, "loss": 0.2405, "step": 52770 }, { "epoch": 0.37462314990781703, "grad_norm": 0.1650390625, "learning_rate": 0.001988094557270412, "loss": 0.2466, "step": 52780 }, { "epoch": 0.3746941281476631, "grad_norm": 0.140625, "learning_rate": 0.0019880899685249995, "loss": 0.2278, "step": 52790 }, { "epoch": 0.37476510638750926, "grad_norm": 0.13671875, "learning_rate": 0.0019880853789013215, "loss": 0.2594, "step": 52800 }, { "epoch": 0.37483608462735535, "grad_norm": 0.12890625, "learning_rate": 0.0019880807883993824, "loss": 0.2375, "step": 52810 }, { "epoch": 0.3749070628672015, "grad_norm": 0.0810546875, "learning_rate": 0.0019880761970191875, "loss": 0.2641, "step": 52820 }, { "epoch": 0.37497804110704763, "grad_norm": 0.07080078125, "learning_rate": 0.001988071604760741, "loss": 0.241, "step": 52830 }, { "epoch": 0.3750490193468937, "grad_norm": 0.0791015625, "learning_rate": 0.0019880670116240473, "loss": 0.2647, "step": 52840 }, { "epoch": 0.37511999758673986, "grad_norm": 0.1162109375, "learning_rate": 0.001988062417609111, "loss": 0.2333, "step": 52850 }, { "epoch": 0.37519097582658595, "grad_norm": 0.1279296875, "learning_rate": 0.0019880578227159363, "loss": 0.2476, "step": 52860 }, { "epoch": 0.3752619540664321, "grad_norm": 0.255859375, "learning_rate": 0.0019880532269445283, "loss": 0.2429, "step": 52870 }, { "epoch": 0.3753329323062782, "grad_norm": 0.08203125, "learning_rate": 0.0019880486302948914, "loss": 0.2384, "step": 52880 }, { "epoch": 0.3754039105461243, "grad_norm": 0.076171875, "learning_rate": 0.00198804403276703, "loss": 0.2442, "step": 52890 }, { "epoch": 0.37547488878597046, "grad_norm": 0.107421875, "learning_rate": 0.0019880394343609487, "loss": 0.2429, "step": 52900 }, { "epoch": 0.37554586702581655, "grad_norm": 0.1123046875, "learning_rate": 0.001988034835076652, "loss": 0.2433, "step": 52910 }, { "epoch": 0.3756168452656627, "grad_norm": 0.076171875, "learning_rate": 0.001988030234914145, "loss": 0.2318, "step": 52920 }, { "epoch": 0.3756878235055088, "grad_norm": 0.0712890625, "learning_rate": 0.001988025633873431, "loss": 0.2129, "step": 52930 }, { "epoch": 0.3757588017453549, "grad_norm": 0.1416015625, "learning_rate": 0.001988021031954516, "loss": 0.2396, "step": 52940 }, { "epoch": 0.37582977998520106, "grad_norm": 0.09716796875, "learning_rate": 0.001988016429157403, "loss": 0.2457, "step": 52950 }, { "epoch": 0.37590075822504715, "grad_norm": 0.0888671875, "learning_rate": 0.001988011825482098, "loss": 0.2355, "step": 52960 }, { "epoch": 0.3759717364648933, "grad_norm": 0.1044921875, "learning_rate": 0.0019880072209286052, "loss": 0.2398, "step": 52970 }, { "epoch": 0.3760427147047394, "grad_norm": 0.10888671875, "learning_rate": 0.0019880026154969286, "loss": 0.2608, "step": 52980 }, { "epoch": 0.3761136929445855, "grad_norm": 0.19140625, "learning_rate": 0.001987998009187073, "loss": 0.2704, "step": 52990 }, { "epoch": 0.3761846711844316, "grad_norm": 0.1337890625, "learning_rate": 0.001987993401999043, "loss": 0.2301, "step": 53000 }, { "epoch": 0.37625564942427775, "grad_norm": 0.1318359375, "learning_rate": 0.001987988793932843, "loss": 0.2447, "step": 53010 }, { "epoch": 0.3763266276641239, "grad_norm": 0.1416015625, "learning_rate": 0.001987984184988478, "loss": 0.2314, "step": 53020 }, { "epoch": 0.37639760590397, "grad_norm": 0.130859375, "learning_rate": 0.001987979575165952, "loss": 0.245, "step": 53030 }, { "epoch": 0.3764685841438161, "grad_norm": 0.11083984375, "learning_rate": 0.0019879749644652707, "loss": 0.2366, "step": 53040 }, { "epoch": 0.3765395623836622, "grad_norm": 0.08203125, "learning_rate": 0.0019879703528864366, "loss": 0.2368, "step": 53050 }, { "epoch": 0.37661054062350835, "grad_norm": 0.3359375, "learning_rate": 0.0019879657404294564, "loss": 0.2407, "step": 53060 }, { "epoch": 0.3766815188633545, "grad_norm": 0.310546875, "learning_rate": 0.001987961127094333, "loss": 0.2439, "step": 53070 }, { "epoch": 0.3767524971032006, "grad_norm": 0.126953125, "learning_rate": 0.0019879565128810726, "loss": 0.2526, "step": 53080 }, { "epoch": 0.3768234753430467, "grad_norm": 0.09228515625, "learning_rate": 0.0019879518977896785, "loss": 0.2386, "step": 53090 }, { "epoch": 0.3768944535828928, "grad_norm": 0.08642578125, "learning_rate": 0.001987947281820155, "loss": 0.2412, "step": 53100 }, { "epoch": 0.37696543182273895, "grad_norm": 0.08251953125, "learning_rate": 0.0019879426649725084, "loss": 0.2551, "step": 53110 }, { "epoch": 0.37703641006258504, "grad_norm": 0.0888671875, "learning_rate": 0.0019879380472467415, "loss": 0.2325, "step": 53120 }, { "epoch": 0.3771073883024312, "grad_norm": 0.08837890625, "learning_rate": 0.0019879334286428594, "loss": 0.2337, "step": 53130 }, { "epoch": 0.3771783665422773, "grad_norm": 0.0947265625, "learning_rate": 0.001987928809160867, "loss": 0.2414, "step": 53140 }, { "epoch": 0.3772493447821234, "grad_norm": 0.10693359375, "learning_rate": 0.0019879241888007687, "loss": 0.2422, "step": 53150 }, { "epoch": 0.37732032302196955, "grad_norm": 0.087890625, "learning_rate": 0.001987919567562569, "loss": 0.2339, "step": 53160 }, { "epoch": 0.37739130126181564, "grad_norm": 0.08740234375, "learning_rate": 0.0019879149454462724, "loss": 0.2292, "step": 53170 }, { "epoch": 0.3774622795016618, "grad_norm": 0.09912109375, "learning_rate": 0.0019879103224518836, "loss": 0.248, "step": 53180 }, { "epoch": 0.3775332577415079, "grad_norm": 0.140625, "learning_rate": 0.0019879056985794073, "loss": 0.2447, "step": 53190 }, { "epoch": 0.377604235981354, "grad_norm": 0.068359375, "learning_rate": 0.001987901073828848, "loss": 0.2357, "step": 53200 }, { "epoch": 0.37767521422120015, "grad_norm": 0.08740234375, "learning_rate": 0.00198789644820021, "loss": 0.2419, "step": 53210 }, { "epoch": 0.37774619246104624, "grad_norm": 0.1083984375, "learning_rate": 0.001987891821693498, "loss": 0.2365, "step": 53220 }, { "epoch": 0.3778171707008924, "grad_norm": 0.091796875, "learning_rate": 0.0019878871943087165, "loss": 0.2463, "step": 53230 }, { "epoch": 0.37788814894073847, "grad_norm": 0.1513671875, "learning_rate": 0.00198788256604587, "loss": 0.2482, "step": 53240 }, { "epoch": 0.3779591271805846, "grad_norm": 0.0888671875, "learning_rate": 0.001987877936904964, "loss": 0.2505, "step": 53250 }, { "epoch": 0.37803010542043075, "grad_norm": 0.1376953125, "learning_rate": 0.001987873306886002, "loss": 0.2351, "step": 53260 }, { "epoch": 0.37810108366027684, "grad_norm": 0.08984375, "learning_rate": 0.001987868675988989, "loss": 0.2423, "step": 53270 }, { "epoch": 0.378172061900123, "grad_norm": 0.1015625, "learning_rate": 0.0019878640442139296, "loss": 0.2383, "step": 53280 }, { "epoch": 0.37824304013996907, "grad_norm": 0.08740234375, "learning_rate": 0.001987859411560828, "loss": 0.2307, "step": 53290 }, { "epoch": 0.3783140183798152, "grad_norm": 0.2041015625, "learning_rate": 0.0019878547780296896, "loss": 0.2409, "step": 53300 }, { "epoch": 0.37838499661966135, "grad_norm": 0.11328125, "learning_rate": 0.001987850143620518, "loss": 0.235, "step": 53310 }, { "epoch": 0.37845597485950744, "grad_norm": 0.1025390625, "learning_rate": 0.001987845508333319, "loss": 0.2372, "step": 53320 }, { "epoch": 0.3785269530993536, "grad_norm": 0.1376953125, "learning_rate": 0.001987840872168095, "loss": 0.2484, "step": 53330 }, { "epoch": 0.37859793133919967, "grad_norm": 0.072265625, "learning_rate": 0.0019878362351248534, "loss": 0.2477, "step": 53340 }, { "epoch": 0.3786689095790458, "grad_norm": 0.12353515625, "learning_rate": 0.0019878315972035967, "loss": 0.2518, "step": 53350 }, { "epoch": 0.3787398878188919, "grad_norm": 0.1572265625, "learning_rate": 0.0019878269584043306, "loss": 0.229, "step": 53360 }, { "epoch": 0.37881086605873804, "grad_norm": 0.26171875, "learning_rate": 0.001987822318727059, "loss": 0.2515, "step": 53370 }, { "epoch": 0.3788818442985842, "grad_norm": 0.08935546875, "learning_rate": 0.0019878176781717868, "loss": 0.2336, "step": 53380 }, { "epoch": 0.37895282253843027, "grad_norm": 0.078125, "learning_rate": 0.0019878130367385186, "loss": 0.2295, "step": 53390 }, { "epoch": 0.3790238007782764, "grad_norm": 0.1015625, "learning_rate": 0.001987808394427259, "loss": 0.2383, "step": 53400 }, { "epoch": 0.3790947790181225, "grad_norm": 0.2158203125, "learning_rate": 0.0019878037512380126, "loss": 0.2662, "step": 53410 }, { "epoch": 0.37916575725796864, "grad_norm": 0.11767578125, "learning_rate": 0.0019877991071707837, "loss": 0.2622, "step": 53420 }, { "epoch": 0.3792367354978148, "grad_norm": 0.091796875, "learning_rate": 0.0019877944622255775, "loss": 0.251, "step": 53430 }, { "epoch": 0.37930771373766087, "grad_norm": 0.11328125, "learning_rate": 0.0019877898164023977, "loss": 0.254, "step": 53440 }, { "epoch": 0.379378691977507, "grad_norm": 0.099609375, "learning_rate": 0.0019877851697012497, "loss": 0.227, "step": 53450 }, { "epoch": 0.3794496702173531, "grad_norm": 0.15625, "learning_rate": 0.001987780522122138, "loss": 0.2482, "step": 53460 }, { "epoch": 0.37952064845719924, "grad_norm": 0.1025390625, "learning_rate": 0.0019877758736650667, "loss": 0.2371, "step": 53470 }, { "epoch": 0.3795916266970453, "grad_norm": 0.1279296875, "learning_rate": 0.001987771224330041, "loss": 0.2327, "step": 53480 }, { "epoch": 0.37966260493689147, "grad_norm": 0.0849609375, "learning_rate": 0.0019877665741170644, "loss": 0.2312, "step": 53490 }, { "epoch": 0.3797335831767376, "grad_norm": 0.17578125, "learning_rate": 0.001987761923026143, "loss": 0.2344, "step": 53500 }, { "epoch": 0.3798045614165837, "grad_norm": 0.1240234375, "learning_rate": 0.001987757271057281, "loss": 0.2312, "step": 53510 }, { "epoch": 0.37987553965642984, "grad_norm": 0.08837890625, "learning_rate": 0.001987752618210482, "loss": 0.2456, "step": 53520 }, { "epoch": 0.3799465178962759, "grad_norm": 0.107421875, "learning_rate": 0.001987747964485752, "loss": 0.2407, "step": 53530 }, { "epoch": 0.38001749613612207, "grad_norm": 0.4296875, "learning_rate": 0.001987743309883094, "loss": 0.2127, "step": 53540 }, { "epoch": 0.3800884743759682, "grad_norm": 0.115234375, "learning_rate": 0.001987738654402514, "loss": 0.2417, "step": 53550 }, { "epoch": 0.3801594526158143, "grad_norm": 0.0751953125, "learning_rate": 0.001987733998044016, "loss": 0.2377, "step": 53560 }, { "epoch": 0.38023043085566044, "grad_norm": 0.1279296875, "learning_rate": 0.001987729340807605, "loss": 0.2272, "step": 53570 }, { "epoch": 0.3803014090955065, "grad_norm": 0.09228515625, "learning_rate": 0.0019877246826932853, "loss": 0.24, "step": 53580 }, { "epoch": 0.38037238733535267, "grad_norm": 0.087890625, "learning_rate": 0.001987720023701061, "loss": 0.2512, "step": 53590 }, { "epoch": 0.3804433655751988, "grad_norm": 0.1396484375, "learning_rate": 0.0019877153638309377, "loss": 0.261, "step": 53600 }, { "epoch": 0.3805143438150449, "grad_norm": 0.080078125, "learning_rate": 0.0019877107030829197, "loss": 0.2322, "step": 53610 }, { "epoch": 0.38058532205489104, "grad_norm": 0.16015625, "learning_rate": 0.001987706041457011, "loss": 0.2453, "step": 53620 }, { "epoch": 0.3806563002947371, "grad_norm": 0.1123046875, "learning_rate": 0.001987701378953217, "loss": 0.2467, "step": 53630 }, { "epoch": 0.38072727853458327, "grad_norm": 0.11083984375, "learning_rate": 0.001987696715571542, "loss": 0.2349, "step": 53640 }, { "epoch": 0.38079825677442936, "grad_norm": 0.0673828125, "learning_rate": 0.0019876920513119904, "loss": 0.2572, "step": 53650 }, { "epoch": 0.3808692350142755, "grad_norm": 0.0712890625, "learning_rate": 0.001987687386174567, "loss": 0.2376, "step": 53660 }, { "epoch": 0.38094021325412164, "grad_norm": 0.091796875, "learning_rate": 0.001987682720159276, "loss": 0.2388, "step": 53670 }, { "epoch": 0.3810111914939677, "grad_norm": 0.1396484375, "learning_rate": 0.001987678053266123, "loss": 0.227, "step": 53680 }, { "epoch": 0.38108216973381387, "grad_norm": 0.2109375, "learning_rate": 0.001987673385495112, "loss": 0.2514, "step": 53690 }, { "epoch": 0.38115314797365996, "grad_norm": 0.369140625, "learning_rate": 0.0019876687168462475, "loss": 0.254, "step": 53700 }, { "epoch": 0.3812241262135061, "grad_norm": 0.0908203125, "learning_rate": 0.0019876640473195342, "loss": 0.241, "step": 53710 }, { "epoch": 0.38129510445335224, "grad_norm": 0.08642578125, "learning_rate": 0.0019876593769149772, "loss": 0.2398, "step": 53720 }, { "epoch": 0.3813660826931983, "grad_norm": 0.0869140625, "learning_rate": 0.0019876547056325803, "loss": 0.2411, "step": 53730 }, { "epoch": 0.38143706093304447, "grad_norm": 0.11669921875, "learning_rate": 0.0019876500334723487, "loss": 0.2657, "step": 53740 }, { "epoch": 0.38150803917289056, "grad_norm": 0.07421875, "learning_rate": 0.0019876453604342867, "loss": 0.2457, "step": 53750 }, { "epoch": 0.3815790174127367, "grad_norm": 0.11474609375, "learning_rate": 0.001987640686518399, "loss": 0.2407, "step": 53760 }, { "epoch": 0.3816499956525828, "grad_norm": 0.1708984375, "learning_rate": 0.0019876360117246908, "loss": 0.2381, "step": 53770 }, { "epoch": 0.3817209738924289, "grad_norm": 0.0751953125, "learning_rate": 0.001987631336053166, "loss": 0.242, "step": 53780 }, { "epoch": 0.38179195213227507, "grad_norm": 0.1376953125, "learning_rate": 0.001987626659503829, "loss": 0.2468, "step": 53790 }, { "epoch": 0.38186293037212116, "grad_norm": 0.0751953125, "learning_rate": 0.0019876219820766853, "loss": 0.239, "step": 53800 }, { "epoch": 0.3819339086119673, "grad_norm": 0.0947265625, "learning_rate": 0.001987617303771739, "loss": 0.2231, "step": 53810 }, { "epoch": 0.3820048868518134, "grad_norm": 0.08544921875, "learning_rate": 0.0019876126245889944, "loss": 0.2352, "step": 53820 }, { "epoch": 0.38207586509165953, "grad_norm": 0.07177734375, "learning_rate": 0.0019876079445284574, "loss": 0.2365, "step": 53830 }, { "epoch": 0.38214684333150567, "grad_norm": 0.2890625, "learning_rate": 0.001987603263590131, "loss": 0.2538, "step": 53840 }, { "epoch": 0.38221782157135176, "grad_norm": 0.16796875, "learning_rate": 0.001987598581774021, "loss": 0.2716, "step": 53850 }, { "epoch": 0.3822887998111979, "grad_norm": 0.06982421875, "learning_rate": 0.0019875938990801313, "loss": 0.2573, "step": 53860 }, { "epoch": 0.382359778051044, "grad_norm": 0.12890625, "learning_rate": 0.0019875892155084675, "loss": 0.2347, "step": 53870 }, { "epoch": 0.38243075629089013, "grad_norm": 0.08837890625, "learning_rate": 0.001987584531059033, "loss": 0.2616, "step": 53880 }, { "epoch": 0.3825017345307362, "grad_norm": 0.166015625, "learning_rate": 0.0019875798457318333, "loss": 0.2398, "step": 53890 }, { "epoch": 0.38257271277058236, "grad_norm": 0.185546875, "learning_rate": 0.0019875751595268727, "loss": 0.2248, "step": 53900 }, { "epoch": 0.3826436910104285, "grad_norm": 0.1044921875, "learning_rate": 0.001987570472444156, "loss": 0.2562, "step": 53910 }, { "epoch": 0.3827146692502746, "grad_norm": 0.095703125, "learning_rate": 0.0019875657844836874, "loss": 0.248, "step": 53920 }, { "epoch": 0.38278564749012073, "grad_norm": 0.1015625, "learning_rate": 0.001987561095645472, "loss": 0.2412, "step": 53930 }, { "epoch": 0.3828566257299668, "grad_norm": 0.12158203125, "learning_rate": 0.0019875564059295145, "loss": 0.2583, "step": 53940 }, { "epoch": 0.38292760396981296, "grad_norm": 0.140625, "learning_rate": 0.001987551715335819, "loss": 0.2486, "step": 53950 }, { "epoch": 0.3829985822096591, "grad_norm": 0.091796875, "learning_rate": 0.001987547023864391, "loss": 0.2499, "step": 53960 }, { "epoch": 0.3830695604495052, "grad_norm": 0.08251953125, "learning_rate": 0.0019875423315152342, "loss": 0.2417, "step": 53970 }, { "epoch": 0.38314053868935133, "grad_norm": 0.287109375, "learning_rate": 0.0019875376382883538, "loss": 0.23, "step": 53980 }, { "epoch": 0.3832115169291974, "grad_norm": 0.109375, "learning_rate": 0.0019875329441837544, "loss": 0.2338, "step": 53990 }, { "epoch": 0.38328249516904356, "grad_norm": 0.06640625, "learning_rate": 0.0019875282492014404, "loss": 0.2347, "step": 54000 }, { "epoch": 0.38328249516904356, "eval_covost2-zh-en_loss": 3.851573944091797, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.4835, "eval_covost2-zh-en_samples_per_second": 3.124, "eval_covost2-zh-en_steps_per_second": 0.195, "step": 54000 }, { "epoch": 0.38328249516904356, "eval_covost2-en-zh_loss": 3.147329330444336, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.4234, "eval_covost2-en-zh_samples_per_second": 3.295, "eval_covost2-en-zh_steps_per_second": 0.206, "step": 54000 }, { "epoch": 0.38335347340888964, "grad_norm": 0.51953125, "learning_rate": 0.0019875235533414162, "loss": 0.2514, "step": 54010 }, { "epoch": 0.3834244516487358, "grad_norm": 0.0810546875, "learning_rate": 0.001987518856603688, "loss": 0.2403, "step": 54020 }, { "epoch": 0.38349542988858193, "grad_norm": 0.11181640625, "learning_rate": 0.001987514158988258, "loss": 0.2353, "step": 54030 }, { "epoch": 0.383566408128428, "grad_norm": 0.09033203125, "learning_rate": 0.0019875094604951326, "loss": 0.2521, "step": 54040 }, { "epoch": 0.38363738636827416, "grad_norm": 0.10546875, "learning_rate": 0.001987504761124316, "loss": 0.2645, "step": 54050 }, { "epoch": 0.38370836460812024, "grad_norm": 0.0927734375, "learning_rate": 0.001987500060875813, "loss": 0.2531, "step": 54060 }, { "epoch": 0.3837793428479664, "grad_norm": 0.1103515625, "learning_rate": 0.001987495359749628, "loss": 0.2358, "step": 54070 }, { "epoch": 0.38385032108781253, "grad_norm": 0.08984375, "learning_rate": 0.0019874906577457656, "loss": 0.2253, "step": 54080 }, { "epoch": 0.3839212993276586, "grad_norm": 0.091796875, "learning_rate": 0.0019874859548642304, "loss": 0.2468, "step": 54090 }, { "epoch": 0.38399227756750476, "grad_norm": 0.1875, "learning_rate": 0.0019874812511050275, "loss": 0.2617, "step": 54100 }, { "epoch": 0.38406325580735085, "grad_norm": 0.109375, "learning_rate": 0.001987476546468161, "loss": 0.2581, "step": 54110 }, { "epoch": 0.384134234047197, "grad_norm": 0.12451171875, "learning_rate": 0.001987471840953636, "loss": 0.2423, "step": 54120 }, { "epoch": 0.3842052122870431, "grad_norm": 0.265625, "learning_rate": 0.001987467134561457, "loss": 0.2503, "step": 54130 }, { "epoch": 0.3842761905268892, "grad_norm": 0.0830078125, "learning_rate": 0.0019874624272916285, "loss": 0.2572, "step": 54140 }, { "epoch": 0.38434716876673536, "grad_norm": 0.10009765625, "learning_rate": 0.0019874577191441554, "loss": 0.237, "step": 54150 }, { "epoch": 0.38441814700658145, "grad_norm": 0.126953125, "learning_rate": 0.001987453010119042, "loss": 0.2495, "step": 54160 }, { "epoch": 0.3844891252464276, "grad_norm": 0.140625, "learning_rate": 0.0019874483002162934, "loss": 0.2529, "step": 54170 }, { "epoch": 0.3845601034862737, "grad_norm": 0.083984375, "learning_rate": 0.0019874435894359143, "loss": 0.2229, "step": 54180 }, { "epoch": 0.3846310817261198, "grad_norm": 0.0732421875, "learning_rate": 0.0019874388777779087, "loss": 0.2646, "step": 54190 }, { "epoch": 0.38470205996596596, "grad_norm": 0.11572265625, "learning_rate": 0.001987434165242282, "loss": 0.238, "step": 54200 }, { "epoch": 0.38477303820581205, "grad_norm": 0.1005859375, "learning_rate": 0.001987429451829038, "loss": 0.2304, "step": 54210 }, { "epoch": 0.3848440164456582, "grad_norm": 0.08984375, "learning_rate": 0.0019874247375381822, "loss": 0.2394, "step": 54220 }, { "epoch": 0.3849149946855043, "grad_norm": 0.11474609375, "learning_rate": 0.001987420022369719, "loss": 0.2427, "step": 54230 }, { "epoch": 0.3849859729253504, "grad_norm": 0.099609375, "learning_rate": 0.0019874153063236533, "loss": 0.2464, "step": 54240 }, { "epoch": 0.3850569511651965, "grad_norm": 0.1630859375, "learning_rate": 0.001987410589399989, "loss": 0.2603, "step": 54250 }, { "epoch": 0.38512792940504265, "grad_norm": 0.0830078125, "learning_rate": 0.0019874058715987314, "loss": 0.2389, "step": 54260 }, { "epoch": 0.3851989076448888, "grad_norm": 0.185546875, "learning_rate": 0.001987401152919885, "loss": 0.2328, "step": 54270 }, { "epoch": 0.3852698858847349, "grad_norm": 0.0703125, "learning_rate": 0.0019873964333634543, "loss": 0.2257, "step": 54280 }, { "epoch": 0.385340864124581, "grad_norm": 0.1787109375, "learning_rate": 0.0019873917129294445, "loss": 0.2586, "step": 54290 }, { "epoch": 0.3854118423644271, "grad_norm": 0.0712890625, "learning_rate": 0.0019873869916178597, "loss": 0.2406, "step": 54300 }, { "epoch": 0.38548282060427325, "grad_norm": 0.1552734375, "learning_rate": 0.0019873822694287053, "loss": 0.2317, "step": 54310 }, { "epoch": 0.3855537988441194, "grad_norm": 0.265625, "learning_rate": 0.0019873775463619845, "loss": 0.2559, "step": 54320 }, { "epoch": 0.3856247770839655, "grad_norm": 0.0830078125, "learning_rate": 0.0019873728224177037, "loss": 0.2448, "step": 54330 }, { "epoch": 0.3856957553238116, "grad_norm": 0.1162109375, "learning_rate": 0.001987368097595866, "loss": 0.2323, "step": 54340 }, { "epoch": 0.3857667335636577, "grad_norm": 0.115234375, "learning_rate": 0.001987363371896478, "loss": 0.2452, "step": 54350 }, { "epoch": 0.38583771180350385, "grad_norm": 0.07763671875, "learning_rate": 0.0019873586453195424, "loss": 0.2308, "step": 54360 }, { "epoch": 0.38590869004334993, "grad_norm": 0.14453125, "learning_rate": 0.0019873539178650646, "loss": 0.2504, "step": 54370 }, { "epoch": 0.3859796682831961, "grad_norm": 0.09814453125, "learning_rate": 0.00198734918953305, "loss": 0.2456, "step": 54380 }, { "epoch": 0.3860506465230422, "grad_norm": 0.11083984375, "learning_rate": 0.0019873444603235027, "loss": 0.2445, "step": 54390 }, { "epoch": 0.3861216247628883, "grad_norm": 0.09814453125, "learning_rate": 0.0019873397302364267, "loss": 0.2589, "step": 54400 }, { "epoch": 0.38619260300273445, "grad_norm": 0.09130859375, "learning_rate": 0.0019873349992718275, "loss": 0.2406, "step": 54410 }, { "epoch": 0.38626358124258053, "grad_norm": 0.0673828125, "learning_rate": 0.0019873302674297097, "loss": 0.2415, "step": 54420 }, { "epoch": 0.3863345594824267, "grad_norm": 0.1396484375, "learning_rate": 0.001987325534710078, "loss": 0.2431, "step": 54430 }, { "epoch": 0.3864055377222728, "grad_norm": 0.109375, "learning_rate": 0.0019873208011129367, "loss": 0.2287, "step": 54440 }, { "epoch": 0.3864765159621189, "grad_norm": 0.12890625, "learning_rate": 0.001987316066638291, "loss": 0.2607, "step": 54450 }, { "epoch": 0.38654749420196505, "grad_norm": 0.1298828125, "learning_rate": 0.001987311331286145, "loss": 0.2377, "step": 54460 }, { "epoch": 0.38661847244181113, "grad_norm": 0.07861328125, "learning_rate": 0.0019873065950565044, "loss": 0.2425, "step": 54470 }, { "epoch": 0.3866894506816573, "grad_norm": 0.11767578125, "learning_rate": 0.0019873018579493725, "loss": 0.2264, "step": 54480 }, { "epoch": 0.38676042892150336, "grad_norm": 0.0888671875, "learning_rate": 0.001987297119964755, "loss": 0.248, "step": 54490 }, { "epoch": 0.3868314071613495, "grad_norm": 0.0830078125, "learning_rate": 0.0019872923811026563, "loss": 0.247, "step": 54500 }, { "epoch": 0.38690238540119565, "grad_norm": 0.08544921875, "learning_rate": 0.001987287641363081, "loss": 0.2427, "step": 54510 }, { "epoch": 0.38697336364104173, "grad_norm": 0.08837890625, "learning_rate": 0.0019872829007460337, "loss": 0.2436, "step": 54520 }, { "epoch": 0.3870443418808879, "grad_norm": 0.08203125, "learning_rate": 0.0019872781592515194, "loss": 0.2449, "step": 54530 }, { "epoch": 0.38711532012073396, "grad_norm": 0.16796875, "learning_rate": 0.0019872734168795425, "loss": 0.2261, "step": 54540 }, { "epoch": 0.3871862983605801, "grad_norm": 0.130859375, "learning_rate": 0.0019872686736301074, "loss": 0.2525, "step": 54550 }, { "epoch": 0.38725727660042625, "grad_norm": 0.1123046875, "learning_rate": 0.0019872639295032196, "loss": 0.262, "step": 54560 }, { "epoch": 0.38732825484027233, "grad_norm": 0.09521484375, "learning_rate": 0.0019872591844988835, "loss": 0.247, "step": 54570 }, { "epoch": 0.3873992330801185, "grad_norm": 0.078125, "learning_rate": 0.0019872544386171035, "loss": 0.2416, "step": 54580 }, { "epoch": 0.38747021131996456, "grad_norm": 0.091796875, "learning_rate": 0.0019872496918578847, "loss": 0.2346, "step": 54590 }, { "epoch": 0.3875411895598107, "grad_norm": 0.103515625, "learning_rate": 0.001987244944221231, "loss": 0.2358, "step": 54600 }, { "epoch": 0.3876121677996568, "grad_norm": 0.1044921875, "learning_rate": 0.001987240195707148, "loss": 0.2546, "step": 54610 }, { "epoch": 0.38768314603950293, "grad_norm": 0.0869140625, "learning_rate": 0.0019872354463156402, "loss": 0.2307, "step": 54620 }, { "epoch": 0.3877541242793491, "grad_norm": 0.1376953125, "learning_rate": 0.001987230696046712, "loss": 0.2447, "step": 54630 }, { "epoch": 0.38782510251919516, "grad_norm": 0.09716796875, "learning_rate": 0.001987225944900368, "loss": 0.249, "step": 54640 }, { "epoch": 0.3878960807590413, "grad_norm": 0.1435546875, "learning_rate": 0.001987221192876614, "loss": 0.2571, "step": 54650 }, { "epoch": 0.3879670589988874, "grad_norm": 0.1044921875, "learning_rate": 0.001987216439975453, "loss": 0.2464, "step": 54660 }, { "epoch": 0.38803803723873354, "grad_norm": 0.1025390625, "learning_rate": 0.0019872116861968906, "loss": 0.237, "step": 54670 }, { "epoch": 0.3881090154785797, "grad_norm": 0.13671875, "learning_rate": 0.0019872069315409317, "loss": 0.2382, "step": 54680 }, { "epoch": 0.38817999371842576, "grad_norm": 0.1005859375, "learning_rate": 0.001987202176007581, "loss": 0.2297, "step": 54690 }, { "epoch": 0.3882509719582719, "grad_norm": 0.09033203125, "learning_rate": 0.0019871974195968425, "loss": 0.2344, "step": 54700 }, { "epoch": 0.388321950198118, "grad_norm": 0.10107421875, "learning_rate": 0.0019871926623087214, "loss": 0.2465, "step": 54710 }, { "epoch": 0.38839292843796414, "grad_norm": 0.0986328125, "learning_rate": 0.0019871879041432226, "loss": 0.2494, "step": 54720 }, { "epoch": 0.3884639066778103, "grad_norm": 0.09521484375, "learning_rate": 0.0019871831451003506, "loss": 0.2342, "step": 54730 }, { "epoch": 0.38853488491765636, "grad_norm": 0.2431640625, "learning_rate": 0.0019871783851801096, "loss": 0.2504, "step": 54740 }, { "epoch": 0.3886058631575025, "grad_norm": 0.1025390625, "learning_rate": 0.001987173624382505, "loss": 0.2334, "step": 54750 }, { "epoch": 0.3886768413973486, "grad_norm": 0.10595703125, "learning_rate": 0.001987168862707542, "loss": 0.2352, "step": 54760 }, { "epoch": 0.38874781963719474, "grad_norm": 0.09130859375, "learning_rate": 0.0019871641001552236, "loss": 0.2276, "step": 54770 }, { "epoch": 0.3888187978770408, "grad_norm": 0.14453125, "learning_rate": 0.001987159336725556, "loss": 0.2559, "step": 54780 }, { "epoch": 0.38888977611688696, "grad_norm": 0.1650390625, "learning_rate": 0.001987154572418543, "loss": 0.2409, "step": 54790 }, { "epoch": 0.3889607543567331, "grad_norm": 0.2158203125, "learning_rate": 0.00198714980723419, "loss": 0.262, "step": 54800 }, { "epoch": 0.3890317325965792, "grad_norm": 0.11181640625, "learning_rate": 0.0019871450411725017, "loss": 0.2519, "step": 54810 }, { "epoch": 0.38910271083642534, "grad_norm": 0.12255859375, "learning_rate": 0.0019871402742334824, "loss": 0.2503, "step": 54820 }, { "epoch": 0.3891736890762714, "grad_norm": 0.12060546875, "learning_rate": 0.0019871355064171366, "loss": 0.2366, "step": 54830 }, { "epoch": 0.38924466731611757, "grad_norm": 0.10888671875, "learning_rate": 0.00198713073772347, "loss": 0.2509, "step": 54840 }, { "epoch": 0.3893156455559637, "grad_norm": 0.1337890625, "learning_rate": 0.0019871259681524863, "loss": 0.2477, "step": 54850 }, { "epoch": 0.3893866237958098, "grad_norm": 0.18359375, "learning_rate": 0.0019871211977041904, "loss": 0.2493, "step": 54860 }, { "epoch": 0.38945760203565594, "grad_norm": 0.12451171875, "learning_rate": 0.0019871164263785875, "loss": 0.235, "step": 54870 }, { "epoch": 0.389528580275502, "grad_norm": 0.11767578125, "learning_rate": 0.0019871116541756825, "loss": 0.2483, "step": 54880 }, { "epoch": 0.38959955851534817, "grad_norm": 0.17578125, "learning_rate": 0.0019871068810954793, "loss": 0.2724, "step": 54890 }, { "epoch": 0.38967053675519425, "grad_norm": 0.052978515625, "learning_rate": 0.0019871021071379827, "loss": 0.2313, "step": 54900 }, { "epoch": 0.3897415149950404, "grad_norm": 0.0966796875, "learning_rate": 0.001987097332303198, "loss": 0.254, "step": 54910 }, { "epoch": 0.38981249323488654, "grad_norm": 0.11962890625, "learning_rate": 0.0019870925565911297, "loss": 0.2431, "step": 54920 }, { "epoch": 0.3898834714747326, "grad_norm": 0.169921875, "learning_rate": 0.0019870877800017826, "loss": 0.2324, "step": 54930 }, { "epoch": 0.38995444971457877, "grad_norm": 0.1259765625, "learning_rate": 0.0019870830025351606, "loss": 0.2471, "step": 54940 }, { "epoch": 0.39002542795442485, "grad_norm": 0.1962890625, "learning_rate": 0.00198707822419127, "loss": 0.2396, "step": 54950 }, { "epoch": 0.390096406194271, "grad_norm": 0.12060546875, "learning_rate": 0.001987073444970114, "loss": 0.2474, "step": 54960 }, { "epoch": 0.39016738443411714, "grad_norm": 0.10400390625, "learning_rate": 0.0019870686648716983, "loss": 0.2442, "step": 54970 }, { "epoch": 0.3902383626739632, "grad_norm": 0.10498046875, "learning_rate": 0.001987063883896027, "loss": 0.2594, "step": 54980 }, { "epoch": 0.39030934091380937, "grad_norm": 0.09619140625, "learning_rate": 0.001987059102043105, "loss": 0.2516, "step": 54990 }, { "epoch": 0.39038031915365545, "grad_norm": 0.08935546875, "learning_rate": 0.0019870543193129375, "loss": 0.2339, "step": 55000 }, { "epoch": 0.3904512973935016, "grad_norm": 0.0771484375, "learning_rate": 0.0019870495357055293, "loss": 0.2422, "step": 55010 }, { "epoch": 0.3905222756333477, "grad_norm": 0.1396484375, "learning_rate": 0.001987044751220884, "loss": 0.2495, "step": 55020 }, { "epoch": 0.3905932538731938, "grad_norm": 0.07958984375, "learning_rate": 0.001987039965859007, "loss": 0.2513, "step": 55030 }, { "epoch": 0.39066423211303997, "grad_norm": 0.1357421875, "learning_rate": 0.001987035179619903, "loss": 0.2348, "step": 55040 }, { "epoch": 0.39073521035288605, "grad_norm": 0.1455078125, "learning_rate": 0.0019870303925035777, "loss": 0.2274, "step": 55050 }, { "epoch": 0.3908061885927322, "grad_norm": 0.146484375, "learning_rate": 0.001987025604510034, "loss": 0.2369, "step": 55060 }, { "epoch": 0.3908771668325783, "grad_norm": 0.1513671875, "learning_rate": 0.001987020815639278, "loss": 0.2236, "step": 55070 }, { "epoch": 0.3909481450724244, "grad_norm": 0.1982421875, "learning_rate": 0.0019870160258913137, "loss": 0.2361, "step": 55080 }, { "epoch": 0.39101912331227057, "grad_norm": 0.11767578125, "learning_rate": 0.0019870112352661463, "loss": 0.2554, "step": 55090 }, { "epoch": 0.39109010155211665, "grad_norm": 0.09912109375, "learning_rate": 0.0019870064437637804, "loss": 0.2413, "step": 55100 }, { "epoch": 0.3911610797919628, "grad_norm": 0.154296875, "learning_rate": 0.0019870016513842213, "loss": 0.2553, "step": 55110 }, { "epoch": 0.3912320580318089, "grad_norm": 0.08642578125, "learning_rate": 0.0019869968581274723, "loss": 0.2391, "step": 55120 }, { "epoch": 0.391303036271655, "grad_norm": 0.12353515625, "learning_rate": 0.001986992063993539, "loss": 0.2641, "step": 55130 }, { "epoch": 0.3913740145115011, "grad_norm": 0.08154296875, "learning_rate": 0.0019869872689824266, "loss": 0.2738, "step": 55140 }, { "epoch": 0.39144499275134725, "grad_norm": 0.08251953125, "learning_rate": 0.0019869824730941394, "loss": 0.2431, "step": 55150 }, { "epoch": 0.3915159709911934, "grad_norm": 0.107421875, "learning_rate": 0.001986977676328682, "loss": 0.2333, "step": 55160 }, { "epoch": 0.3915869492310395, "grad_norm": 0.09375, "learning_rate": 0.001986972878686059, "loss": 0.2662, "step": 55170 }, { "epoch": 0.3916579274708856, "grad_norm": 0.0927734375, "learning_rate": 0.0019869680801662755, "loss": 0.2546, "step": 55180 }, { "epoch": 0.3917289057107317, "grad_norm": 0.08056640625, "learning_rate": 0.001986963280769336, "loss": 0.24, "step": 55190 }, { "epoch": 0.39179988395057785, "grad_norm": 0.1083984375, "learning_rate": 0.001986958480495246, "loss": 0.2354, "step": 55200 }, { "epoch": 0.391870862190424, "grad_norm": 0.1474609375, "learning_rate": 0.0019869536793440095, "loss": 0.2428, "step": 55210 }, { "epoch": 0.3919418404302701, "grad_norm": 0.39453125, "learning_rate": 0.0019869488773156313, "loss": 0.2331, "step": 55220 }, { "epoch": 0.3920128186701162, "grad_norm": 0.1494140625, "learning_rate": 0.001986944074410116, "loss": 0.2446, "step": 55230 }, { "epoch": 0.3920837969099623, "grad_norm": 0.10400390625, "learning_rate": 0.001986939270627469, "loss": 0.2656, "step": 55240 }, { "epoch": 0.39215477514980845, "grad_norm": 0.1337890625, "learning_rate": 0.0019869344659676946, "loss": 0.238, "step": 55250 }, { "epoch": 0.39222575338965454, "grad_norm": 0.18359375, "learning_rate": 0.001986929660430797, "loss": 0.2549, "step": 55260 }, { "epoch": 0.3922967316295007, "grad_norm": 0.12255859375, "learning_rate": 0.001986924854016782, "loss": 0.2387, "step": 55270 }, { "epoch": 0.3923677098693468, "grad_norm": 0.11767578125, "learning_rate": 0.0019869200467256543, "loss": 0.2613, "step": 55280 }, { "epoch": 0.3924386881091929, "grad_norm": 0.1103515625, "learning_rate": 0.0019869152385574174, "loss": 0.2433, "step": 55290 }, { "epoch": 0.39250966634903905, "grad_norm": 0.08984375, "learning_rate": 0.0019869104295120775, "loss": 0.2178, "step": 55300 }, { "epoch": 0.39258064458888514, "grad_norm": 0.09912109375, "learning_rate": 0.0019869056195896385, "loss": 0.2444, "step": 55310 }, { "epoch": 0.3926516228287313, "grad_norm": 0.1357421875, "learning_rate": 0.001986900808790106, "loss": 0.2365, "step": 55320 }, { "epoch": 0.3927226010685774, "grad_norm": 0.10546875, "learning_rate": 0.0019868959971134836, "loss": 0.2318, "step": 55330 }, { "epoch": 0.3927935793084235, "grad_norm": 0.0986328125, "learning_rate": 0.001986891184559777, "loss": 0.2355, "step": 55340 }, { "epoch": 0.39286455754826966, "grad_norm": 0.07373046875, "learning_rate": 0.00198688637112899, "loss": 0.2362, "step": 55350 }, { "epoch": 0.39293553578811574, "grad_norm": 0.193359375, "learning_rate": 0.0019868815568211285, "loss": 0.2341, "step": 55360 }, { "epoch": 0.3930065140279619, "grad_norm": 0.1484375, "learning_rate": 0.0019868767416361967, "loss": 0.2425, "step": 55370 }, { "epoch": 0.39307749226780797, "grad_norm": 0.11767578125, "learning_rate": 0.001986871925574199, "loss": 0.2398, "step": 55380 }, { "epoch": 0.3931484705076541, "grad_norm": 0.07421875, "learning_rate": 0.0019868671086351415, "loss": 0.2296, "step": 55390 }, { "epoch": 0.39321944874750026, "grad_norm": 0.2099609375, "learning_rate": 0.001986862290819027, "loss": 0.2427, "step": 55400 }, { "epoch": 0.39329042698734634, "grad_norm": 0.1064453125, "learning_rate": 0.0019868574721258617, "loss": 0.2291, "step": 55410 }, { "epoch": 0.3933614052271925, "grad_norm": 0.10009765625, "learning_rate": 0.00198685265255565, "loss": 0.254, "step": 55420 }, { "epoch": 0.39343238346703857, "grad_norm": 0.103515625, "learning_rate": 0.0019868478321083964, "loss": 0.235, "step": 55430 }, { "epoch": 0.3935033617068847, "grad_norm": 0.10546875, "learning_rate": 0.0019868430107841057, "loss": 0.2356, "step": 55440 }, { "epoch": 0.39357433994673086, "grad_norm": 0.1259765625, "learning_rate": 0.001986838188582783, "loss": 0.2638, "step": 55450 }, { "epoch": 0.39364531818657694, "grad_norm": 0.0927734375, "learning_rate": 0.0019868333655044335, "loss": 0.2383, "step": 55460 }, { "epoch": 0.3937162964264231, "grad_norm": 0.12890625, "learning_rate": 0.0019868285415490607, "loss": 0.2358, "step": 55470 }, { "epoch": 0.39378727466626917, "grad_norm": 0.10693359375, "learning_rate": 0.0019868237167166704, "loss": 0.2348, "step": 55480 }, { "epoch": 0.3938582529061153, "grad_norm": 0.08203125, "learning_rate": 0.0019868188910072667, "loss": 0.2455, "step": 55490 }, { "epoch": 0.3939292311459614, "grad_norm": 0.111328125, "learning_rate": 0.001986814064420855, "loss": 0.2388, "step": 55500 }, { "epoch": 0.39400020938580754, "grad_norm": 0.11279296875, "learning_rate": 0.0019868092369574398, "loss": 0.2513, "step": 55510 }, { "epoch": 0.3940711876256537, "grad_norm": 0.146484375, "learning_rate": 0.001986804408617025, "loss": 0.2427, "step": 55520 }, { "epoch": 0.39414216586549977, "grad_norm": 0.2412109375, "learning_rate": 0.0019867995793996173, "loss": 0.2424, "step": 55530 }, { "epoch": 0.3942131441053459, "grad_norm": 0.1259765625, "learning_rate": 0.00198679474930522, "loss": 0.2611, "step": 55540 }, { "epoch": 0.394284122345192, "grad_norm": 0.11865234375, "learning_rate": 0.0019867899183338383, "loss": 0.2601, "step": 55550 }, { "epoch": 0.39435510058503814, "grad_norm": 0.1279296875, "learning_rate": 0.0019867850864854772, "loss": 0.2286, "step": 55560 }, { "epoch": 0.3944260788248843, "grad_norm": 0.130859375, "learning_rate": 0.0019867802537601406, "loss": 0.2332, "step": 55570 }, { "epoch": 0.39449705706473037, "grad_norm": 0.1279296875, "learning_rate": 0.0019867754201578344, "loss": 0.2443, "step": 55580 }, { "epoch": 0.3945680353045765, "grad_norm": 0.1748046875, "learning_rate": 0.0019867705856785626, "loss": 0.2416, "step": 55590 }, { "epoch": 0.3946390135444226, "grad_norm": 0.1064453125, "learning_rate": 0.0019867657503223305, "loss": 0.2439, "step": 55600 }, { "epoch": 0.39470999178426874, "grad_norm": 0.09326171875, "learning_rate": 0.0019867609140891427, "loss": 0.2494, "step": 55610 }, { "epoch": 0.39478097002411483, "grad_norm": 0.13671875, "learning_rate": 0.0019867560769790036, "loss": 0.2526, "step": 55620 }, { "epoch": 0.394851948263961, "grad_norm": 0.12109375, "learning_rate": 0.001986751238991919, "loss": 0.2506, "step": 55630 }, { "epoch": 0.3949229265038071, "grad_norm": 0.1298828125, "learning_rate": 0.001986746400127892, "loss": 0.2455, "step": 55640 }, { "epoch": 0.3949939047436532, "grad_norm": 0.08349609375, "learning_rate": 0.0019867415603869293, "loss": 0.2393, "step": 55650 }, { "epoch": 0.39506488298349934, "grad_norm": 0.07080078125, "learning_rate": 0.001986736719769034, "loss": 0.2414, "step": 55660 }, { "epoch": 0.39513586122334543, "grad_norm": 0.10693359375, "learning_rate": 0.0019867318782742123, "loss": 0.2618, "step": 55670 }, { "epoch": 0.3952068394631916, "grad_norm": 0.0830078125, "learning_rate": 0.0019867270359024683, "loss": 0.263, "step": 55680 }, { "epoch": 0.3952778177030377, "grad_norm": 0.06689453125, "learning_rate": 0.0019867221926538063, "loss": 0.2458, "step": 55690 }, { "epoch": 0.3953487959428838, "grad_norm": 0.1474609375, "learning_rate": 0.001986717348528232, "loss": 0.2509, "step": 55700 }, { "epoch": 0.39541977418272994, "grad_norm": 0.140625, "learning_rate": 0.0019867125035257496, "loss": 0.2521, "step": 55710 }, { "epoch": 0.39549075242257603, "grad_norm": 0.0703125, "learning_rate": 0.0019867076576463643, "loss": 0.2335, "step": 55720 }, { "epoch": 0.3955617306624222, "grad_norm": 0.1005859375, "learning_rate": 0.0019867028108900807, "loss": 0.267, "step": 55730 }, { "epoch": 0.39563270890226826, "grad_norm": 0.08837890625, "learning_rate": 0.0019866979632569034, "loss": 0.2306, "step": 55740 }, { "epoch": 0.3957036871421144, "grad_norm": 0.10986328125, "learning_rate": 0.0019866931147468378, "loss": 0.2503, "step": 55750 }, { "epoch": 0.39577466538196054, "grad_norm": 0.09716796875, "learning_rate": 0.001986688265359888, "loss": 0.2338, "step": 55760 }, { "epoch": 0.39584564362180663, "grad_norm": 0.169921875, "learning_rate": 0.001986683415096059, "loss": 0.2365, "step": 55770 }, { "epoch": 0.3959166218616528, "grad_norm": 0.10107421875, "learning_rate": 0.001986678563955356, "loss": 0.2309, "step": 55780 }, { "epoch": 0.39598760010149886, "grad_norm": 0.07568359375, "learning_rate": 0.001986673711937783, "loss": 0.2469, "step": 55790 }, { "epoch": 0.396058578341345, "grad_norm": 0.1875, "learning_rate": 0.001986668859043346, "loss": 0.2572, "step": 55800 }, { "epoch": 0.39612955658119114, "grad_norm": 0.11181640625, "learning_rate": 0.0019866640052720484, "loss": 0.2443, "step": 55810 }, { "epoch": 0.39620053482103723, "grad_norm": 0.09375, "learning_rate": 0.001986659150623896, "loss": 0.2444, "step": 55820 }, { "epoch": 0.3962715130608834, "grad_norm": 0.1044921875, "learning_rate": 0.0019866542950988933, "loss": 0.2272, "step": 55830 }, { "epoch": 0.39634249130072946, "grad_norm": 0.119140625, "learning_rate": 0.0019866494386970445, "loss": 0.2339, "step": 55840 }, { "epoch": 0.3964134695405756, "grad_norm": 0.1396484375, "learning_rate": 0.0019866445814183554, "loss": 0.2315, "step": 55850 }, { "epoch": 0.39648444778042174, "grad_norm": 0.126953125, "learning_rate": 0.0019866397232628304, "loss": 0.2554, "step": 55860 }, { "epoch": 0.39655542602026783, "grad_norm": 0.083984375, "learning_rate": 0.0019866348642304742, "loss": 0.2433, "step": 55870 }, { "epoch": 0.396626404260114, "grad_norm": 0.1904296875, "learning_rate": 0.001986630004321292, "loss": 0.2332, "step": 55880 }, { "epoch": 0.39669738249996006, "grad_norm": 0.09716796875, "learning_rate": 0.001986625143535288, "loss": 0.2395, "step": 55890 }, { "epoch": 0.3967683607398062, "grad_norm": 0.08984375, "learning_rate": 0.0019866202818724673, "loss": 0.2433, "step": 55900 }, { "epoch": 0.3968393389796523, "grad_norm": 0.1044921875, "learning_rate": 0.0019866154193328347, "loss": 0.2488, "step": 55910 }, { "epoch": 0.39691031721949843, "grad_norm": 0.125, "learning_rate": 0.001986610555916395, "loss": 0.2507, "step": 55920 }, { "epoch": 0.3969812954593446, "grad_norm": 0.07080078125, "learning_rate": 0.001986605691623153, "loss": 0.2328, "step": 55930 }, { "epoch": 0.39705227369919066, "grad_norm": 0.10009765625, "learning_rate": 0.0019866008264531137, "loss": 0.2491, "step": 55940 }, { "epoch": 0.3971232519390368, "grad_norm": 0.09326171875, "learning_rate": 0.0019865959604062817, "loss": 0.2463, "step": 55950 }, { "epoch": 0.3971942301788829, "grad_norm": 0.1181640625, "learning_rate": 0.001986591093482662, "loss": 0.2609, "step": 55960 }, { "epoch": 0.39726520841872903, "grad_norm": 0.142578125, "learning_rate": 0.0019865862256822587, "loss": 0.2352, "step": 55970 }, { "epoch": 0.3973361866585752, "grad_norm": 0.0712890625, "learning_rate": 0.001986581357005078, "loss": 0.2317, "step": 55980 }, { "epoch": 0.39740716489842126, "grad_norm": 0.138671875, "learning_rate": 0.0019865764874511236, "loss": 0.2272, "step": 55990 }, { "epoch": 0.3974781431382674, "grad_norm": 0.123046875, "learning_rate": 0.0019865716170204004, "loss": 0.252, "step": 56000 }, { "epoch": 0.3974781431382674, "eval_covost2-zh-en_loss": 3.8387880325317383, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.819, "eval_covost2-zh-en_samples_per_second": 2.933, "eval_covost2-zh-en_steps_per_second": 0.183, "step": 56000 }, { "epoch": 0.3974781431382674, "eval_covost2-en-zh_loss": 3.1304805278778076, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 22.4405, "eval_covost2-en-zh_samples_per_second": 2.852, "eval_covost2-en-zh_steps_per_second": 0.178, "step": 56000 }, { "epoch": 0.3975491213781135, "grad_norm": 0.1455078125, "learning_rate": 0.0019865667457129137, "loss": 0.2557, "step": 56010 }, { "epoch": 0.39762009961795963, "grad_norm": 0.1650390625, "learning_rate": 0.0019865618735286678, "loss": 0.2586, "step": 56020 }, { "epoch": 0.3976910778578057, "grad_norm": 0.12060546875, "learning_rate": 0.001986557000467668, "loss": 0.241, "step": 56030 }, { "epoch": 0.39776205609765186, "grad_norm": 0.18359375, "learning_rate": 0.001986552126529919, "loss": 0.239, "step": 56040 }, { "epoch": 0.397833034337498, "grad_norm": 0.10498046875, "learning_rate": 0.001986547251715425, "loss": 0.2459, "step": 56050 }, { "epoch": 0.3979040125773441, "grad_norm": 0.11474609375, "learning_rate": 0.001986542376024192, "loss": 0.2437, "step": 56060 }, { "epoch": 0.39797499081719023, "grad_norm": 0.1064453125, "learning_rate": 0.0019865374994562234, "loss": 0.2629, "step": 56070 }, { "epoch": 0.3980459690570363, "grad_norm": 0.1552734375, "learning_rate": 0.0019865326220115252, "loss": 0.2475, "step": 56080 }, { "epoch": 0.39811694729688246, "grad_norm": 0.09619140625, "learning_rate": 0.0019865277436901018, "loss": 0.2552, "step": 56090 }, { "epoch": 0.3981879255367286, "grad_norm": 0.169921875, "learning_rate": 0.001986522864491958, "loss": 0.2571, "step": 56100 }, { "epoch": 0.3982589037765747, "grad_norm": 0.23828125, "learning_rate": 0.0019865179844170983, "loss": 0.26, "step": 56110 }, { "epoch": 0.39832988201642083, "grad_norm": 0.09619140625, "learning_rate": 0.0019865131034655283, "loss": 0.2485, "step": 56120 }, { "epoch": 0.3984008602562669, "grad_norm": 0.08642578125, "learning_rate": 0.0019865082216372525, "loss": 0.2353, "step": 56130 }, { "epoch": 0.39847183849611306, "grad_norm": 0.0791015625, "learning_rate": 0.001986503338932275, "loss": 0.2274, "step": 56140 }, { "epoch": 0.39854281673595915, "grad_norm": 0.1259765625, "learning_rate": 0.001986498455350602, "loss": 0.2413, "step": 56150 }, { "epoch": 0.3986137949758053, "grad_norm": 0.140625, "learning_rate": 0.0019864935708922373, "loss": 0.2522, "step": 56160 }, { "epoch": 0.39868477321565143, "grad_norm": 0.08935546875, "learning_rate": 0.001986488685557186, "loss": 0.2425, "step": 56170 }, { "epoch": 0.3987557514554975, "grad_norm": 0.08544921875, "learning_rate": 0.001986483799345453, "loss": 0.2571, "step": 56180 }, { "epoch": 0.39882672969534366, "grad_norm": 0.1279296875, "learning_rate": 0.0019864789122570428, "loss": 0.2411, "step": 56190 }, { "epoch": 0.39889770793518975, "grad_norm": 0.1103515625, "learning_rate": 0.001986474024291961, "loss": 0.2486, "step": 56200 }, { "epoch": 0.3989686861750359, "grad_norm": 0.1083984375, "learning_rate": 0.0019864691354502116, "loss": 0.2369, "step": 56210 }, { "epoch": 0.39903966441488203, "grad_norm": 0.1259765625, "learning_rate": 0.0019864642457317998, "loss": 0.2756, "step": 56220 }, { "epoch": 0.3991106426547281, "grad_norm": 0.09619140625, "learning_rate": 0.0019864593551367307, "loss": 0.2572, "step": 56230 }, { "epoch": 0.39918162089457426, "grad_norm": 0.12158203125, "learning_rate": 0.0019864544636650086, "loss": 0.2604, "step": 56240 }, { "epoch": 0.39925259913442035, "grad_norm": 0.12890625, "learning_rate": 0.001986449571316639, "loss": 0.2317, "step": 56250 }, { "epoch": 0.3993235773742665, "grad_norm": 0.1259765625, "learning_rate": 0.001986444678091626, "loss": 0.2435, "step": 56260 }, { "epoch": 0.3993945556141126, "grad_norm": 0.12109375, "learning_rate": 0.0019864397839899745, "loss": 0.2338, "step": 56270 }, { "epoch": 0.3994655338539587, "grad_norm": 0.10205078125, "learning_rate": 0.00198643488901169, "loss": 0.2367, "step": 56280 }, { "epoch": 0.39953651209380486, "grad_norm": 0.1064453125, "learning_rate": 0.0019864299931567765, "loss": 0.2454, "step": 56290 }, { "epoch": 0.39960749033365095, "grad_norm": 0.09130859375, "learning_rate": 0.0019864250964252398, "loss": 0.2397, "step": 56300 }, { "epoch": 0.3996784685734971, "grad_norm": 0.0830078125, "learning_rate": 0.001986420198817084, "loss": 0.2304, "step": 56310 }, { "epoch": 0.3997494468133432, "grad_norm": 0.1318359375, "learning_rate": 0.001986415300332314, "loss": 0.2385, "step": 56320 }, { "epoch": 0.3998204250531893, "grad_norm": 0.099609375, "learning_rate": 0.0019864104009709354, "loss": 0.2462, "step": 56330 }, { "epoch": 0.39989140329303546, "grad_norm": 0.07275390625, "learning_rate": 0.001986405500732952, "loss": 0.2522, "step": 56340 }, { "epoch": 0.39996238153288155, "grad_norm": 0.1015625, "learning_rate": 0.001986400599618369, "loss": 0.2402, "step": 56350 }, { "epoch": 0.4000333597727277, "grad_norm": 0.1103515625, "learning_rate": 0.001986395697627192, "loss": 0.2478, "step": 56360 }, { "epoch": 0.4001043380125738, "grad_norm": 0.0927734375, "learning_rate": 0.0019863907947594247, "loss": 0.2491, "step": 56370 }, { "epoch": 0.4001753162524199, "grad_norm": 0.10107421875, "learning_rate": 0.0019863858910150723, "loss": 0.2319, "step": 56380 }, { "epoch": 0.400246294492266, "grad_norm": 0.130859375, "learning_rate": 0.00198638098639414, "loss": 0.2604, "step": 56390 }, { "epoch": 0.40031727273211215, "grad_norm": 0.09912109375, "learning_rate": 0.0019863760808966325, "loss": 0.259, "step": 56400 }, { "epoch": 0.4003882509719583, "grad_norm": 0.12353515625, "learning_rate": 0.0019863711745225546, "loss": 0.2357, "step": 56410 }, { "epoch": 0.4004592292118044, "grad_norm": 0.0869140625, "learning_rate": 0.0019863662672719115, "loss": 0.2436, "step": 56420 }, { "epoch": 0.4005302074516505, "grad_norm": 0.1015625, "learning_rate": 0.0019863613591447073, "loss": 0.2392, "step": 56430 }, { "epoch": 0.4006011856914966, "grad_norm": 0.140625, "learning_rate": 0.0019863564501409474, "loss": 0.2435, "step": 56440 }, { "epoch": 0.40067216393134275, "grad_norm": 0.255859375, "learning_rate": 0.0019863515402606365, "loss": 0.2556, "step": 56450 }, { "epoch": 0.4007431421711889, "grad_norm": 0.0927734375, "learning_rate": 0.0019863466295037793, "loss": 0.2645, "step": 56460 }, { "epoch": 0.400814120411035, "grad_norm": 0.08154296875, "learning_rate": 0.001986341717870381, "loss": 0.2344, "step": 56470 }, { "epoch": 0.4008850986508811, "grad_norm": 0.0791015625, "learning_rate": 0.0019863368053604463, "loss": 0.2451, "step": 56480 }, { "epoch": 0.4009560768907272, "grad_norm": 0.11376953125, "learning_rate": 0.0019863318919739804, "loss": 0.2424, "step": 56490 }, { "epoch": 0.40102705513057335, "grad_norm": 0.1083984375, "learning_rate": 0.0019863269777109873, "loss": 0.2403, "step": 56500 }, { "epoch": 0.40109803337041944, "grad_norm": 0.1279296875, "learning_rate": 0.0019863220625714727, "loss": 0.2322, "step": 56510 }, { "epoch": 0.4011690116102656, "grad_norm": 0.138671875, "learning_rate": 0.001986317146555441, "loss": 0.2594, "step": 56520 }, { "epoch": 0.4012399898501117, "grad_norm": 0.1171875, "learning_rate": 0.001986312229662897, "loss": 0.2396, "step": 56530 }, { "epoch": 0.4013109680899578, "grad_norm": 0.1337890625, "learning_rate": 0.001986307311893846, "loss": 0.2518, "step": 56540 }, { "epoch": 0.40138194632980395, "grad_norm": 0.08056640625, "learning_rate": 0.0019863023932482926, "loss": 0.2426, "step": 56550 }, { "epoch": 0.40145292456965004, "grad_norm": 0.083984375, "learning_rate": 0.001986297473726241, "loss": 0.2503, "step": 56560 }, { "epoch": 0.4015239028094962, "grad_norm": 0.08349609375, "learning_rate": 0.001986292553327698, "loss": 0.2402, "step": 56570 }, { "epoch": 0.4015948810493423, "grad_norm": 0.1005859375, "learning_rate": 0.0019862876320526665, "loss": 0.2502, "step": 56580 }, { "epoch": 0.4016658592891884, "grad_norm": 0.126953125, "learning_rate": 0.001986282709901152, "loss": 0.2459, "step": 56590 }, { "epoch": 0.40173683752903455, "grad_norm": 0.1171875, "learning_rate": 0.0019862777868731593, "loss": 0.2512, "step": 56600 }, { "epoch": 0.40180781576888064, "grad_norm": 0.1455078125, "learning_rate": 0.0019862728629686938, "loss": 0.2457, "step": 56610 }, { "epoch": 0.4018787940087268, "grad_norm": 0.09033203125, "learning_rate": 0.00198626793818776, "loss": 0.2629, "step": 56620 }, { "epoch": 0.40194977224857287, "grad_norm": 0.134765625, "learning_rate": 0.001986263012530363, "loss": 0.2502, "step": 56630 }, { "epoch": 0.402020750488419, "grad_norm": 0.1416015625, "learning_rate": 0.0019862580859965067, "loss": 0.263, "step": 56640 }, { "epoch": 0.40209172872826515, "grad_norm": 0.15234375, "learning_rate": 0.001986253158586197, "loss": 0.2382, "step": 56650 }, { "epoch": 0.40216270696811124, "grad_norm": 0.0966796875, "learning_rate": 0.0019862482302994387, "loss": 0.2584, "step": 56660 }, { "epoch": 0.4022336852079574, "grad_norm": 0.080078125, "learning_rate": 0.001986243301136236, "loss": 0.252, "step": 56670 }, { "epoch": 0.40230466344780347, "grad_norm": 0.11376953125, "learning_rate": 0.001986238371096595, "loss": 0.2512, "step": 56680 }, { "epoch": 0.4023756416876496, "grad_norm": 0.142578125, "learning_rate": 0.001986233440180519, "loss": 0.2383, "step": 56690 }, { "epoch": 0.40244661992749575, "grad_norm": 0.11669921875, "learning_rate": 0.001986228508388014, "loss": 0.2424, "step": 56700 }, { "epoch": 0.40251759816734184, "grad_norm": 0.1357421875, "learning_rate": 0.0019862235757190845, "loss": 0.2474, "step": 56710 }, { "epoch": 0.402588576407188, "grad_norm": 0.06396484375, "learning_rate": 0.0019862186421737353, "loss": 0.23, "step": 56720 }, { "epoch": 0.40265955464703407, "grad_norm": 0.10546875, "learning_rate": 0.001986213707751972, "loss": 0.2482, "step": 56730 }, { "epoch": 0.4027305328868802, "grad_norm": 0.169921875, "learning_rate": 0.001986208772453798, "loss": 0.2271, "step": 56740 }, { "epoch": 0.4028015111267263, "grad_norm": 0.1103515625, "learning_rate": 0.0019862038362792197, "loss": 0.2463, "step": 56750 }, { "epoch": 0.40287248936657244, "grad_norm": 0.12158203125, "learning_rate": 0.001986198899228241, "loss": 0.2433, "step": 56760 }, { "epoch": 0.4029434676064186, "grad_norm": 0.2470703125, "learning_rate": 0.0019861939613008673, "loss": 0.2473, "step": 56770 }, { "epoch": 0.40301444584626467, "grad_norm": 0.10595703125, "learning_rate": 0.0019861890224971033, "loss": 0.2366, "step": 56780 }, { "epoch": 0.4030854240861108, "grad_norm": 0.1494140625, "learning_rate": 0.001986184082816954, "loss": 0.2339, "step": 56790 }, { "epoch": 0.4031564023259569, "grad_norm": 0.134765625, "learning_rate": 0.001986179142260424, "loss": 0.2496, "step": 56800 }, { "epoch": 0.40322738056580304, "grad_norm": 0.08837890625, "learning_rate": 0.0019861742008275185, "loss": 0.2343, "step": 56810 }, { "epoch": 0.4032983588056492, "grad_norm": 0.09716796875, "learning_rate": 0.0019861692585182423, "loss": 0.2462, "step": 56820 }, { "epoch": 0.40336933704549527, "grad_norm": 0.12890625, "learning_rate": 0.0019861643153326, "loss": 0.2506, "step": 56830 }, { "epoch": 0.4034403152853414, "grad_norm": 0.09326171875, "learning_rate": 0.001986159371270597, "loss": 0.2413, "step": 56840 }, { "epoch": 0.4035112935251875, "grad_norm": 0.078125, "learning_rate": 0.0019861544263322377, "loss": 0.245, "step": 56850 }, { "epoch": 0.40358227176503364, "grad_norm": 0.1171875, "learning_rate": 0.0019861494805175272, "loss": 0.2502, "step": 56860 }, { "epoch": 0.4036532500048797, "grad_norm": 0.09814453125, "learning_rate": 0.0019861445338264705, "loss": 0.2534, "step": 56870 }, { "epoch": 0.40372422824472587, "grad_norm": 0.10400390625, "learning_rate": 0.0019861395862590726, "loss": 0.2396, "step": 56880 }, { "epoch": 0.403795206484572, "grad_norm": 0.0966796875, "learning_rate": 0.001986134637815338, "loss": 0.2512, "step": 56890 }, { "epoch": 0.4038661847244181, "grad_norm": 0.1474609375, "learning_rate": 0.001986129688495272, "loss": 0.2407, "step": 56900 }, { "epoch": 0.40393716296426424, "grad_norm": 0.0966796875, "learning_rate": 0.0019861247382988788, "loss": 0.2694, "step": 56910 }, { "epoch": 0.4040081412041103, "grad_norm": 0.18359375, "learning_rate": 0.001986119787226164, "loss": 0.249, "step": 56920 }, { "epoch": 0.40407911944395647, "grad_norm": 0.1171875, "learning_rate": 0.0019861148352771324, "loss": 0.2604, "step": 56930 }, { "epoch": 0.4041500976838026, "grad_norm": 0.11328125, "learning_rate": 0.0019861098824517885, "loss": 0.25, "step": 56940 }, { "epoch": 0.4042210759236487, "grad_norm": 0.09912109375, "learning_rate": 0.001986104928750138, "loss": 0.2556, "step": 56950 }, { "epoch": 0.40429205416349484, "grad_norm": 0.10205078125, "learning_rate": 0.0019860999741721846, "loss": 0.2302, "step": 56960 }, { "epoch": 0.4043630324033409, "grad_norm": 0.2041015625, "learning_rate": 0.001986095018717934, "loss": 0.2491, "step": 56970 }, { "epoch": 0.40443401064318707, "grad_norm": 0.087890625, "learning_rate": 0.001986090062387391, "loss": 0.2449, "step": 56980 }, { "epoch": 0.40450498888303316, "grad_norm": 0.08251953125, "learning_rate": 0.0019860851051805607, "loss": 0.248, "step": 56990 }, { "epoch": 0.4045759671228793, "grad_norm": 0.1376953125, "learning_rate": 0.001986080147097448, "loss": 0.2353, "step": 57000 }, { "epoch": 0.40464694536272544, "grad_norm": 0.09033203125, "learning_rate": 0.001986075188138057, "loss": 0.2576, "step": 57010 }, { "epoch": 0.4047179236025715, "grad_norm": 0.1015625, "learning_rate": 0.0019860702283023934, "loss": 0.2482, "step": 57020 }, { "epoch": 0.40478890184241767, "grad_norm": 0.08935546875, "learning_rate": 0.001986065267590462, "loss": 0.2494, "step": 57030 }, { "epoch": 0.40485988008226376, "grad_norm": 0.361328125, "learning_rate": 0.0019860603060022674, "loss": 0.2312, "step": 57040 }, { "epoch": 0.4049308583221099, "grad_norm": 0.16015625, "learning_rate": 0.0019860553435378145, "loss": 0.2287, "step": 57050 }, { "epoch": 0.40500183656195604, "grad_norm": 0.07177734375, "learning_rate": 0.0019860503801971087, "loss": 0.2461, "step": 57060 }, { "epoch": 0.40507281480180213, "grad_norm": 0.1435546875, "learning_rate": 0.0019860454159801545, "loss": 0.255, "step": 57070 }, { "epoch": 0.40514379304164827, "grad_norm": 0.119140625, "learning_rate": 0.0019860404508869572, "loss": 0.2666, "step": 57080 }, { "epoch": 0.40521477128149436, "grad_norm": 0.10400390625, "learning_rate": 0.001986035484917521, "loss": 0.2379, "step": 57090 }, { "epoch": 0.4052857495213405, "grad_norm": 0.07421875, "learning_rate": 0.0019860305180718512, "loss": 0.2544, "step": 57100 }, { "epoch": 0.40535672776118664, "grad_norm": 0.1357421875, "learning_rate": 0.0019860255503499533, "loss": 0.238, "step": 57110 }, { "epoch": 0.40542770600103273, "grad_norm": 0.1171875, "learning_rate": 0.001986020581751831, "loss": 0.233, "step": 57120 }, { "epoch": 0.40549868424087887, "grad_norm": 0.10595703125, "learning_rate": 0.0019860156122774903, "loss": 0.2402, "step": 57130 }, { "epoch": 0.40556966248072496, "grad_norm": 0.1513671875, "learning_rate": 0.0019860106419269355, "loss": 0.2375, "step": 57140 }, { "epoch": 0.4056406407205711, "grad_norm": 0.10107421875, "learning_rate": 0.001986005670700172, "loss": 0.243, "step": 57150 }, { "epoch": 0.4057116189604172, "grad_norm": 0.1064453125, "learning_rate": 0.0019860006985972043, "loss": 0.2258, "step": 57160 }, { "epoch": 0.40578259720026333, "grad_norm": 0.0908203125, "learning_rate": 0.001985995725618037, "loss": 0.2391, "step": 57170 }, { "epoch": 0.40585357544010947, "grad_norm": 0.1162109375, "learning_rate": 0.001985990751762676, "loss": 0.2402, "step": 57180 }, { "epoch": 0.40592455367995556, "grad_norm": 0.07177734375, "learning_rate": 0.0019859857770311256, "loss": 0.2488, "step": 57190 }, { "epoch": 0.4059955319198017, "grad_norm": 0.10205078125, "learning_rate": 0.001985980801423391, "loss": 0.2474, "step": 57200 }, { "epoch": 0.4060665101596478, "grad_norm": 0.08544921875, "learning_rate": 0.001985975824939476, "loss": 0.2534, "step": 57210 }, { "epoch": 0.40613748839949393, "grad_norm": 0.1396484375, "learning_rate": 0.001985970847579387, "loss": 0.2434, "step": 57220 }, { "epoch": 0.40620846663934007, "grad_norm": 0.0625, "learning_rate": 0.0019859658693431284, "loss": 0.2733, "step": 57230 }, { "epoch": 0.40627944487918616, "grad_norm": 0.09326171875, "learning_rate": 0.0019859608902307053, "loss": 0.2421, "step": 57240 }, { "epoch": 0.4063504231190323, "grad_norm": 0.1455078125, "learning_rate": 0.001985955910242122, "loss": 0.2517, "step": 57250 }, { "epoch": 0.4064214013588784, "grad_norm": 0.10009765625, "learning_rate": 0.0019859509293773843, "loss": 0.2445, "step": 57260 }, { "epoch": 0.40649237959872453, "grad_norm": 0.076171875, "learning_rate": 0.0019859459476364964, "loss": 0.2187, "step": 57270 }, { "epoch": 0.4065633578385706, "grad_norm": 0.111328125, "learning_rate": 0.0019859409650194635, "loss": 0.2308, "step": 57280 }, { "epoch": 0.40663433607841676, "grad_norm": 0.0888671875, "learning_rate": 0.0019859359815262902, "loss": 0.2558, "step": 57290 }, { "epoch": 0.4067053143182629, "grad_norm": 0.12890625, "learning_rate": 0.001985930997156982, "loss": 0.238, "step": 57300 }, { "epoch": 0.406776292558109, "grad_norm": 0.10888671875, "learning_rate": 0.0019859260119115436, "loss": 0.233, "step": 57310 }, { "epoch": 0.40684727079795513, "grad_norm": 0.0966796875, "learning_rate": 0.00198592102578998, "loss": 0.2453, "step": 57320 }, { "epoch": 0.4069182490378012, "grad_norm": 0.09326171875, "learning_rate": 0.0019859160387922957, "loss": 0.2502, "step": 57330 }, { "epoch": 0.40698922727764736, "grad_norm": 0.09423828125, "learning_rate": 0.0019859110509184963, "loss": 0.2422, "step": 57340 }, { "epoch": 0.4070602055174935, "grad_norm": 0.095703125, "learning_rate": 0.001985906062168586, "loss": 0.2417, "step": 57350 }, { "epoch": 0.4071311837573396, "grad_norm": 0.0791015625, "learning_rate": 0.001985901072542571, "loss": 0.2418, "step": 57360 }, { "epoch": 0.40720216199718573, "grad_norm": 0.1220703125, "learning_rate": 0.0019858960820404543, "loss": 0.239, "step": 57370 }, { "epoch": 0.4072731402370318, "grad_norm": 0.150390625, "learning_rate": 0.0019858910906622425, "loss": 0.2546, "step": 57380 }, { "epoch": 0.40734411847687796, "grad_norm": 0.12353515625, "learning_rate": 0.00198588609840794, "loss": 0.2519, "step": 57390 }, { "epoch": 0.40741509671672405, "grad_norm": 0.115234375, "learning_rate": 0.001985881105277551, "loss": 0.2367, "step": 57400 }, { "epoch": 0.4074860749565702, "grad_norm": 0.1083984375, "learning_rate": 0.001985876111271082, "loss": 0.233, "step": 57410 }, { "epoch": 0.40755705319641633, "grad_norm": 0.11474609375, "learning_rate": 0.001985871116388536, "loss": 0.2613, "step": 57420 }, { "epoch": 0.4076280314362624, "grad_norm": 0.09033203125, "learning_rate": 0.00198586612062992, "loss": 0.2344, "step": 57430 }, { "epoch": 0.40769900967610856, "grad_norm": 0.09814453125, "learning_rate": 0.0019858611239952375, "loss": 0.2345, "step": 57440 }, { "epoch": 0.40776998791595465, "grad_norm": 0.154296875, "learning_rate": 0.0019858561264844937, "loss": 0.254, "step": 57450 }, { "epoch": 0.4078409661558008, "grad_norm": 0.1484375, "learning_rate": 0.001985851128097694, "loss": 0.2615, "step": 57460 }, { "epoch": 0.40791194439564693, "grad_norm": 0.107421875, "learning_rate": 0.001985846128834843, "loss": 0.2258, "step": 57470 }, { "epoch": 0.407982922635493, "grad_norm": 0.0947265625, "learning_rate": 0.0019858411286959453, "loss": 0.2269, "step": 57480 }, { "epoch": 0.40805390087533916, "grad_norm": 0.150390625, "learning_rate": 0.0019858361276810068, "loss": 0.254, "step": 57490 }, { "epoch": 0.40812487911518525, "grad_norm": 0.09130859375, "learning_rate": 0.001985831125790032, "loss": 0.2574, "step": 57500 }, { "epoch": 0.4081958573550314, "grad_norm": 0.05908203125, "learning_rate": 0.0019858261230230254, "loss": 0.2189, "step": 57510 }, { "epoch": 0.4082668355948775, "grad_norm": 0.2099609375, "learning_rate": 0.001985821119379992, "loss": 0.2185, "step": 57520 }, { "epoch": 0.4083378138347236, "grad_norm": 0.09228515625, "learning_rate": 0.0019858161148609377, "loss": 0.2312, "step": 57530 }, { "epoch": 0.40840879207456976, "grad_norm": 0.0810546875, "learning_rate": 0.001985811109465866, "loss": 0.2407, "step": 57540 }, { "epoch": 0.40847977031441585, "grad_norm": 0.0830078125, "learning_rate": 0.0019858061031947836, "loss": 0.2403, "step": 57550 }, { "epoch": 0.408550748554262, "grad_norm": 0.1328125, "learning_rate": 0.0019858010960476934, "loss": 0.2384, "step": 57560 }, { "epoch": 0.4086217267941081, "grad_norm": 0.099609375, "learning_rate": 0.001985796088024602, "loss": 0.2447, "step": 57570 }, { "epoch": 0.4086927050339542, "grad_norm": 0.1171875, "learning_rate": 0.0019857910791255136, "loss": 0.2187, "step": 57580 }, { "epoch": 0.40876368327380036, "grad_norm": 0.12353515625, "learning_rate": 0.001985786069350434, "loss": 0.2395, "step": 57590 }, { "epoch": 0.40883466151364645, "grad_norm": 0.1005859375, "learning_rate": 0.001985781058699367, "loss": 0.2357, "step": 57600 }, { "epoch": 0.4089056397534926, "grad_norm": 0.119140625, "learning_rate": 0.0019857760471723176, "loss": 0.2648, "step": 57610 }, { "epoch": 0.4089766179933387, "grad_norm": 0.189453125, "learning_rate": 0.001985771034769292, "loss": 0.2352, "step": 57620 }, { "epoch": 0.4090475962331848, "grad_norm": 0.12451171875, "learning_rate": 0.001985766021490294, "loss": 0.2482, "step": 57630 }, { "epoch": 0.4091185744730309, "grad_norm": 0.15234375, "learning_rate": 0.001985761007335329, "loss": 0.2649, "step": 57640 }, { "epoch": 0.40918955271287705, "grad_norm": 0.07666015625, "learning_rate": 0.0019857559923044015, "loss": 0.2346, "step": 57650 }, { "epoch": 0.4092605309527232, "grad_norm": 0.09716796875, "learning_rate": 0.001985750976397517, "loss": 0.2458, "step": 57660 }, { "epoch": 0.4093315091925693, "grad_norm": 0.1728515625, "learning_rate": 0.001985745959614681, "loss": 0.2448, "step": 57670 }, { "epoch": 0.4094024874324154, "grad_norm": 0.1220703125, "learning_rate": 0.0019857409419558967, "loss": 0.2309, "step": 57680 }, { "epoch": 0.4094734656722615, "grad_norm": 0.0869140625, "learning_rate": 0.001985735923421171, "loss": 0.2348, "step": 57690 }, { "epoch": 0.40954444391210765, "grad_norm": 0.10595703125, "learning_rate": 0.0019857309040105073, "loss": 0.2487, "step": 57700 }, { "epoch": 0.4096154221519538, "grad_norm": 0.1435546875, "learning_rate": 0.0019857258837239116, "loss": 0.2378, "step": 57710 }, { "epoch": 0.4096864003917999, "grad_norm": 0.130859375, "learning_rate": 0.0019857208625613885, "loss": 0.2455, "step": 57720 }, { "epoch": 0.409757378631646, "grad_norm": 0.1474609375, "learning_rate": 0.001985715840522943, "loss": 0.2409, "step": 57730 }, { "epoch": 0.4098283568714921, "grad_norm": 0.1376953125, "learning_rate": 0.0019857108176085797, "loss": 0.2559, "step": 57740 }, { "epoch": 0.40989933511133825, "grad_norm": 0.146484375, "learning_rate": 0.0019857057938183044, "loss": 0.2426, "step": 57750 }, { "epoch": 0.40997031335118433, "grad_norm": 0.095703125, "learning_rate": 0.0019857007691521213, "loss": 0.2454, "step": 57760 }, { "epoch": 0.4100412915910305, "grad_norm": 0.138671875, "learning_rate": 0.001985695743610036, "loss": 0.2397, "step": 57770 }, { "epoch": 0.4101122698308766, "grad_norm": 0.09130859375, "learning_rate": 0.0019856907171920523, "loss": 0.2316, "step": 57780 }, { "epoch": 0.4101832480707227, "grad_norm": 0.1025390625, "learning_rate": 0.001985685689898177, "loss": 0.2511, "step": 57790 }, { "epoch": 0.41025422631056885, "grad_norm": 0.146484375, "learning_rate": 0.0019856806617284135, "loss": 0.2373, "step": 57800 }, { "epoch": 0.41032520455041493, "grad_norm": 0.1982421875, "learning_rate": 0.0019856756326827674, "loss": 0.2422, "step": 57810 }, { "epoch": 0.4103961827902611, "grad_norm": 0.126953125, "learning_rate": 0.0019856706027612434, "loss": 0.2544, "step": 57820 }, { "epoch": 0.4104671610301072, "grad_norm": 0.0732421875, "learning_rate": 0.001985665571963847, "loss": 0.245, "step": 57830 }, { "epoch": 0.4105381392699533, "grad_norm": 0.1318359375, "learning_rate": 0.001985660540290583, "loss": 0.2411, "step": 57840 }, { "epoch": 0.41060911750979945, "grad_norm": 0.06787109375, "learning_rate": 0.001985655507741456, "loss": 0.2473, "step": 57850 }, { "epoch": 0.41068009574964554, "grad_norm": 0.0986328125, "learning_rate": 0.001985650474316471, "loss": 0.2334, "step": 57860 }, { "epoch": 0.4107510739894917, "grad_norm": 0.08642578125, "learning_rate": 0.0019856454400156336, "loss": 0.2243, "step": 57870 }, { "epoch": 0.41082205222933776, "grad_norm": 0.10595703125, "learning_rate": 0.001985640404838948, "loss": 0.2456, "step": 57880 }, { "epoch": 0.4108930304691839, "grad_norm": 0.1279296875, "learning_rate": 0.0019856353687864197, "loss": 0.2439, "step": 57890 }, { "epoch": 0.41096400870903005, "grad_norm": 0.0947265625, "learning_rate": 0.0019856303318580536, "loss": 0.2313, "step": 57900 }, { "epoch": 0.41103498694887614, "grad_norm": 0.08740234375, "learning_rate": 0.0019856252940538546, "loss": 0.2551, "step": 57910 }, { "epoch": 0.4111059651887223, "grad_norm": 0.11474609375, "learning_rate": 0.0019856202553738276, "loss": 0.2463, "step": 57920 }, { "epoch": 0.41117694342856836, "grad_norm": 0.15625, "learning_rate": 0.0019856152158179777, "loss": 0.2458, "step": 57930 }, { "epoch": 0.4112479216684145, "grad_norm": 0.1083984375, "learning_rate": 0.00198561017538631, "loss": 0.2386, "step": 57940 }, { "epoch": 0.41131889990826065, "grad_norm": 0.1123046875, "learning_rate": 0.001985605134078829, "loss": 0.2474, "step": 57950 }, { "epoch": 0.41138987814810674, "grad_norm": 0.10791015625, "learning_rate": 0.0019856000918955403, "loss": 0.2603, "step": 57960 }, { "epoch": 0.4114608563879529, "grad_norm": 0.1640625, "learning_rate": 0.0019855950488364485, "loss": 0.2364, "step": 57970 }, { "epoch": 0.41153183462779896, "grad_norm": 0.10888671875, "learning_rate": 0.001985590004901559, "loss": 0.2561, "step": 57980 }, { "epoch": 0.4116028128676451, "grad_norm": 0.1416015625, "learning_rate": 0.001985584960090876, "loss": 0.2527, "step": 57990 }, { "epoch": 0.4116737911074912, "grad_norm": 0.095703125, "learning_rate": 0.0019855799144044054, "loss": 0.2453, "step": 58000 }, { "epoch": 0.4116737911074912, "eval_covost2-zh-en_loss": 3.8681602478027344, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.1811, "eval_covost2-zh-en_samples_per_second": 3.171, "eval_covost2-zh-en_steps_per_second": 0.198, "step": 58000 }, { "epoch": 0.4116737911074912, "eval_covost2-en-zh_loss": 3.1320862770080566, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.8021, "eval_covost2-en-zh_samples_per_second": 3.232, "eval_covost2-en-zh_steps_per_second": 0.202, "step": 58000 }, { "epoch": 0.41174476934733734, "grad_norm": 0.140625, "learning_rate": 0.001985574867842152, "loss": 0.257, "step": 58010 }, { "epoch": 0.4118157475871835, "grad_norm": 0.16015625, "learning_rate": 0.0019855698204041196, "loss": 0.2477, "step": 58020 }, { "epoch": 0.41188672582702957, "grad_norm": 0.08984375, "learning_rate": 0.001985564772090315, "loss": 0.2309, "step": 58030 }, { "epoch": 0.4119577040668757, "grad_norm": 0.12158203125, "learning_rate": 0.001985559722900742, "loss": 0.2372, "step": 58040 }, { "epoch": 0.4120286823067218, "grad_norm": 0.12353515625, "learning_rate": 0.001985554672835406, "loss": 0.2421, "step": 58050 }, { "epoch": 0.41209966054656794, "grad_norm": 0.09228515625, "learning_rate": 0.0019855496218943117, "loss": 0.2418, "step": 58060 }, { "epoch": 0.4121706387864141, "grad_norm": 0.0927734375, "learning_rate": 0.0019855445700774645, "loss": 0.2318, "step": 58070 }, { "epoch": 0.41224161702626017, "grad_norm": 0.08740234375, "learning_rate": 0.0019855395173848695, "loss": 0.2507, "step": 58080 }, { "epoch": 0.4123125952661063, "grad_norm": 0.09619140625, "learning_rate": 0.001985534463816531, "loss": 0.2373, "step": 58090 }, { "epoch": 0.4123835735059524, "grad_norm": 0.10986328125, "learning_rate": 0.0019855294093724545, "loss": 0.2498, "step": 58100 }, { "epoch": 0.41245455174579854, "grad_norm": 0.056884765625, "learning_rate": 0.001985524354052645, "loss": 0.2261, "step": 58110 }, { "epoch": 0.4125255299856446, "grad_norm": 0.0869140625, "learning_rate": 0.0019855192978571077, "loss": 0.2637, "step": 58120 }, { "epoch": 0.41259650822549077, "grad_norm": 0.1376953125, "learning_rate": 0.001985514240785847, "loss": 0.2346, "step": 58130 }, { "epoch": 0.4126674864653369, "grad_norm": 0.3203125, "learning_rate": 0.001985509182838868, "loss": 0.2426, "step": 58140 }, { "epoch": 0.412738464705183, "grad_norm": 0.06640625, "learning_rate": 0.0019855041240161766, "loss": 0.237, "step": 58150 }, { "epoch": 0.41280944294502914, "grad_norm": 0.08544921875, "learning_rate": 0.0019854990643177763, "loss": 0.2406, "step": 58160 }, { "epoch": 0.4128804211848752, "grad_norm": 0.10009765625, "learning_rate": 0.0019854940037436733, "loss": 0.2326, "step": 58170 }, { "epoch": 0.41295139942472137, "grad_norm": 0.1015625, "learning_rate": 0.0019854889422938724, "loss": 0.2426, "step": 58180 }, { "epoch": 0.4130223776645675, "grad_norm": 0.107421875, "learning_rate": 0.0019854838799683782, "loss": 0.2418, "step": 58190 }, { "epoch": 0.4130933559044136, "grad_norm": 0.0947265625, "learning_rate": 0.001985478816767196, "loss": 0.278, "step": 58200 }, { "epoch": 0.41316433414425974, "grad_norm": 0.10595703125, "learning_rate": 0.001985473752690331, "loss": 0.252, "step": 58210 }, { "epoch": 0.4132353123841058, "grad_norm": 0.07470703125, "learning_rate": 0.0019854686877377875, "loss": 0.2472, "step": 58220 }, { "epoch": 0.41330629062395197, "grad_norm": 0.08642578125, "learning_rate": 0.0019854636219095714, "loss": 0.2473, "step": 58230 }, { "epoch": 0.4133772688637981, "grad_norm": 0.08642578125, "learning_rate": 0.001985458555205687, "loss": 0.2537, "step": 58240 }, { "epoch": 0.4134482471036442, "grad_norm": 0.099609375, "learning_rate": 0.0019854534876261394, "loss": 0.2271, "step": 58250 }, { "epoch": 0.41351922534349034, "grad_norm": 0.09619140625, "learning_rate": 0.001985448419170934, "loss": 0.2511, "step": 58260 }, { "epoch": 0.4135902035833364, "grad_norm": 0.09521484375, "learning_rate": 0.0019854433498400757, "loss": 0.2472, "step": 58270 }, { "epoch": 0.41366118182318257, "grad_norm": 0.150390625, "learning_rate": 0.0019854382796335693, "loss": 0.2577, "step": 58280 }, { "epoch": 0.41373216006302865, "grad_norm": 0.1123046875, "learning_rate": 0.00198543320855142, "loss": 0.2553, "step": 58290 }, { "epoch": 0.4138031383028748, "grad_norm": 0.14453125, "learning_rate": 0.0019854281365936327, "loss": 0.262, "step": 58300 }, { "epoch": 0.41387411654272094, "grad_norm": 0.059814453125, "learning_rate": 0.001985423063760213, "loss": 0.2331, "step": 58310 }, { "epoch": 0.413945094782567, "grad_norm": 0.1142578125, "learning_rate": 0.0019854179900511645, "loss": 0.2558, "step": 58320 }, { "epoch": 0.41401607302241317, "grad_norm": 0.11279296875, "learning_rate": 0.0019854129154664936, "loss": 0.246, "step": 58330 }, { "epoch": 0.41408705126225925, "grad_norm": 0.1416015625, "learning_rate": 0.0019854078400062046, "loss": 0.2389, "step": 58340 }, { "epoch": 0.4141580295021054, "grad_norm": 0.14453125, "learning_rate": 0.001985402763670303, "loss": 0.2429, "step": 58350 }, { "epoch": 0.41422900774195154, "grad_norm": 0.083984375, "learning_rate": 0.0019853976864587933, "loss": 0.2558, "step": 58360 }, { "epoch": 0.4142999859817976, "grad_norm": 0.11572265625, "learning_rate": 0.001985392608371681, "loss": 0.2398, "step": 58370 }, { "epoch": 0.41437096422164377, "grad_norm": 0.142578125, "learning_rate": 0.0019853875294089706, "loss": 0.2483, "step": 58380 }, { "epoch": 0.41444194246148985, "grad_norm": 0.09423828125, "learning_rate": 0.0019853824495706675, "loss": 0.2256, "step": 58390 }, { "epoch": 0.414512920701336, "grad_norm": 0.11865234375, "learning_rate": 0.0019853773688567767, "loss": 0.2377, "step": 58400 }, { "epoch": 0.4145838989411821, "grad_norm": 0.10009765625, "learning_rate": 0.0019853722872673037, "loss": 0.2487, "step": 58410 }, { "epoch": 0.4146548771810282, "grad_norm": 0.06884765625, "learning_rate": 0.0019853672048022524, "loss": 0.2323, "step": 58420 }, { "epoch": 0.41472585542087437, "grad_norm": 0.16015625, "learning_rate": 0.0019853621214616286, "loss": 0.2471, "step": 58430 }, { "epoch": 0.41479683366072045, "grad_norm": 0.11279296875, "learning_rate": 0.0019853570372454373, "loss": 0.2287, "step": 58440 }, { "epoch": 0.4148678119005666, "grad_norm": 0.1748046875, "learning_rate": 0.001985351952153683, "loss": 0.2448, "step": 58450 }, { "epoch": 0.4149387901404127, "grad_norm": 0.08349609375, "learning_rate": 0.0019853468661863716, "loss": 0.2365, "step": 58460 }, { "epoch": 0.4150097683802588, "grad_norm": 0.1259765625, "learning_rate": 0.001985341779343507, "loss": 0.2377, "step": 58470 }, { "epoch": 0.41508074662010497, "grad_norm": 0.1142578125, "learning_rate": 0.001985336691625095, "loss": 0.2452, "step": 58480 }, { "epoch": 0.41515172485995105, "grad_norm": 0.150390625, "learning_rate": 0.001985331603031141, "loss": 0.2276, "step": 58490 }, { "epoch": 0.4152227030997972, "grad_norm": 0.119140625, "learning_rate": 0.001985326513561649, "loss": 0.2423, "step": 58500 }, { "epoch": 0.4152936813396433, "grad_norm": 0.1396484375, "learning_rate": 0.001985321423216625, "loss": 0.2461, "step": 58510 }, { "epoch": 0.4153646595794894, "grad_norm": 0.09716796875, "learning_rate": 0.0019853163319960734, "loss": 0.2323, "step": 58520 }, { "epoch": 0.4154356378193355, "grad_norm": 0.091796875, "learning_rate": 0.001985311239899999, "loss": 0.2494, "step": 58530 }, { "epoch": 0.41550661605918165, "grad_norm": 0.1064453125, "learning_rate": 0.0019853061469284083, "loss": 0.2477, "step": 58540 }, { "epoch": 0.4155775942990278, "grad_norm": 0.142578125, "learning_rate": 0.0019853010530813046, "loss": 0.2352, "step": 58550 }, { "epoch": 0.4156485725388739, "grad_norm": 0.09375, "learning_rate": 0.0019852959583586934, "loss": 0.2603, "step": 58560 }, { "epoch": 0.41571955077872, "grad_norm": 0.126953125, "learning_rate": 0.0019852908627605807, "loss": 0.248, "step": 58570 }, { "epoch": 0.4157905290185661, "grad_norm": 0.07763671875, "learning_rate": 0.0019852857662869703, "loss": 0.2306, "step": 58580 }, { "epoch": 0.41586150725841226, "grad_norm": 0.20703125, "learning_rate": 0.001985280668937868, "loss": 0.2464, "step": 58590 }, { "epoch": 0.4159324854982584, "grad_norm": 0.076171875, "learning_rate": 0.0019852755707132786, "loss": 0.2412, "step": 58600 }, { "epoch": 0.4160034637381045, "grad_norm": 0.12109375, "learning_rate": 0.0019852704716132067, "loss": 0.2492, "step": 58610 }, { "epoch": 0.4160744419779506, "grad_norm": 0.091796875, "learning_rate": 0.0019852653716376585, "loss": 0.2278, "step": 58620 }, { "epoch": 0.4161454202177967, "grad_norm": 0.08642578125, "learning_rate": 0.001985260270786638, "loss": 0.2366, "step": 58630 }, { "epoch": 0.41621639845764286, "grad_norm": 0.111328125, "learning_rate": 0.0019852551690601504, "loss": 0.2457, "step": 58640 }, { "epoch": 0.41628737669748894, "grad_norm": 0.111328125, "learning_rate": 0.001985250066458201, "loss": 0.2604, "step": 58650 }, { "epoch": 0.4163583549373351, "grad_norm": 0.1123046875, "learning_rate": 0.001985244962980795, "loss": 0.2478, "step": 58660 }, { "epoch": 0.4164293331771812, "grad_norm": 0.134765625, "learning_rate": 0.001985239858627937, "loss": 0.2576, "step": 58670 }, { "epoch": 0.4165003114170273, "grad_norm": 0.11279296875, "learning_rate": 0.0019852347533996324, "loss": 0.2448, "step": 58680 }, { "epoch": 0.41657128965687346, "grad_norm": 0.083984375, "learning_rate": 0.001985229647295886, "loss": 0.2512, "step": 58690 }, { "epoch": 0.41664226789671954, "grad_norm": 0.09375, "learning_rate": 0.0019852245403167027, "loss": 0.2537, "step": 58700 }, { "epoch": 0.4167132461365657, "grad_norm": 0.0791015625, "learning_rate": 0.0019852194324620886, "loss": 0.2287, "step": 58710 }, { "epoch": 0.4167842243764118, "grad_norm": 0.140625, "learning_rate": 0.0019852143237320476, "loss": 0.2414, "step": 58720 }, { "epoch": 0.4168552026162579, "grad_norm": 0.1494140625, "learning_rate": 0.001985209214126585, "loss": 0.2536, "step": 58730 }, { "epoch": 0.41692618085610406, "grad_norm": 0.1533203125, "learning_rate": 0.0019852041036457057, "loss": 0.2372, "step": 58740 }, { "epoch": 0.41699715909595014, "grad_norm": 0.08740234375, "learning_rate": 0.001985198992289415, "loss": 0.2368, "step": 58750 }, { "epoch": 0.4170681373357963, "grad_norm": 0.08349609375, "learning_rate": 0.0019851938800577187, "loss": 0.2462, "step": 58760 }, { "epoch": 0.41713911557564237, "grad_norm": 0.0810546875, "learning_rate": 0.0019851887669506203, "loss": 0.2639, "step": 58770 }, { "epoch": 0.4172100938154885, "grad_norm": 0.1220703125, "learning_rate": 0.0019851836529681264, "loss": 0.2562, "step": 58780 }, { "epoch": 0.41728107205533466, "grad_norm": 0.11083984375, "learning_rate": 0.0019851785381102406, "loss": 0.2327, "step": 58790 }, { "epoch": 0.41735205029518074, "grad_norm": 0.11865234375, "learning_rate": 0.0019851734223769696, "loss": 0.2707, "step": 58800 }, { "epoch": 0.4174230285350269, "grad_norm": 0.15625, "learning_rate": 0.0019851683057683167, "loss": 0.2476, "step": 58810 }, { "epoch": 0.41749400677487297, "grad_norm": 0.1064453125, "learning_rate": 0.001985163188284288, "loss": 0.2455, "step": 58820 }, { "epoch": 0.4175649850147191, "grad_norm": 0.095703125, "learning_rate": 0.001985158069924889, "loss": 0.2345, "step": 58830 }, { "epoch": 0.41763596325456526, "grad_norm": 0.0927734375, "learning_rate": 0.001985152950690124, "loss": 0.2259, "step": 58840 }, { "epoch": 0.41770694149441134, "grad_norm": 0.08544921875, "learning_rate": 0.0019851478305799976, "loss": 0.2265, "step": 58850 }, { "epoch": 0.4177779197342575, "grad_norm": 0.13671875, "learning_rate": 0.001985142709594516, "loss": 0.237, "step": 58860 }, { "epoch": 0.4178488979741036, "grad_norm": 0.1513671875, "learning_rate": 0.0019851375877336834, "loss": 0.2721, "step": 58870 }, { "epoch": 0.4179198762139497, "grad_norm": 0.076171875, "learning_rate": 0.0019851324649975055, "loss": 0.2256, "step": 58880 }, { "epoch": 0.4179908544537958, "grad_norm": 0.1689453125, "learning_rate": 0.001985127341385987, "loss": 0.2455, "step": 58890 }, { "epoch": 0.41806183269364194, "grad_norm": 0.11328125, "learning_rate": 0.001985122216899133, "loss": 0.2419, "step": 58900 }, { "epoch": 0.4181328109334881, "grad_norm": 0.064453125, "learning_rate": 0.001985117091536948, "loss": 0.2428, "step": 58910 }, { "epoch": 0.4182037891733342, "grad_norm": 0.130859375, "learning_rate": 0.0019851119652994384, "loss": 0.2553, "step": 58920 }, { "epoch": 0.4182747674131803, "grad_norm": 0.11669921875, "learning_rate": 0.0019851068381866086, "loss": 0.2448, "step": 58930 }, { "epoch": 0.4183457456530264, "grad_norm": 0.11572265625, "learning_rate": 0.0019851017101984634, "loss": 0.2526, "step": 58940 }, { "epoch": 0.41841672389287254, "grad_norm": 0.11669921875, "learning_rate": 0.001985096581335008, "loss": 0.2516, "step": 58950 }, { "epoch": 0.4184877021327187, "grad_norm": 0.1123046875, "learning_rate": 0.0019850914515962474, "loss": 0.2463, "step": 58960 }, { "epoch": 0.4185586803725648, "grad_norm": 0.123046875, "learning_rate": 0.001985086320982187, "loss": 0.2383, "step": 58970 }, { "epoch": 0.4186296586124109, "grad_norm": 0.11669921875, "learning_rate": 0.0019850811894928317, "loss": 0.2371, "step": 58980 }, { "epoch": 0.418700636852257, "grad_norm": 0.07470703125, "learning_rate": 0.001985076057128187, "loss": 0.2284, "step": 58990 }, { "epoch": 0.41877161509210314, "grad_norm": 0.12890625, "learning_rate": 0.001985070923888257, "loss": 0.2441, "step": 59000 }, { "epoch": 0.41884259333194923, "grad_norm": 0.09619140625, "learning_rate": 0.0019850657897730476, "loss": 0.2323, "step": 59010 }, { "epoch": 0.4189135715717954, "grad_norm": 0.09228515625, "learning_rate": 0.001985060654782563, "loss": 0.2325, "step": 59020 }, { "epoch": 0.4189845498116415, "grad_norm": 0.2255859375, "learning_rate": 0.0019850555189168096, "loss": 0.2521, "step": 59030 }, { "epoch": 0.4190555280514876, "grad_norm": 0.1201171875, "learning_rate": 0.0019850503821757914, "loss": 0.247, "step": 59040 }, { "epoch": 0.41912650629133374, "grad_norm": 0.11181640625, "learning_rate": 0.001985045244559514, "loss": 0.2617, "step": 59050 }, { "epoch": 0.41919748453117983, "grad_norm": 0.11767578125, "learning_rate": 0.0019850401060679825, "loss": 0.2404, "step": 59060 }, { "epoch": 0.419268462771026, "grad_norm": 0.0849609375, "learning_rate": 0.0019850349667012014, "loss": 0.2479, "step": 59070 }, { "epoch": 0.4193394410108721, "grad_norm": 0.0947265625, "learning_rate": 0.001985029826459177, "loss": 0.2472, "step": 59080 }, { "epoch": 0.4194104192507182, "grad_norm": 0.10888671875, "learning_rate": 0.0019850246853419123, "loss": 0.2356, "step": 59090 }, { "epoch": 0.41948139749056434, "grad_norm": 0.119140625, "learning_rate": 0.0019850195433494143, "loss": 0.2383, "step": 59100 }, { "epoch": 0.41955237573041043, "grad_norm": 0.08984375, "learning_rate": 0.0019850144004816875, "loss": 0.2568, "step": 59110 }, { "epoch": 0.4196233539702566, "grad_norm": 0.134765625, "learning_rate": 0.0019850092567387373, "loss": 0.2514, "step": 59120 }, { "epoch": 0.41969433221010266, "grad_norm": 0.08984375, "learning_rate": 0.001985004112120568, "loss": 0.2283, "step": 59130 }, { "epoch": 0.4197653104499488, "grad_norm": 0.27734375, "learning_rate": 0.001984998966627185, "loss": 0.2467, "step": 59140 }, { "epoch": 0.41983628868979495, "grad_norm": 0.1728515625, "learning_rate": 0.0019849938202585937, "loss": 0.2439, "step": 59150 }, { "epoch": 0.41990726692964103, "grad_norm": 0.07958984375, "learning_rate": 0.001984988673014799, "loss": 0.2456, "step": 59160 }, { "epoch": 0.4199782451694872, "grad_norm": 0.1142578125, "learning_rate": 0.0019849835248958053, "loss": 0.2345, "step": 59170 }, { "epoch": 0.42004922340933326, "grad_norm": 0.12158203125, "learning_rate": 0.001984978375901619, "loss": 0.2314, "step": 59180 }, { "epoch": 0.4201202016491794, "grad_norm": 0.083984375, "learning_rate": 0.0019849732260322443, "loss": 0.2403, "step": 59190 }, { "epoch": 0.42019117988902555, "grad_norm": 0.1044921875, "learning_rate": 0.001984968075287687, "loss": 0.2471, "step": 59200 }, { "epoch": 0.42026215812887163, "grad_norm": 0.07958984375, "learning_rate": 0.001984962923667951, "loss": 0.2537, "step": 59210 }, { "epoch": 0.4203331363687178, "grad_norm": 0.11083984375, "learning_rate": 0.001984957771173043, "loss": 0.2442, "step": 59220 }, { "epoch": 0.42040411460856386, "grad_norm": 0.10888671875, "learning_rate": 0.0019849526178029667, "loss": 0.2403, "step": 59230 }, { "epoch": 0.42047509284841, "grad_norm": 0.08349609375, "learning_rate": 0.0019849474635577277, "loss": 0.2304, "step": 59240 }, { "epoch": 0.4205460710882561, "grad_norm": 0.0771484375, "learning_rate": 0.0019849423084373316, "loss": 0.241, "step": 59250 }, { "epoch": 0.42061704932810223, "grad_norm": 0.1982421875, "learning_rate": 0.0019849371524417827, "loss": 0.2326, "step": 59260 }, { "epoch": 0.4206880275679484, "grad_norm": 0.10546875, "learning_rate": 0.001984931995571086, "loss": 0.2276, "step": 59270 }, { "epoch": 0.42075900580779446, "grad_norm": 0.2431640625, "learning_rate": 0.0019849268378252473, "loss": 0.239, "step": 59280 }, { "epoch": 0.4208299840476406, "grad_norm": 0.1298828125, "learning_rate": 0.0019849216792042716, "loss": 0.2591, "step": 59290 }, { "epoch": 0.4209009622874867, "grad_norm": 0.0888671875, "learning_rate": 0.0019849165197081634, "loss": 0.2424, "step": 59300 }, { "epoch": 0.42097194052733283, "grad_norm": 0.08837890625, "learning_rate": 0.001984911359336929, "loss": 0.2513, "step": 59310 }, { "epoch": 0.421042918767179, "grad_norm": 0.10546875, "learning_rate": 0.0019849061980905724, "loss": 0.2406, "step": 59320 }, { "epoch": 0.42111389700702506, "grad_norm": 0.095703125, "learning_rate": 0.0019849010359690986, "loss": 0.2526, "step": 59330 }, { "epoch": 0.4211848752468712, "grad_norm": 0.0849609375, "learning_rate": 0.0019848958729725137, "loss": 0.2362, "step": 59340 }, { "epoch": 0.4212558534867173, "grad_norm": 0.1806640625, "learning_rate": 0.001984890709100822, "loss": 0.2504, "step": 59350 }, { "epoch": 0.42132683172656343, "grad_norm": 0.0849609375, "learning_rate": 0.001984885544354029, "loss": 0.2474, "step": 59360 }, { "epoch": 0.4213978099664096, "grad_norm": 0.193359375, "learning_rate": 0.001984880378732139, "loss": 0.2474, "step": 59370 }, { "epoch": 0.42146878820625566, "grad_norm": 0.08056640625, "learning_rate": 0.0019848752122351584, "loss": 0.2339, "step": 59380 }, { "epoch": 0.4215397664461018, "grad_norm": 0.07763671875, "learning_rate": 0.0019848700448630917, "loss": 0.2311, "step": 59390 }, { "epoch": 0.4216107446859479, "grad_norm": 0.11669921875, "learning_rate": 0.0019848648766159437, "loss": 0.2336, "step": 59400 }, { "epoch": 0.42168172292579403, "grad_norm": 0.10205078125, "learning_rate": 0.00198485970749372, "loss": 0.2358, "step": 59410 }, { "epoch": 0.4217527011656401, "grad_norm": 0.146484375, "learning_rate": 0.0019848545374964253, "loss": 0.2542, "step": 59420 }, { "epoch": 0.42182367940548626, "grad_norm": 0.1357421875, "learning_rate": 0.001984849366624065, "loss": 0.2331, "step": 59430 }, { "epoch": 0.4218946576453324, "grad_norm": 0.119140625, "learning_rate": 0.0019848441948766443, "loss": 0.2438, "step": 59440 }, { "epoch": 0.4219656358851785, "grad_norm": 0.142578125, "learning_rate": 0.001984839022254168, "loss": 0.2435, "step": 59450 }, { "epoch": 0.42203661412502463, "grad_norm": 0.1396484375, "learning_rate": 0.0019848338487566414, "loss": 0.2439, "step": 59460 }, { "epoch": 0.4221075923648707, "grad_norm": 0.09912109375, "learning_rate": 0.0019848286743840695, "loss": 0.2468, "step": 59470 }, { "epoch": 0.42217857060471686, "grad_norm": 0.099609375, "learning_rate": 0.0019848234991364576, "loss": 0.2336, "step": 59480 }, { "epoch": 0.422249548844563, "grad_norm": 0.06494140625, "learning_rate": 0.0019848183230138108, "loss": 0.2353, "step": 59490 }, { "epoch": 0.4223205270844091, "grad_norm": 0.10205078125, "learning_rate": 0.001984813146016134, "loss": 0.2446, "step": 59500 }, { "epoch": 0.42239150532425523, "grad_norm": 0.279296875, "learning_rate": 0.0019848079681434325, "loss": 0.2403, "step": 59510 }, { "epoch": 0.4224624835641013, "grad_norm": 0.134765625, "learning_rate": 0.001984802789395711, "loss": 0.2553, "step": 59520 }, { "epoch": 0.42253346180394746, "grad_norm": 0.1328125, "learning_rate": 0.0019847976097729756, "loss": 0.2495, "step": 59530 }, { "epoch": 0.42260444004379355, "grad_norm": 0.234375, "learning_rate": 0.0019847924292752304, "loss": 0.2474, "step": 59540 }, { "epoch": 0.4226754182836397, "grad_norm": 0.1328125, "learning_rate": 0.0019847872479024815, "loss": 0.2479, "step": 59550 }, { "epoch": 0.42274639652348583, "grad_norm": 0.09716796875, "learning_rate": 0.0019847820656547326, "loss": 0.2468, "step": 59560 }, { "epoch": 0.4228173747633319, "grad_norm": 0.0849609375, "learning_rate": 0.00198477688253199, "loss": 0.2262, "step": 59570 }, { "epoch": 0.42288835300317806, "grad_norm": 0.10009765625, "learning_rate": 0.001984771698534259, "loss": 0.2452, "step": 59580 }, { "epoch": 0.42295933124302415, "grad_norm": 0.09130859375, "learning_rate": 0.0019847665136615436, "loss": 0.2339, "step": 59590 }, { "epoch": 0.4230303094828703, "grad_norm": 0.0712890625, "learning_rate": 0.00198476132791385, "loss": 0.2288, "step": 59600 }, { "epoch": 0.42310128772271643, "grad_norm": 0.1767578125, "learning_rate": 0.0019847561412911826, "loss": 0.232, "step": 59610 }, { "epoch": 0.4231722659625625, "grad_norm": 0.1376953125, "learning_rate": 0.001984750953793547, "loss": 0.2417, "step": 59620 }, { "epoch": 0.42324324420240866, "grad_norm": 0.09912109375, "learning_rate": 0.0019847457654209484, "loss": 0.2349, "step": 59630 }, { "epoch": 0.42331422244225475, "grad_norm": 0.1435546875, "learning_rate": 0.0019847405761733916, "loss": 0.2334, "step": 59640 }, { "epoch": 0.4233852006821009, "grad_norm": 0.103515625, "learning_rate": 0.001984735386050882, "loss": 0.2244, "step": 59650 }, { "epoch": 0.423456178921947, "grad_norm": 0.103515625, "learning_rate": 0.0019847301950534236, "loss": 0.2454, "step": 59660 }, { "epoch": 0.4235271571617931, "grad_norm": 0.10009765625, "learning_rate": 0.0019847250031810233, "loss": 0.2488, "step": 59670 }, { "epoch": 0.42359813540163926, "grad_norm": 0.0703125, "learning_rate": 0.0019847198104336852, "loss": 0.2413, "step": 59680 }, { "epoch": 0.42366911364148535, "grad_norm": 0.11572265625, "learning_rate": 0.0019847146168114147, "loss": 0.2389, "step": 59690 }, { "epoch": 0.4237400918813315, "grad_norm": 0.1484375, "learning_rate": 0.001984709422314217, "loss": 0.2605, "step": 59700 }, { "epoch": 0.4238110701211776, "grad_norm": 0.1826171875, "learning_rate": 0.001984704226942097, "loss": 0.2286, "step": 59710 }, { "epoch": 0.4238820483610237, "grad_norm": 0.1005859375, "learning_rate": 0.0019846990306950597, "loss": 0.228, "step": 59720 }, { "epoch": 0.42395302660086986, "grad_norm": 0.13671875, "learning_rate": 0.001984693833573111, "loss": 0.2443, "step": 59730 }, { "epoch": 0.42402400484071595, "grad_norm": 0.1572265625, "learning_rate": 0.0019846886355762553, "loss": 0.2296, "step": 59740 }, { "epoch": 0.4240949830805621, "grad_norm": 0.08837890625, "learning_rate": 0.001984683436704498, "loss": 0.2334, "step": 59750 }, { "epoch": 0.4241659613204082, "grad_norm": 0.09716796875, "learning_rate": 0.001984678236957844, "loss": 0.235, "step": 59760 }, { "epoch": 0.4242369395602543, "grad_norm": 0.1103515625, "learning_rate": 0.001984673036336299, "loss": 0.2413, "step": 59770 }, { "epoch": 0.4243079178001004, "grad_norm": 0.09423828125, "learning_rate": 0.001984667834839868, "loss": 0.2344, "step": 59780 }, { "epoch": 0.42437889603994655, "grad_norm": 0.126953125, "learning_rate": 0.0019846626324685557, "loss": 0.2333, "step": 59790 }, { "epoch": 0.4244498742797927, "grad_norm": 0.11083984375, "learning_rate": 0.0019846574292223676, "loss": 0.2416, "step": 59800 }, { "epoch": 0.4245208525196388, "grad_norm": 0.07568359375, "learning_rate": 0.001984652225101308, "loss": 0.2356, "step": 59810 }, { "epoch": 0.4245918307594849, "grad_norm": 0.1103515625, "learning_rate": 0.001984647020105384, "loss": 0.2588, "step": 59820 }, { "epoch": 0.424662808999331, "grad_norm": 0.111328125, "learning_rate": 0.0019846418142345987, "loss": 0.2424, "step": 59830 }, { "epoch": 0.42473378723917715, "grad_norm": 0.09521484375, "learning_rate": 0.0019846366074889586, "loss": 0.249, "step": 59840 }, { "epoch": 0.4248047654790233, "grad_norm": 0.189453125, "learning_rate": 0.001984631399868468, "loss": 0.238, "step": 59850 }, { "epoch": 0.4248757437188694, "grad_norm": 0.08935546875, "learning_rate": 0.0019846261913731324, "loss": 0.236, "step": 59860 }, { "epoch": 0.4249467219587155, "grad_norm": 0.080078125, "learning_rate": 0.001984620982002957, "loss": 0.2388, "step": 59870 }, { "epoch": 0.4250177001985616, "grad_norm": 0.08251953125, "learning_rate": 0.001984615771757947, "loss": 0.2388, "step": 59880 }, { "epoch": 0.42508867843840775, "grad_norm": 0.10693359375, "learning_rate": 0.0019846105606381075, "loss": 0.2515, "step": 59890 }, { "epoch": 0.42515965667825384, "grad_norm": 0.1298828125, "learning_rate": 0.0019846053486434435, "loss": 0.2479, "step": 59900 }, { "epoch": 0.4252306349181, "grad_norm": 0.0810546875, "learning_rate": 0.00198460013577396, "loss": 0.2375, "step": 59910 }, { "epoch": 0.4253016131579461, "grad_norm": 0.12109375, "learning_rate": 0.001984594922029663, "loss": 0.2545, "step": 59920 }, { "epoch": 0.4253725913977922, "grad_norm": 0.1787109375, "learning_rate": 0.0019845897074105565, "loss": 0.237, "step": 59930 }, { "epoch": 0.42544356963763835, "grad_norm": 0.09228515625, "learning_rate": 0.0019845844919166465, "loss": 0.2382, "step": 59940 }, { "epoch": 0.42551454787748444, "grad_norm": 0.10400390625, "learning_rate": 0.0019845792755479375, "loss": 0.2304, "step": 59950 }, { "epoch": 0.4255855261173306, "grad_norm": 0.111328125, "learning_rate": 0.0019845740583044353, "loss": 0.2243, "step": 59960 }, { "epoch": 0.4256565043571767, "grad_norm": 0.1650390625, "learning_rate": 0.001984568840186145, "loss": 0.2378, "step": 59970 }, { "epoch": 0.4257274825970228, "grad_norm": 0.08740234375, "learning_rate": 0.0019845636211930713, "loss": 0.2511, "step": 59980 }, { "epoch": 0.42579846083686895, "grad_norm": 0.162109375, "learning_rate": 0.00198455840132522, "loss": 0.2516, "step": 59990 }, { "epoch": 0.42586943907671504, "grad_norm": 0.10107421875, "learning_rate": 0.0019845531805825953, "loss": 0.2459, "step": 60000 }, { "epoch": 0.42586943907671504, "eval_covost2-zh-en_loss": 3.9703307151794434, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.9493, "eval_covost2-zh-en_samples_per_second": 2.916, "eval_covost2-zh-en_steps_per_second": 0.182, "step": 60000 }, { "epoch": 0.42586943907671504, "eval_covost2-en-zh_loss": 3.1090078353881836, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.2446, "eval_covost2-en-zh_samples_per_second": 3.161, "eval_covost2-en-zh_steps_per_second": 0.198, "step": 60000 }, { "epoch": 0.4259404173165612, "grad_norm": 0.072265625, "learning_rate": 0.001984547958965203, "loss": 0.2266, "step": 60010 }, { "epoch": 0.42601139555640727, "grad_norm": 0.10400390625, "learning_rate": 0.0019845427364730485, "loss": 0.2482, "step": 60020 }, { "epoch": 0.4260823737962534, "grad_norm": 0.10205078125, "learning_rate": 0.0019845375131061368, "loss": 0.2545, "step": 60030 }, { "epoch": 0.42615335203609955, "grad_norm": 0.0791015625, "learning_rate": 0.0019845322888644725, "loss": 0.232, "step": 60040 }, { "epoch": 0.42622433027594564, "grad_norm": 0.16796875, "learning_rate": 0.0019845270637480618, "loss": 0.2306, "step": 60050 }, { "epoch": 0.4262953085157918, "grad_norm": 0.10986328125, "learning_rate": 0.001984521837756909, "loss": 0.2266, "step": 60060 }, { "epoch": 0.42636628675563787, "grad_norm": 0.09814453125, "learning_rate": 0.001984516610891019, "loss": 0.2291, "step": 60070 }, { "epoch": 0.426437264995484, "grad_norm": 0.12060546875, "learning_rate": 0.001984511383150398, "loss": 0.2551, "step": 60080 }, { "epoch": 0.42650824323533015, "grad_norm": 0.0830078125, "learning_rate": 0.001984506154535051, "loss": 0.2335, "step": 60090 }, { "epoch": 0.42657922147517624, "grad_norm": 0.080078125, "learning_rate": 0.001984500925044982, "loss": 0.2316, "step": 60100 }, { "epoch": 0.4266501997150224, "grad_norm": 0.087890625, "learning_rate": 0.0019844956946801977, "loss": 0.2232, "step": 60110 }, { "epoch": 0.42672117795486847, "grad_norm": 0.10205078125, "learning_rate": 0.0019844904634407027, "loss": 0.2474, "step": 60120 }, { "epoch": 0.4267921561947146, "grad_norm": 0.1630859375, "learning_rate": 0.0019844852313265016, "loss": 0.2398, "step": 60130 }, { "epoch": 0.4268631344345607, "grad_norm": 0.10107421875, "learning_rate": 0.0019844799983376, "loss": 0.2362, "step": 60140 }, { "epoch": 0.42693411267440684, "grad_norm": 0.1015625, "learning_rate": 0.0019844747644740036, "loss": 0.2274, "step": 60150 }, { "epoch": 0.427005090914253, "grad_norm": 0.10009765625, "learning_rate": 0.0019844695297357167, "loss": 0.2319, "step": 60160 }, { "epoch": 0.42707606915409907, "grad_norm": 0.150390625, "learning_rate": 0.001984464294122745, "loss": 0.243, "step": 60170 }, { "epoch": 0.4271470473939452, "grad_norm": 0.138671875, "learning_rate": 0.001984459057635094, "loss": 0.2427, "step": 60180 }, { "epoch": 0.4272180256337913, "grad_norm": 0.1259765625, "learning_rate": 0.0019844538202727674, "loss": 0.2604, "step": 60190 }, { "epoch": 0.42728900387363744, "grad_norm": 0.1337890625, "learning_rate": 0.001984448582035772, "loss": 0.2289, "step": 60200 }, { "epoch": 0.4273599821134836, "grad_norm": 0.16015625, "learning_rate": 0.0019844433429241125, "loss": 0.2418, "step": 60210 }, { "epoch": 0.42743096035332967, "grad_norm": 0.1103515625, "learning_rate": 0.0019844381029377935, "loss": 0.2345, "step": 60220 }, { "epoch": 0.4275019385931758, "grad_norm": 0.06884765625, "learning_rate": 0.001984432862076821, "loss": 0.2311, "step": 60230 }, { "epoch": 0.4275729168330219, "grad_norm": 0.07470703125, "learning_rate": 0.0019844276203412, "loss": 0.2431, "step": 60240 }, { "epoch": 0.42764389507286804, "grad_norm": 0.09619140625, "learning_rate": 0.001984422377730935, "loss": 0.2339, "step": 60250 }, { "epoch": 0.42771487331271413, "grad_norm": 0.140625, "learning_rate": 0.0019844171342460323, "loss": 0.2544, "step": 60260 }, { "epoch": 0.42778585155256027, "grad_norm": 0.115234375, "learning_rate": 0.001984411889886496, "loss": 0.2498, "step": 60270 }, { "epoch": 0.4278568297924064, "grad_norm": 0.134765625, "learning_rate": 0.001984406644652332, "loss": 0.2468, "step": 60280 }, { "epoch": 0.4279278080322525, "grad_norm": 0.1640625, "learning_rate": 0.001984401398543545, "loss": 0.2279, "step": 60290 }, { "epoch": 0.42799878627209864, "grad_norm": 0.111328125, "learning_rate": 0.001984396151560141, "loss": 0.2608, "step": 60300 }, { "epoch": 0.42806976451194473, "grad_norm": 0.13671875, "learning_rate": 0.0019843909037021244, "loss": 0.2453, "step": 60310 }, { "epoch": 0.42814074275179087, "grad_norm": 0.091796875, "learning_rate": 0.0019843856549695005, "loss": 0.2412, "step": 60320 }, { "epoch": 0.428211720991637, "grad_norm": 0.10205078125, "learning_rate": 0.0019843804053622746, "loss": 0.2584, "step": 60330 }, { "epoch": 0.4282826992314831, "grad_norm": 0.216796875, "learning_rate": 0.001984375154880452, "loss": 0.2426, "step": 60340 }, { "epoch": 0.42835367747132924, "grad_norm": 0.07568359375, "learning_rate": 0.0019843699035240374, "loss": 0.2338, "step": 60350 }, { "epoch": 0.42842465571117533, "grad_norm": 0.0849609375, "learning_rate": 0.001984364651293037, "loss": 0.2483, "step": 60360 }, { "epoch": 0.42849563395102147, "grad_norm": 0.12109375, "learning_rate": 0.001984359398187455, "loss": 0.2296, "step": 60370 }, { "epoch": 0.42856661219086756, "grad_norm": 0.1376953125, "learning_rate": 0.0019843541442072974, "loss": 0.2493, "step": 60380 }, { "epoch": 0.4286375904307137, "grad_norm": 0.09423828125, "learning_rate": 0.0019843488893525685, "loss": 0.224, "step": 60390 }, { "epoch": 0.42870856867055984, "grad_norm": 0.08837890625, "learning_rate": 0.001984343633623274, "loss": 0.2579, "step": 60400 }, { "epoch": 0.42877954691040593, "grad_norm": 0.07958984375, "learning_rate": 0.001984338377019419, "loss": 0.243, "step": 60410 }, { "epoch": 0.42885052515025207, "grad_norm": 0.07275390625, "learning_rate": 0.001984333119541009, "loss": 0.2398, "step": 60420 }, { "epoch": 0.42892150339009816, "grad_norm": 0.08251953125, "learning_rate": 0.001984327861188049, "loss": 0.2425, "step": 60430 }, { "epoch": 0.4289924816299443, "grad_norm": 0.1171875, "learning_rate": 0.001984322601960544, "loss": 0.2505, "step": 60440 }, { "epoch": 0.42906345986979044, "grad_norm": 0.076171875, "learning_rate": 0.0019843173418585, "loss": 0.2445, "step": 60450 }, { "epoch": 0.42913443810963653, "grad_norm": 0.1298828125, "learning_rate": 0.0019843120808819207, "loss": 0.2728, "step": 60460 }, { "epoch": 0.42920541634948267, "grad_norm": 0.12158203125, "learning_rate": 0.0019843068190308127, "loss": 0.2349, "step": 60470 }, { "epoch": 0.42927639458932876, "grad_norm": 0.0927734375, "learning_rate": 0.0019843015563051807, "loss": 0.25, "step": 60480 }, { "epoch": 0.4293473728291749, "grad_norm": 0.11865234375, "learning_rate": 0.0019842962927050295, "loss": 0.2511, "step": 60490 }, { "epoch": 0.42941835106902104, "grad_norm": 0.1572265625, "learning_rate": 0.001984291028230365, "loss": 0.2447, "step": 60500 }, { "epoch": 0.42948932930886713, "grad_norm": 0.12353515625, "learning_rate": 0.001984285762881192, "loss": 0.2424, "step": 60510 }, { "epoch": 0.42956030754871327, "grad_norm": 0.09130859375, "learning_rate": 0.0019842804966575156, "loss": 0.2428, "step": 60520 }, { "epoch": 0.42963128578855936, "grad_norm": 0.11572265625, "learning_rate": 0.0019842752295593418, "loss": 0.2625, "step": 60530 }, { "epoch": 0.4297022640284055, "grad_norm": 0.11279296875, "learning_rate": 0.0019842699615866747, "loss": 0.2443, "step": 60540 }, { "epoch": 0.4297732422682516, "grad_norm": 0.0869140625, "learning_rate": 0.00198426469273952, "loss": 0.2359, "step": 60550 }, { "epoch": 0.42984422050809773, "grad_norm": 0.0703125, "learning_rate": 0.0019842594230178832, "loss": 0.2337, "step": 60560 }, { "epoch": 0.42991519874794387, "grad_norm": 0.12109375, "learning_rate": 0.001984254152421769, "loss": 0.2472, "step": 60570 }, { "epoch": 0.42998617698778996, "grad_norm": 0.14453125, "learning_rate": 0.001984248880951183, "loss": 0.2623, "step": 60580 }, { "epoch": 0.4300571552276361, "grad_norm": 0.1220703125, "learning_rate": 0.0019842436086061305, "loss": 0.2455, "step": 60590 }, { "epoch": 0.4301281334674822, "grad_norm": 0.205078125, "learning_rate": 0.0019842383353866166, "loss": 0.2482, "step": 60600 }, { "epoch": 0.43019911170732833, "grad_norm": 0.2890625, "learning_rate": 0.0019842330612926456, "loss": 0.2619, "step": 60610 }, { "epoch": 0.43027008994717447, "grad_norm": 0.10888671875, "learning_rate": 0.0019842277863242243, "loss": 0.2479, "step": 60620 }, { "epoch": 0.43034106818702056, "grad_norm": 0.076171875, "learning_rate": 0.0019842225104813563, "loss": 0.2213, "step": 60630 }, { "epoch": 0.4304120464268667, "grad_norm": 0.0888671875, "learning_rate": 0.001984217233764049, "loss": 0.2375, "step": 60640 }, { "epoch": 0.4304830246667128, "grad_norm": 0.09521484375, "learning_rate": 0.001984211956172305, "loss": 0.2325, "step": 60650 }, { "epoch": 0.43055400290655893, "grad_norm": 0.10400390625, "learning_rate": 0.0019842066777061313, "loss": 0.2451, "step": 60660 }, { "epoch": 0.430624981146405, "grad_norm": 0.09521484375, "learning_rate": 0.0019842013983655325, "loss": 0.2407, "step": 60670 }, { "epoch": 0.43069595938625116, "grad_norm": 0.11962890625, "learning_rate": 0.0019841961181505142, "loss": 0.2619, "step": 60680 }, { "epoch": 0.4307669376260973, "grad_norm": 0.095703125, "learning_rate": 0.0019841908370610813, "loss": 0.2578, "step": 60690 }, { "epoch": 0.4308379158659434, "grad_norm": 0.08447265625, "learning_rate": 0.001984185555097239, "loss": 0.2547, "step": 60700 }, { "epoch": 0.43090889410578953, "grad_norm": 0.1259765625, "learning_rate": 0.001984180272258992, "loss": 0.2341, "step": 60710 }, { "epoch": 0.4309798723456356, "grad_norm": 0.0908203125, "learning_rate": 0.0019841749885463472, "loss": 0.2376, "step": 60720 }, { "epoch": 0.43105085058548176, "grad_norm": 0.12158203125, "learning_rate": 0.001984169703959308, "loss": 0.239, "step": 60730 }, { "epoch": 0.4311218288253279, "grad_norm": 0.08154296875, "learning_rate": 0.0019841644184978805, "loss": 0.2801, "step": 60740 }, { "epoch": 0.431192807065174, "grad_norm": 0.158203125, "learning_rate": 0.00198415913216207, "loss": 0.2517, "step": 60750 }, { "epoch": 0.43126378530502013, "grad_norm": 0.1376953125, "learning_rate": 0.0019841538449518813, "loss": 0.2498, "step": 60760 }, { "epoch": 0.4313347635448662, "grad_norm": 0.115234375, "learning_rate": 0.0019841485568673203, "loss": 0.2636, "step": 60770 }, { "epoch": 0.43140574178471236, "grad_norm": 0.1259765625, "learning_rate": 0.0019841432679083915, "loss": 0.2395, "step": 60780 }, { "epoch": 0.43147672002455845, "grad_norm": 0.10498046875, "learning_rate": 0.0019841379780751, "loss": 0.246, "step": 60790 }, { "epoch": 0.4315476982644046, "grad_norm": 0.1259765625, "learning_rate": 0.001984132687367452, "loss": 0.2495, "step": 60800 }, { "epoch": 0.43161867650425073, "grad_norm": 0.11474609375, "learning_rate": 0.001984127395785452, "loss": 0.2403, "step": 60810 }, { "epoch": 0.4316896547440968, "grad_norm": 0.119140625, "learning_rate": 0.0019841221033291057, "loss": 0.2518, "step": 60820 }, { "epoch": 0.43176063298394296, "grad_norm": 0.1259765625, "learning_rate": 0.001984116809998418, "loss": 0.2455, "step": 60830 }, { "epoch": 0.43183161122378905, "grad_norm": 0.09033203125, "learning_rate": 0.001984111515793394, "loss": 0.2634, "step": 60840 }, { "epoch": 0.4319025894636352, "grad_norm": 0.0625, "learning_rate": 0.001984106220714039, "loss": 0.2269, "step": 60850 }, { "epoch": 0.43197356770348133, "grad_norm": 0.09619140625, "learning_rate": 0.0019841009247603585, "loss": 0.2486, "step": 60860 }, { "epoch": 0.4320445459433274, "grad_norm": 0.1796875, "learning_rate": 0.0019840956279323575, "loss": 0.2374, "step": 60870 }, { "epoch": 0.43211552418317356, "grad_norm": 0.1279296875, "learning_rate": 0.0019840903302300416, "loss": 0.2541, "step": 60880 }, { "epoch": 0.43218650242301965, "grad_norm": 0.1044921875, "learning_rate": 0.001984085031653415, "loss": 0.241, "step": 60890 }, { "epoch": 0.4322574806628658, "grad_norm": 0.072265625, "learning_rate": 0.0019840797322024845, "loss": 0.2482, "step": 60900 }, { "epoch": 0.4323284589027119, "grad_norm": 0.1103515625, "learning_rate": 0.0019840744318772546, "loss": 0.2256, "step": 60910 }, { "epoch": 0.432399437142558, "grad_norm": 0.091796875, "learning_rate": 0.00198406913067773, "loss": 0.2363, "step": 60920 }, { "epoch": 0.43247041538240416, "grad_norm": 0.11767578125, "learning_rate": 0.0019840638286039167, "loss": 0.2345, "step": 60930 }, { "epoch": 0.43254139362225025, "grad_norm": 0.1044921875, "learning_rate": 0.00198405852565582, "loss": 0.234, "step": 60940 }, { "epoch": 0.4326123718620964, "grad_norm": 0.09033203125, "learning_rate": 0.0019840532218334444, "loss": 0.2385, "step": 60950 }, { "epoch": 0.4326833501019425, "grad_norm": 0.0947265625, "learning_rate": 0.0019840479171367954, "loss": 0.2343, "step": 60960 }, { "epoch": 0.4327543283417886, "grad_norm": 0.08935546875, "learning_rate": 0.0019840426115658788, "loss": 0.2438, "step": 60970 }, { "epoch": 0.43282530658163476, "grad_norm": 0.091796875, "learning_rate": 0.0019840373051206997, "loss": 0.2365, "step": 60980 }, { "epoch": 0.43289628482148085, "grad_norm": 0.10888671875, "learning_rate": 0.0019840319978012624, "loss": 0.2373, "step": 60990 }, { "epoch": 0.432967263061327, "grad_norm": 0.099609375, "learning_rate": 0.0019840266896075736, "loss": 0.2512, "step": 61000 }, { "epoch": 0.4330382413011731, "grad_norm": 0.1630859375, "learning_rate": 0.001984021380539637, "loss": 0.262, "step": 61010 }, { "epoch": 0.4331092195410192, "grad_norm": 0.328125, "learning_rate": 0.001984016070597459, "loss": 0.2465, "step": 61020 }, { "epoch": 0.4331801977808653, "grad_norm": 0.1015625, "learning_rate": 0.001984010759781045, "loss": 0.2323, "step": 61030 }, { "epoch": 0.43325117602071145, "grad_norm": 0.1708984375, "learning_rate": 0.001984005448090399, "loss": 0.2371, "step": 61040 }, { "epoch": 0.4333221542605576, "grad_norm": 0.07861328125, "learning_rate": 0.0019840001355255275, "loss": 0.2413, "step": 61050 }, { "epoch": 0.4333931325004037, "grad_norm": 0.17578125, "learning_rate": 0.001983994822086435, "loss": 0.2485, "step": 61060 }, { "epoch": 0.4334641107402498, "grad_norm": 0.08642578125, "learning_rate": 0.001983989507773127, "loss": 0.2478, "step": 61070 }, { "epoch": 0.4335350889800959, "grad_norm": 0.1240234375, "learning_rate": 0.001983984192585609, "loss": 0.2521, "step": 61080 }, { "epoch": 0.43360606721994205, "grad_norm": 0.07421875, "learning_rate": 0.0019839788765238864, "loss": 0.2381, "step": 61090 }, { "epoch": 0.4336770454597882, "grad_norm": 0.0888671875, "learning_rate": 0.0019839735595879636, "loss": 0.2356, "step": 61100 }, { "epoch": 0.4337480236996343, "grad_norm": 0.12060546875, "learning_rate": 0.0019839682417778465, "loss": 0.2457, "step": 61110 }, { "epoch": 0.4338190019394804, "grad_norm": 0.099609375, "learning_rate": 0.00198396292309354, "loss": 0.2449, "step": 61120 }, { "epoch": 0.4338899801793265, "grad_norm": 0.11181640625, "learning_rate": 0.00198395760353505, "loss": 0.2594, "step": 61130 }, { "epoch": 0.43396095841917265, "grad_norm": 0.10009765625, "learning_rate": 0.0019839522831023807, "loss": 0.2579, "step": 61140 }, { "epoch": 0.43403193665901874, "grad_norm": 0.10986328125, "learning_rate": 0.0019839469617955385, "loss": 0.2495, "step": 61150 }, { "epoch": 0.4341029148988649, "grad_norm": 0.06103515625, "learning_rate": 0.001983941639614528, "loss": 0.2318, "step": 61160 }, { "epoch": 0.434173893138711, "grad_norm": 0.1162109375, "learning_rate": 0.0019839363165593546, "loss": 0.2342, "step": 61170 }, { "epoch": 0.4342448713785571, "grad_norm": 0.1435546875, "learning_rate": 0.001983930992630024, "loss": 0.2459, "step": 61180 }, { "epoch": 0.43431584961840325, "grad_norm": 0.1494140625, "learning_rate": 0.0019839256678265403, "loss": 0.2146, "step": 61190 }, { "epoch": 0.43438682785824934, "grad_norm": 0.1650390625, "learning_rate": 0.00198392034214891, "loss": 0.2619, "step": 61200 }, { "epoch": 0.4344578060980955, "grad_norm": 0.1640625, "learning_rate": 0.001983915015597138, "loss": 0.2343, "step": 61210 }, { "epoch": 0.4345287843379416, "grad_norm": 0.1298828125, "learning_rate": 0.001983909688171229, "loss": 0.2372, "step": 61220 }, { "epoch": 0.4345997625777877, "grad_norm": 0.1201171875, "learning_rate": 0.0019839043598711887, "loss": 0.2423, "step": 61230 }, { "epoch": 0.43467074081763385, "grad_norm": 0.162109375, "learning_rate": 0.0019838990306970225, "loss": 0.2551, "step": 61240 }, { "epoch": 0.43474171905747994, "grad_norm": 0.12451171875, "learning_rate": 0.0019838937006487357, "loss": 0.2631, "step": 61250 }, { "epoch": 0.4348126972973261, "grad_norm": 0.119140625, "learning_rate": 0.0019838883697263334, "loss": 0.2504, "step": 61260 }, { "epoch": 0.43488367553717217, "grad_norm": 0.08740234375, "learning_rate": 0.001983883037929821, "loss": 0.2412, "step": 61270 }, { "epoch": 0.4349546537770183, "grad_norm": 0.1220703125, "learning_rate": 0.0019838777052592037, "loss": 0.2519, "step": 61280 }, { "epoch": 0.43502563201686445, "grad_norm": 0.11083984375, "learning_rate": 0.0019838723717144863, "loss": 0.2346, "step": 61290 }, { "epoch": 0.43509661025671054, "grad_norm": 0.10693359375, "learning_rate": 0.001983867037295675, "loss": 0.2545, "step": 61300 }, { "epoch": 0.4351675884965567, "grad_norm": 0.0693359375, "learning_rate": 0.0019838617020027744, "loss": 0.2242, "step": 61310 }, { "epoch": 0.43523856673640277, "grad_norm": 0.0927734375, "learning_rate": 0.0019838563658357896, "loss": 0.2332, "step": 61320 }, { "epoch": 0.4353095449762489, "grad_norm": 0.1337890625, "learning_rate": 0.001983851028794727, "loss": 0.2252, "step": 61330 }, { "epoch": 0.43538052321609505, "grad_norm": 0.10400390625, "learning_rate": 0.0019838456908795907, "loss": 0.2556, "step": 61340 }, { "epoch": 0.43545150145594114, "grad_norm": 0.0888671875, "learning_rate": 0.0019838403520903865, "loss": 0.2567, "step": 61350 }, { "epoch": 0.4355224796957873, "grad_norm": 0.1083984375, "learning_rate": 0.0019838350124271194, "loss": 0.251, "step": 61360 }, { "epoch": 0.43559345793563337, "grad_norm": 0.12353515625, "learning_rate": 0.0019838296718897954, "loss": 0.262, "step": 61370 }, { "epoch": 0.4356644361754795, "grad_norm": 0.11865234375, "learning_rate": 0.001983824330478419, "loss": 0.2475, "step": 61380 }, { "epoch": 0.4357354144153256, "grad_norm": 0.13671875, "learning_rate": 0.0019838189881929954, "loss": 0.2489, "step": 61390 }, { "epoch": 0.43580639265517174, "grad_norm": 0.1015625, "learning_rate": 0.00198381364503353, "loss": 0.239, "step": 61400 }, { "epoch": 0.4358773708950179, "grad_norm": 0.08447265625, "learning_rate": 0.001983808301000029, "loss": 0.2403, "step": 61410 }, { "epoch": 0.43594834913486397, "grad_norm": 0.15234375, "learning_rate": 0.0019838029560924966, "loss": 0.228, "step": 61420 }, { "epoch": 0.4360193273747101, "grad_norm": 0.07470703125, "learning_rate": 0.0019837976103109383, "loss": 0.2415, "step": 61430 }, { "epoch": 0.4360903056145562, "grad_norm": 0.07861328125, "learning_rate": 0.00198379226365536, "loss": 0.2599, "step": 61440 }, { "epoch": 0.43616128385440234, "grad_norm": 0.08154296875, "learning_rate": 0.001983786916125767, "loss": 0.2241, "step": 61450 }, { "epoch": 0.4362322620942485, "grad_norm": 0.11865234375, "learning_rate": 0.0019837815677221627, "loss": 0.2394, "step": 61460 }, { "epoch": 0.43630324033409457, "grad_norm": 0.1044921875, "learning_rate": 0.001983776218444555, "loss": 0.2372, "step": 61470 }, { "epoch": 0.4363742185739407, "grad_norm": 0.11474609375, "learning_rate": 0.0019837708682929477, "loss": 0.2627, "step": 61480 }, { "epoch": 0.4364451968137868, "grad_norm": 0.107421875, "learning_rate": 0.0019837655172673463, "loss": 0.25, "step": 61490 }, { "epoch": 0.43651617505363294, "grad_norm": 0.0751953125, "learning_rate": 0.001983760165367756, "loss": 0.2411, "step": 61500 }, { "epoch": 0.436587153293479, "grad_norm": 0.080078125, "learning_rate": 0.001983754812594183, "loss": 0.2326, "step": 61510 }, { "epoch": 0.43665813153332517, "grad_norm": 0.1259765625, "learning_rate": 0.001983749458946631, "loss": 0.2452, "step": 61520 }, { "epoch": 0.4367291097731713, "grad_norm": 0.1376953125, "learning_rate": 0.0019837441044251065, "loss": 0.2317, "step": 61530 }, { "epoch": 0.4368000880130174, "grad_norm": 0.12890625, "learning_rate": 0.0019837387490296145, "loss": 0.2411, "step": 61540 }, { "epoch": 0.43687106625286354, "grad_norm": 0.1015625, "learning_rate": 0.00198373339276016, "loss": 0.2349, "step": 61550 }, { "epoch": 0.4369420444927096, "grad_norm": 0.1298828125, "learning_rate": 0.0019837280356167493, "loss": 0.2348, "step": 61560 }, { "epoch": 0.43701302273255577, "grad_norm": 0.0927734375, "learning_rate": 0.0019837226775993866, "loss": 0.2464, "step": 61570 }, { "epoch": 0.4370840009724019, "grad_norm": 0.11181640625, "learning_rate": 0.001983717318708077, "loss": 0.2592, "step": 61580 }, { "epoch": 0.437154979212248, "grad_norm": 0.158203125, "learning_rate": 0.001983711958942827, "loss": 0.2335, "step": 61590 }, { "epoch": 0.43722595745209414, "grad_norm": 0.1484375, "learning_rate": 0.001983706598303641, "loss": 0.217, "step": 61600 }, { "epoch": 0.4372969356919402, "grad_norm": 0.09814453125, "learning_rate": 0.0019837012367905247, "loss": 0.2333, "step": 61610 }, { "epoch": 0.43736791393178637, "grad_norm": 0.08935546875, "learning_rate": 0.0019836958744034834, "loss": 0.2411, "step": 61620 }, { "epoch": 0.43743889217163245, "grad_norm": 0.0888671875, "learning_rate": 0.0019836905111425217, "loss": 0.2377, "step": 61630 }, { "epoch": 0.4375098704114786, "grad_norm": 0.10791015625, "learning_rate": 0.001983685147007646, "loss": 0.2222, "step": 61640 }, { "epoch": 0.43758084865132474, "grad_norm": 0.125, "learning_rate": 0.001983679781998861, "loss": 0.2507, "step": 61650 }, { "epoch": 0.4376518268911708, "grad_norm": 0.1865234375, "learning_rate": 0.001983674416116172, "loss": 0.2323, "step": 61660 }, { "epoch": 0.43772280513101697, "grad_norm": 0.078125, "learning_rate": 0.001983669049359584, "loss": 0.2537, "step": 61670 }, { "epoch": 0.43779378337086305, "grad_norm": 0.09619140625, "learning_rate": 0.0019836636817291034, "loss": 0.2362, "step": 61680 }, { "epoch": 0.4378647616107092, "grad_norm": 0.08203125, "learning_rate": 0.0019836583132247345, "loss": 0.2381, "step": 61690 }, { "epoch": 0.43793573985055534, "grad_norm": 0.11572265625, "learning_rate": 0.0019836529438464826, "loss": 0.2537, "step": 61700 }, { "epoch": 0.4380067180904014, "grad_norm": 0.0888671875, "learning_rate": 0.0019836475735943537, "loss": 0.2532, "step": 61710 }, { "epoch": 0.43807769633024757, "grad_norm": 0.10107421875, "learning_rate": 0.0019836422024683527, "loss": 0.2483, "step": 61720 }, { "epoch": 0.43814867457009365, "grad_norm": 0.126953125, "learning_rate": 0.0019836368304684848, "loss": 0.2525, "step": 61730 }, { "epoch": 0.4382196528099398, "grad_norm": 0.142578125, "learning_rate": 0.0019836314575947555, "loss": 0.2365, "step": 61740 }, { "epoch": 0.43829063104978594, "grad_norm": 0.11474609375, "learning_rate": 0.00198362608384717, "loss": 0.2472, "step": 61750 }, { "epoch": 0.438361609289632, "grad_norm": 0.09521484375, "learning_rate": 0.001983620709225734, "loss": 0.2405, "step": 61760 }, { "epoch": 0.43843258752947817, "grad_norm": 0.2431640625, "learning_rate": 0.0019836153337304523, "loss": 0.2465, "step": 61770 }, { "epoch": 0.43850356576932426, "grad_norm": 0.10498046875, "learning_rate": 0.0019836099573613303, "loss": 0.2516, "step": 61780 }, { "epoch": 0.4385745440091704, "grad_norm": 0.08447265625, "learning_rate": 0.001983604580118374, "loss": 0.2516, "step": 61790 }, { "epoch": 0.4386455222490165, "grad_norm": 0.58203125, "learning_rate": 0.0019835992020015873, "loss": 0.2714, "step": 61800 }, { "epoch": 0.4387165004888626, "grad_norm": 0.08544921875, "learning_rate": 0.0019835938230109767, "loss": 0.258, "step": 61810 }, { "epoch": 0.43878747872870877, "grad_norm": 0.1015625, "learning_rate": 0.0019835884431465473, "loss": 0.246, "step": 61820 }, { "epoch": 0.43885845696855486, "grad_norm": 0.11376953125, "learning_rate": 0.0019835830624083044, "loss": 0.2364, "step": 61830 }, { "epoch": 0.438929435208401, "grad_norm": 0.15625, "learning_rate": 0.001983577680796253, "loss": 0.2581, "step": 61840 }, { "epoch": 0.4390004134482471, "grad_norm": 0.0771484375, "learning_rate": 0.001983572298310399, "loss": 0.2503, "step": 61850 }, { "epoch": 0.4390713916880932, "grad_norm": 0.146484375, "learning_rate": 0.0019835669149507467, "loss": 0.2384, "step": 61860 }, { "epoch": 0.43914236992793937, "grad_norm": 0.10546875, "learning_rate": 0.0019835615307173026, "loss": 0.2416, "step": 61870 }, { "epoch": 0.43921334816778546, "grad_norm": 0.09130859375, "learning_rate": 0.0019835561456100713, "loss": 0.2407, "step": 61880 }, { "epoch": 0.4392843264076316, "grad_norm": 0.091796875, "learning_rate": 0.0019835507596290586, "loss": 0.2552, "step": 61890 }, { "epoch": 0.4393553046474777, "grad_norm": 0.1455078125, "learning_rate": 0.0019835453727742695, "loss": 0.2478, "step": 61900 }, { "epoch": 0.4394262828873238, "grad_norm": 0.1513671875, "learning_rate": 0.0019835399850457094, "loss": 0.2599, "step": 61910 }, { "epoch": 0.4394972611271699, "grad_norm": 0.08447265625, "learning_rate": 0.0019835345964433837, "loss": 0.2474, "step": 61920 }, { "epoch": 0.43956823936701606, "grad_norm": 0.1044921875, "learning_rate": 0.0019835292069672974, "loss": 0.2373, "step": 61930 }, { "epoch": 0.4396392176068622, "grad_norm": 0.0888671875, "learning_rate": 0.001983523816617456, "loss": 0.2484, "step": 61940 }, { "epoch": 0.4397101958467083, "grad_norm": 0.0693359375, "learning_rate": 0.001983518425393865, "loss": 0.246, "step": 61950 }, { "epoch": 0.4397811740865544, "grad_norm": 0.33984375, "learning_rate": 0.0019835130332965297, "loss": 0.2483, "step": 61960 }, { "epoch": 0.4398521523264005, "grad_norm": 0.173828125, "learning_rate": 0.0019835076403254557, "loss": 0.2505, "step": 61970 }, { "epoch": 0.43992313056624666, "grad_norm": 0.1396484375, "learning_rate": 0.001983502246480648, "loss": 0.2513, "step": 61980 }, { "epoch": 0.4399941088060928, "grad_norm": 0.07763671875, "learning_rate": 0.0019834968517621118, "loss": 0.252, "step": 61990 }, { "epoch": 0.4400650870459389, "grad_norm": 0.11865234375, "learning_rate": 0.0019834914561698523, "loss": 0.2437, "step": 62000 }, { "epoch": 0.4400650870459389, "eval_covost2-zh-en_loss": 3.868107318878174, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.2281, "eval_covost2-zh-en_samples_per_second": 3.015, "eval_covost2-zh-en_steps_per_second": 0.188, "step": 62000 }, { "epoch": 0.4400650870459389, "eval_covost2-en-zh_loss": 3.1033449172973633, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.03, "eval_covost2-en-zh_samples_per_second": 3.195, "eval_covost2-en-zh_steps_per_second": 0.2, "step": 62000 }, { "epoch": 0.440136065285785, "grad_norm": 0.193359375, "learning_rate": 0.001983486059703875, "loss": 0.26, "step": 62010 }, { "epoch": 0.4402070435256311, "grad_norm": 0.11767578125, "learning_rate": 0.001983480662364186, "loss": 0.2519, "step": 62020 }, { "epoch": 0.44027802176547726, "grad_norm": 0.11083984375, "learning_rate": 0.00198347526415079, "loss": 0.2494, "step": 62030 }, { "epoch": 0.44034900000532334, "grad_norm": 0.09033203125, "learning_rate": 0.001983469865063692, "loss": 0.2577, "step": 62040 }, { "epoch": 0.4404199782451695, "grad_norm": 0.1123046875, "learning_rate": 0.0019834644651028975, "loss": 0.2679, "step": 62050 }, { "epoch": 0.44049095648501563, "grad_norm": 0.0791015625, "learning_rate": 0.001983459064268412, "loss": 0.2226, "step": 62060 }, { "epoch": 0.4405619347248617, "grad_norm": 0.1044921875, "learning_rate": 0.0019834536625602415, "loss": 0.2376, "step": 62070 }, { "epoch": 0.44063291296470786, "grad_norm": 0.09765625, "learning_rate": 0.0019834482599783904, "loss": 0.2321, "step": 62080 }, { "epoch": 0.44070389120455394, "grad_norm": 0.12890625, "learning_rate": 0.001983442856522864, "loss": 0.2708, "step": 62090 }, { "epoch": 0.4407748694444001, "grad_norm": 0.09716796875, "learning_rate": 0.0019834374521936686, "loss": 0.2538, "step": 62100 }, { "epoch": 0.44084584768424623, "grad_norm": 0.1650390625, "learning_rate": 0.001983432046990808, "loss": 0.2388, "step": 62110 }, { "epoch": 0.4409168259240923, "grad_norm": 0.1552734375, "learning_rate": 0.001983426640914289, "loss": 0.2381, "step": 62120 }, { "epoch": 0.44098780416393846, "grad_norm": 0.134765625, "learning_rate": 0.0019834212339641167, "loss": 0.2319, "step": 62130 }, { "epoch": 0.44105878240378454, "grad_norm": 0.11669921875, "learning_rate": 0.001983415826140296, "loss": 0.2651, "step": 62140 }, { "epoch": 0.4411297606436307, "grad_norm": 0.099609375, "learning_rate": 0.001983410417442832, "loss": 0.2316, "step": 62150 }, { "epoch": 0.4412007388834768, "grad_norm": 0.119140625, "learning_rate": 0.0019834050078717308, "loss": 0.2531, "step": 62160 }, { "epoch": 0.4412717171233229, "grad_norm": 0.09326171875, "learning_rate": 0.0019833995974269975, "loss": 0.2429, "step": 62170 }, { "epoch": 0.44134269536316906, "grad_norm": 0.08154296875, "learning_rate": 0.0019833941861086375, "loss": 0.2205, "step": 62180 }, { "epoch": 0.44141367360301514, "grad_norm": 0.09228515625, "learning_rate": 0.0019833887739166556, "loss": 0.2465, "step": 62190 }, { "epoch": 0.4414846518428613, "grad_norm": 0.0888671875, "learning_rate": 0.001983383360851058, "loss": 0.2614, "step": 62200 }, { "epoch": 0.4415556300827074, "grad_norm": 0.14453125, "learning_rate": 0.001983377946911849, "loss": 0.2204, "step": 62210 }, { "epoch": 0.4416266083225535, "grad_norm": 0.06982421875, "learning_rate": 0.001983372532099035, "loss": 0.2314, "step": 62220 }, { "epoch": 0.44169758656239966, "grad_norm": 0.08935546875, "learning_rate": 0.001983367116412621, "loss": 0.2531, "step": 62230 }, { "epoch": 0.44176856480224574, "grad_norm": 0.0888671875, "learning_rate": 0.001983361699852612, "loss": 0.2358, "step": 62240 }, { "epoch": 0.4418395430420919, "grad_norm": 0.140625, "learning_rate": 0.001983356282419014, "loss": 0.241, "step": 62250 }, { "epoch": 0.441910521281938, "grad_norm": 0.09326171875, "learning_rate": 0.001983350864111832, "loss": 0.2461, "step": 62260 }, { "epoch": 0.4419814995217841, "grad_norm": 0.08935546875, "learning_rate": 0.001983345444931071, "loss": 0.2328, "step": 62270 }, { "epoch": 0.4420524777616302, "grad_norm": 0.12451171875, "learning_rate": 0.0019833400248767366, "loss": 0.2639, "step": 62280 }, { "epoch": 0.44212345600147634, "grad_norm": 0.11474609375, "learning_rate": 0.0019833346039488343, "loss": 0.2479, "step": 62290 }, { "epoch": 0.4421944342413225, "grad_norm": 0.22265625, "learning_rate": 0.00198332918214737, "loss": 0.2418, "step": 62300 }, { "epoch": 0.4422654124811686, "grad_norm": 0.099609375, "learning_rate": 0.0019833237594723482, "loss": 0.2361, "step": 62310 }, { "epoch": 0.4423363907210147, "grad_norm": 0.08935546875, "learning_rate": 0.001983318335923775, "loss": 0.227, "step": 62320 }, { "epoch": 0.4424073689608608, "grad_norm": 0.09130859375, "learning_rate": 0.001983312911501654, "loss": 0.2296, "step": 62330 }, { "epoch": 0.44247834720070695, "grad_norm": 0.0751953125, "learning_rate": 0.001983307486205993, "loss": 0.252, "step": 62340 }, { "epoch": 0.4425493254405531, "grad_norm": 0.095703125, "learning_rate": 0.0019833020600367964, "loss": 0.2286, "step": 62350 }, { "epoch": 0.4426203036803992, "grad_norm": 0.10400390625, "learning_rate": 0.001983296632994069, "loss": 0.2267, "step": 62360 }, { "epoch": 0.4426912819202453, "grad_norm": 0.1533203125, "learning_rate": 0.0019832912050778166, "loss": 0.231, "step": 62370 }, { "epoch": 0.4427622601600914, "grad_norm": 0.10888671875, "learning_rate": 0.0019832857762880447, "loss": 0.2388, "step": 62380 }, { "epoch": 0.44283323839993755, "grad_norm": 0.06787109375, "learning_rate": 0.001983280346624758, "loss": 0.2374, "step": 62390 }, { "epoch": 0.44290421663978363, "grad_norm": 0.1337890625, "learning_rate": 0.001983274916087963, "loss": 0.2408, "step": 62400 }, { "epoch": 0.4429751948796298, "grad_norm": 0.0888671875, "learning_rate": 0.0019832694846776645, "loss": 0.2444, "step": 62410 }, { "epoch": 0.4430461731194759, "grad_norm": 0.08154296875, "learning_rate": 0.0019832640523938677, "loss": 0.2323, "step": 62420 }, { "epoch": 0.443117151359322, "grad_norm": 0.1171875, "learning_rate": 0.0019832586192365777, "loss": 0.2502, "step": 62430 }, { "epoch": 0.44318812959916815, "grad_norm": 0.072265625, "learning_rate": 0.0019832531852058007, "loss": 0.2351, "step": 62440 }, { "epoch": 0.44325910783901423, "grad_norm": 0.11328125, "learning_rate": 0.0019832477503015414, "loss": 0.2382, "step": 62450 }, { "epoch": 0.4433300860788604, "grad_norm": 0.1337890625, "learning_rate": 0.001983242314523806, "loss": 0.2467, "step": 62460 }, { "epoch": 0.4434010643187065, "grad_norm": 0.08984375, "learning_rate": 0.0019832368778725986, "loss": 0.2485, "step": 62470 }, { "epoch": 0.4434720425585526, "grad_norm": 0.0908203125, "learning_rate": 0.001983231440347926, "loss": 0.2279, "step": 62480 }, { "epoch": 0.44354302079839875, "grad_norm": 0.08984375, "learning_rate": 0.001983226001949792, "loss": 0.2282, "step": 62490 }, { "epoch": 0.44361399903824483, "grad_norm": 0.1220703125, "learning_rate": 0.0019832205626782035, "loss": 0.2512, "step": 62500 }, { "epoch": 0.443684977278091, "grad_norm": 0.103515625, "learning_rate": 0.001983215122533165, "loss": 0.2544, "step": 62510 }, { "epoch": 0.44375595551793706, "grad_norm": 0.125, "learning_rate": 0.0019832096815146825, "loss": 0.2262, "step": 62520 }, { "epoch": 0.4438269337577832, "grad_norm": 0.12255859375, "learning_rate": 0.00198320423962276, "loss": 0.2455, "step": 62530 }, { "epoch": 0.44389791199762935, "grad_norm": 0.1689453125, "learning_rate": 0.001983198796857405, "loss": 0.2422, "step": 62540 }, { "epoch": 0.44396889023747543, "grad_norm": 0.06982421875, "learning_rate": 0.001983193353218621, "loss": 0.2296, "step": 62550 }, { "epoch": 0.4440398684773216, "grad_norm": 0.07763671875, "learning_rate": 0.0019831879087064143, "loss": 0.2302, "step": 62560 }, { "epoch": 0.44411084671716766, "grad_norm": 0.08447265625, "learning_rate": 0.00198318246332079, "loss": 0.2244, "step": 62570 }, { "epoch": 0.4441818249570138, "grad_norm": 0.06298828125, "learning_rate": 0.001983177017061754, "loss": 0.2522, "step": 62580 }, { "epoch": 0.44425280319685995, "grad_norm": 0.1484375, "learning_rate": 0.001983171569929311, "loss": 0.2251, "step": 62590 }, { "epoch": 0.44432378143670603, "grad_norm": 0.1787109375, "learning_rate": 0.0019831661219234667, "loss": 0.2511, "step": 62600 }, { "epoch": 0.4443947596765522, "grad_norm": 0.07568359375, "learning_rate": 0.0019831606730442266, "loss": 0.2406, "step": 62610 }, { "epoch": 0.44446573791639826, "grad_norm": 0.1181640625, "learning_rate": 0.0019831552232915956, "loss": 0.2328, "step": 62620 }, { "epoch": 0.4445367161562444, "grad_norm": 0.07958984375, "learning_rate": 0.0019831497726655795, "loss": 0.2416, "step": 62630 }, { "epoch": 0.4446076943960905, "grad_norm": 0.1328125, "learning_rate": 0.001983144321166184, "loss": 0.2481, "step": 62640 }, { "epoch": 0.44467867263593663, "grad_norm": 0.0947265625, "learning_rate": 0.001983138868793414, "loss": 0.2436, "step": 62650 }, { "epoch": 0.4447496508757828, "grad_norm": 0.08935546875, "learning_rate": 0.0019831334155472747, "loss": 0.2554, "step": 62660 }, { "epoch": 0.44482062911562886, "grad_norm": 0.10400390625, "learning_rate": 0.001983127961427772, "loss": 0.2432, "step": 62670 }, { "epoch": 0.444891607355475, "grad_norm": 0.0888671875, "learning_rate": 0.001983122506434911, "loss": 0.251, "step": 62680 }, { "epoch": 0.4449625855953211, "grad_norm": 0.119140625, "learning_rate": 0.001983117050568697, "loss": 0.2443, "step": 62690 }, { "epoch": 0.44503356383516723, "grad_norm": 0.2138671875, "learning_rate": 0.001983111593829136, "loss": 0.2565, "step": 62700 }, { "epoch": 0.4451045420750134, "grad_norm": 0.1630859375, "learning_rate": 0.001983106136216233, "loss": 0.2206, "step": 62710 }, { "epoch": 0.44517552031485946, "grad_norm": 0.10888671875, "learning_rate": 0.0019831006777299934, "loss": 0.2273, "step": 62720 }, { "epoch": 0.4452464985547056, "grad_norm": 0.11865234375, "learning_rate": 0.0019830952183704223, "loss": 0.2425, "step": 62730 }, { "epoch": 0.4453174767945517, "grad_norm": 0.14453125, "learning_rate": 0.0019830897581375254, "loss": 0.2229, "step": 62740 }, { "epoch": 0.44538845503439783, "grad_norm": 0.1279296875, "learning_rate": 0.0019830842970313084, "loss": 0.2271, "step": 62750 }, { "epoch": 0.4454594332742439, "grad_norm": 0.1015625, "learning_rate": 0.001983078835051776, "loss": 0.2318, "step": 62760 }, { "epoch": 0.44553041151409006, "grad_norm": 0.1533203125, "learning_rate": 0.001983073372198934, "loss": 0.245, "step": 62770 }, { "epoch": 0.4456013897539362, "grad_norm": 0.10888671875, "learning_rate": 0.0019830679084727877, "loss": 0.2473, "step": 62780 }, { "epoch": 0.4456723679937823, "grad_norm": 0.07763671875, "learning_rate": 0.001983062443873343, "loss": 0.2546, "step": 62790 }, { "epoch": 0.44574334623362843, "grad_norm": 0.1181640625, "learning_rate": 0.001983056978400605, "loss": 0.2485, "step": 62800 }, { "epoch": 0.4458143244734745, "grad_norm": 0.10107421875, "learning_rate": 0.0019830515120545784, "loss": 0.2497, "step": 62810 }, { "epoch": 0.44588530271332066, "grad_norm": 0.08642578125, "learning_rate": 0.001983046044835269, "loss": 0.2441, "step": 62820 }, { "epoch": 0.4459562809531668, "grad_norm": 0.146484375, "learning_rate": 0.0019830405767426833, "loss": 0.2606, "step": 62830 }, { "epoch": 0.4460272591930129, "grad_norm": 0.11376953125, "learning_rate": 0.001983035107776825, "loss": 0.2505, "step": 62840 }, { "epoch": 0.44609823743285903, "grad_norm": 0.11376953125, "learning_rate": 0.0019830296379377012, "loss": 0.228, "step": 62850 }, { "epoch": 0.4461692156727051, "grad_norm": 0.185546875, "learning_rate": 0.0019830241672253155, "loss": 0.2359, "step": 62860 }, { "epoch": 0.44624019391255126, "grad_norm": 0.056640625, "learning_rate": 0.001983018695639675, "loss": 0.2352, "step": 62870 }, { "epoch": 0.4463111721523974, "grad_norm": 0.12353515625, "learning_rate": 0.001983013223180784, "loss": 0.2296, "step": 62880 }, { "epoch": 0.4463821503922435, "grad_norm": 0.10107421875, "learning_rate": 0.001983007749848648, "loss": 0.2375, "step": 62890 }, { "epoch": 0.44645312863208964, "grad_norm": 0.0771484375, "learning_rate": 0.001983002275643273, "loss": 0.2334, "step": 62900 }, { "epoch": 0.4465241068719357, "grad_norm": 0.111328125, "learning_rate": 0.001982996800564664, "loss": 0.2358, "step": 62910 }, { "epoch": 0.44659508511178186, "grad_norm": 0.15625, "learning_rate": 0.001982991324612827, "loss": 0.2448, "step": 62920 }, { "epoch": 0.44666606335162795, "grad_norm": 0.1103515625, "learning_rate": 0.001982985847787766, "loss": 0.242, "step": 62930 }, { "epoch": 0.4467370415914741, "grad_norm": 0.11865234375, "learning_rate": 0.0019829803700894878, "loss": 0.2666, "step": 62940 }, { "epoch": 0.44680801983132024, "grad_norm": 0.09912109375, "learning_rate": 0.0019829748915179976, "loss": 0.2313, "step": 62950 }, { "epoch": 0.4468789980711663, "grad_norm": 0.07421875, "learning_rate": 0.0019829694120733, "loss": 0.244, "step": 62960 }, { "epoch": 0.44694997631101246, "grad_norm": 0.095703125, "learning_rate": 0.0019829639317554015, "loss": 0.2459, "step": 62970 }, { "epoch": 0.44702095455085855, "grad_norm": 0.10693359375, "learning_rate": 0.001982958450564307, "loss": 0.2404, "step": 62980 }, { "epoch": 0.4470919327907047, "grad_norm": 0.09716796875, "learning_rate": 0.0019829529685000216, "loss": 0.2242, "step": 62990 }, { "epoch": 0.44716291103055084, "grad_norm": 0.1201171875, "learning_rate": 0.001982947485562551, "loss": 0.2523, "step": 63000 }, { "epoch": 0.4472338892703969, "grad_norm": 0.150390625, "learning_rate": 0.0019829420017519013, "loss": 0.2366, "step": 63010 }, { "epoch": 0.44730486751024306, "grad_norm": 0.08056640625, "learning_rate": 0.001982936517068077, "loss": 0.2325, "step": 63020 }, { "epoch": 0.44737584575008915, "grad_norm": 0.140625, "learning_rate": 0.0019829310315110835, "loss": 0.2282, "step": 63030 }, { "epoch": 0.4474468239899353, "grad_norm": 0.08056640625, "learning_rate": 0.001982925545080927, "loss": 0.2307, "step": 63040 }, { "epoch": 0.4475178022297814, "grad_norm": 0.1396484375, "learning_rate": 0.0019829200577776117, "loss": 0.2298, "step": 63050 }, { "epoch": 0.4475887804696275, "grad_norm": 0.1474609375, "learning_rate": 0.0019829145696011446, "loss": 0.2363, "step": 63060 }, { "epoch": 0.44765975870947367, "grad_norm": 0.09912109375, "learning_rate": 0.00198290908055153, "loss": 0.236, "step": 63070 }, { "epoch": 0.44773073694931975, "grad_norm": 0.224609375, "learning_rate": 0.0019829035906287736, "loss": 0.2361, "step": 63080 }, { "epoch": 0.4478017151891659, "grad_norm": 0.095703125, "learning_rate": 0.001982898099832881, "loss": 0.2705, "step": 63090 }, { "epoch": 0.447872693429012, "grad_norm": 0.103515625, "learning_rate": 0.001982892608163858, "loss": 0.2543, "step": 63100 }, { "epoch": 0.4479436716688581, "grad_norm": 0.1025390625, "learning_rate": 0.001982887115621709, "loss": 0.2256, "step": 63110 }, { "epoch": 0.44801464990870427, "grad_norm": 0.1318359375, "learning_rate": 0.0019828816222064397, "loss": 0.22, "step": 63120 }, { "epoch": 0.44808562814855035, "grad_norm": 0.099609375, "learning_rate": 0.001982876127918056, "loss": 0.2417, "step": 63130 }, { "epoch": 0.4481566063883965, "grad_norm": 0.11328125, "learning_rate": 0.0019828706327565636, "loss": 0.2478, "step": 63140 }, { "epoch": 0.4482275846282426, "grad_norm": 0.10791015625, "learning_rate": 0.001982865136721967, "loss": 0.2206, "step": 63150 }, { "epoch": 0.4482985628680887, "grad_norm": 0.09033203125, "learning_rate": 0.0019828596398142723, "loss": 0.2255, "step": 63160 }, { "epoch": 0.4483695411079348, "grad_norm": 0.1025390625, "learning_rate": 0.001982854142033485, "loss": 0.2416, "step": 63170 }, { "epoch": 0.44844051934778095, "grad_norm": 0.126953125, "learning_rate": 0.00198284864337961, "loss": 0.2353, "step": 63180 }, { "epoch": 0.4485114975876271, "grad_norm": 0.1689453125, "learning_rate": 0.0019828431438526526, "loss": 0.2669, "step": 63190 }, { "epoch": 0.4485824758274732, "grad_norm": 0.06689453125, "learning_rate": 0.0019828376434526196, "loss": 0.2499, "step": 63200 }, { "epoch": 0.4486534540673193, "grad_norm": 0.07861328125, "learning_rate": 0.0019828321421795146, "loss": 0.222, "step": 63210 }, { "epoch": 0.4487244323071654, "grad_norm": 0.1171875, "learning_rate": 0.0019828266400333445, "loss": 0.2444, "step": 63220 }, { "epoch": 0.44879541054701155, "grad_norm": 0.08935546875, "learning_rate": 0.001982821137014114, "loss": 0.238, "step": 63230 }, { "epoch": 0.4488663887868577, "grad_norm": 0.1064453125, "learning_rate": 0.001982815633121829, "loss": 0.2253, "step": 63240 }, { "epoch": 0.4489373670267038, "grad_norm": 0.0947265625, "learning_rate": 0.0019828101283564945, "loss": 0.2336, "step": 63250 }, { "epoch": 0.4490083452665499, "grad_norm": 0.0869140625, "learning_rate": 0.001982804622718116, "loss": 0.2366, "step": 63260 }, { "epoch": 0.449079323506396, "grad_norm": 0.1669921875, "learning_rate": 0.001982799116206699, "loss": 0.2151, "step": 63270 }, { "epoch": 0.44915030174624215, "grad_norm": 0.10205078125, "learning_rate": 0.001982793608822249, "loss": 0.2665, "step": 63280 }, { "epoch": 0.44922127998608824, "grad_norm": 0.087890625, "learning_rate": 0.001982788100564772, "loss": 0.2239, "step": 63290 }, { "epoch": 0.4492922582259344, "grad_norm": 0.09814453125, "learning_rate": 0.001982782591434272, "loss": 0.2301, "step": 63300 }, { "epoch": 0.4493632364657805, "grad_norm": 0.1025390625, "learning_rate": 0.0019827770814307556, "loss": 0.2479, "step": 63310 }, { "epoch": 0.4494342147056266, "grad_norm": 0.0693359375, "learning_rate": 0.001982771570554229, "loss": 0.2546, "step": 63320 }, { "epoch": 0.44950519294547275, "grad_norm": 0.154296875, "learning_rate": 0.0019827660588046954, "loss": 0.2363, "step": 63330 }, { "epoch": 0.44957617118531884, "grad_norm": 0.251953125, "learning_rate": 0.0019827605461821616, "loss": 0.2525, "step": 63340 }, { "epoch": 0.449647149425165, "grad_norm": 0.0673828125, "learning_rate": 0.0019827550326866335, "loss": 0.2446, "step": 63350 }, { "epoch": 0.4497181276650111, "grad_norm": 0.140625, "learning_rate": 0.0019827495183181157, "loss": 0.2481, "step": 63360 }, { "epoch": 0.4497891059048572, "grad_norm": 0.2412109375, "learning_rate": 0.001982744003076614, "loss": 0.24, "step": 63370 }, { "epoch": 0.44986008414470335, "grad_norm": 0.09619140625, "learning_rate": 0.0019827384869621338, "loss": 0.2428, "step": 63380 }, { "epoch": 0.44993106238454944, "grad_norm": 0.1953125, "learning_rate": 0.0019827329699746805, "loss": 0.2658, "step": 63390 }, { "epoch": 0.4500020406243956, "grad_norm": 0.1015625, "learning_rate": 0.0019827274521142598, "loss": 0.2505, "step": 63400 }, { "epoch": 0.45007301886424167, "grad_norm": 0.12158203125, "learning_rate": 0.001982721933380877, "loss": 0.2366, "step": 63410 }, { "epoch": 0.4501439971040878, "grad_norm": 0.08837890625, "learning_rate": 0.0019827164137745375, "loss": 0.2456, "step": 63420 }, { "epoch": 0.45021497534393395, "grad_norm": 0.076171875, "learning_rate": 0.0019827108932952463, "loss": 0.2241, "step": 63430 }, { "epoch": 0.45028595358378004, "grad_norm": 0.1025390625, "learning_rate": 0.0019827053719430098, "loss": 0.2249, "step": 63440 }, { "epoch": 0.4503569318236262, "grad_norm": 0.09375, "learning_rate": 0.001982699849717833, "loss": 0.2273, "step": 63450 }, { "epoch": 0.45042791006347227, "grad_norm": 0.10498046875, "learning_rate": 0.0019826943266197214, "loss": 0.2423, "step": 63460 }, { "epoch": 0.4504988883033184, "grad_norm": 0.09765625, "learning_rate": 0.0019826888026486804, "loss": 0.2528, "step": 63470 }, { "epoch": 0.45056986654316455, "grad_norm": 0.1591796875, "learning_rate": 0.0019826832778047153, "loss": 0.2581, "step": 63480 }, { "epoch": 0.45064084478301064, "grad_norm": 0.142578125, "learning_rate": 0.001982677752087832, "loss": 0.2304, "step": 63490 }, { "epoch": 0.4507118230228568, "grad_norm": 0.11572265625, "learning_rate": 0.0019826722254980356, "loss": 0.2484, "step": 63500 }, { "epoch": 0.45078280126270287, "grad_norm": 0.07568359375, "learning_rate": 0.001982666698035332, "loss": 0.2388, "step": 63510 }, { "epoch": 0.450853779502549, "grad_norm": 0.10107421875, "learning_rate": 0.0019826611696997258, "loss": 0.2627, "step": 63520 }, { "epoch": 0.4509247577423951, "grad_norm": 0.06689453125, "learning_rate": 0.0019826556404912237, "loss": 0.2353, "step": 63530 }, { "epoch": 0.45099573598224124, "grad_norm": 0.083984375, "learning_rate": 0.00198265011040983, "loss": 0.248, "step": 63540 }, { "epoch": 0.4510667142220874, "grad_norm": 0.0908203125, "learning_rate": 0.0019826445794555504, "loss": 0.2391, "step": 63550 }, { "epoch": 0.45113769246193347, "grad_norm": 0.10205078125, "learning_rate": 0.0019826390476283913, "loss": 0.2272, "step": 63560 }, { "epoch": 0.4512086707017796, "grad_norm": 0.1142578125, "learning_rate": 0.001982633514928357, "loss": 0.2382, "step": 63570 }, { "epoch": 0.4512796489416257, "grad_norm": 0.123046875, "learning_rate": 0.0019826279813554538, "loss": 0.2404, "step": 63580 }, { "epoch": 0.45135062718147184, "grad_norm": 0.119140625, "learning_rate": 0.0019826224469096866, "loss": 0.2367, "step": 63590 }, { "epoch": 0.451421605421318, "grad_norm": 0.1298828125, "learning_rate": 0.0019826169115910613, "loss": 0.2415, "step": 63600 }, { "epoch": 0.45149258366116407, "grad_norm": 0.0947265625, "learning_rate": 0.001982611375399583, "loss": 0.2364, "step": 63610 }, { "epoch": 0.4515635619010102, "grad_norm": 0.09521484375, "learning_rate": 0.0019826058383352577, "loss": 0.2347, "step": 63620 }, { "epoch": 0.4516345401408563, "grad_norm": 0.2109375, "learning_rate": 0.0019826003003980902, "loss": 0.2319, "step": 63630 }, { "epoch": 0.45170551838070244, "grad_norm": 0.1689453125, "learning_rate": 0.0019825947615880867, "loss": 0.2405, "step": 63640 }, { "epoch": 0.45177649662054853, "grad_norm": 0.08203125, "learning_rate": 0.001982589221905252, "loss": 0.252, "step": 63650 }, { "epoch": 0.45184747486039467, "grad_norm": 0.08740234375, "learning_rate": 0.0019825836813495918, "loss": 0.2249, "step": 63660 }, { "epoch": 0.4519184531002408, "grad_norm": 0.08349609375, "learning_rate": 0.0019825781399211116, "loss": 0.2377, "step": 63670 }, { "epoch": 0.4519894313400869, "grad_norm": 0.095703125, "learning_rate": 0.001982572597619817, "loss": 0.2474, "step": 63680 }, { "epoch": 0.45206040957993304, "grad_norm": 0.10400390625, "learning_rate": 0.0019825670544457137, "loss": 0.2292, "step": 63690 }, { "epoch": 0.45213138781977913, "grad_norm": 0.1162109375, "learning_rate": 0.001982561510398807, "loss": 0.2456, "step": 63700 }, { "epoch": 0.45220236605962527, "grad_norm": 0.0947265625, "learning_rate": 0.001982555965479102, "loss": 0.2469, "step": 63710 }, { "epoch": 0.4522733442994714, "grad_norm": 0.1279296875, "learning_rate": 0.001982550419686604, "loss": 0.2303, "step": 63720 }, { "epoch": 0.4523443225393175, "grad_norm": 0.0947265625, "learning_rate": 0.0019825448730213194, "loss": 0.2421, "step": 63730 }, { "epoch": 0.45241530077916364, "grad_norm": 0.10400390625, "learning_rate": 0.0019825393254832534, "loss": 0.227, "step": 63740 }, { "epoch": 0.45248627901900973, "grad_norm": 0.07958984375, "learning_rate": 0.001982533777072411, "loss": 0.2437, "step": 63750 }, { "epoch": 0.45255725725885587, "grad_norm": 0.0810546875, "learning_rate": 0.0019825282277887986, "loss": 0.2416, "step": 63760 }, { "epoch": 0.45262823549870196, "grad_norm": 0.08447265625, "learning_rate": 0.0019825226776324202, "loss": 0.2396, "step": 63770 }, { "epoch": 0.4526992137385481, "grad_norm": 0.11474609375, "learning_rate": 0.001982517126603283, "loss": 0.2337, "step": 63780 }, { "epoch": 0.45277019197839424, "grad_norm": 0.103515625, "learning_rate": 0.0019825115747013917, "loss": 0.2502, "step": 63790 }, { "epoch": 0.45284117021824033, "grad_norm": 0.1845703125, "learning_rate": 0.001982506021926751, "loss": 0.2381, "step": 63800 }, { "epoch": 0.45291214845808647, "grad_norm": 0.10693359375, "learning_rate": 0.0019825004682793674, "loss": 0.2418, "step": 63810 }, { "epoch": 0.45298312669793256, "grad_norm": 0.10595703125, "learning_rate": 0.0019824949137592462, "loss": 0.246, "step": 63820 }, { "epoch": 0.4530541049377787, "grad_norm": 0.083984375, "learning_rate": 0.001982489358366393, "loss": 0.2428, "step": 63830 }, { "epoch": 0.45312508317762484, "grad_norm": 0.08203125, "learning_rate": 0.0019824838021008134, "loss": 0.2348, "step": 63840 }, { "epoch": 0.45319606141747093, "grad_norm": 0.083984375, "learning_rate": 0.0019824782449625123, "loss": 0.2332, "step": 63850 }, { "epoch": 0.4532670396573171, "grad_norm": 0.107421875, "learning_rate": 0.001982472686951495, "loss": 0.2255, "step": 63860 }, { "epoch": 0.45333801789716316, "grad_norm": 0.08154296875, "learning_rate": 0.001982467128067768, "loss": 0.2253, "step": 63870 }, { "epoch": 0.4534089961370093, "grad_norm": 0.0869140625, "learning_rate": 0.001982461568311337, "loss": 0.2284, "step": 63880 }, { "epoch": 0.4534799743768554, "grad_norm": 0.09619140625, "learning_rate": 0.0019824560076822056, "loss": 0.2367, "step": 63890 }, { "epoch": 0.45355095261670153, "grad_norm": 0.1376953125, "learning_rate": 0.0019824504461803815, "loss": 0.2386, "step": 63900 }, { "epoch": 0.4536219308565477, "grad_norm": 0.11279296875, "learning_rate": 0.0019824448838058686, "loss": 0.234, "step": 63910 }, { "epoch": 0.45369290909639376, "grad_norm": 0.1337890625, "learning_rate": 0.0019824393205586733, "loss": 0.2693, "step": 63920 }, { "epoch": 0.4537638873362399, "grad_norm": 0.08984375, "learning_rate": 0.001982433756438801, "loss": 0.2329, "step": 63930 }, { "epoch": 0.453834865576086, "grad_norm": 0.11279296875, "learning_rate": 0.0019824281914462565, "loss": 0.2378, "step": 63940 }, { "epoch": 0.45390584381593213, "grad_norm": 0.07177734375, "learning_rate": 0.0019824226255810463, "loss": 0.2283, "step": 63950 }, { "epoch": 0.4539768220557783, "grad_norm": 0.123046875, "learning_rate": 0.0019824170588431753, "loss": 0.2295, "step": 63960 }, { "epoch": 0.45404780029562436, "grad_norm": 0.07763671875, "learning_rate": 0.001982411491232649, "loss": 0.2542, "step": 63970 }, { "epoch": 0.4541187785354705, "grad_norm": 0.1455078125, "learning_rate": 0.0019824059227494728, "loss": 0.2314, "step": 63980 }, { "epoch": 0.4541897567753166, "grad_norm": 0.09326171875, "learning_rate": 0.001982400353393653, "loss": 0.2454, "step": 63990 }, { "epoch": 0.45426073501516273, "grad_norm": 0.14453125, "learning_rate": 0.001982394783165194, "loss": 0.2501, "step": 64000 }, { "epoch": 0.45426073501516273, "eval_covost2-zh-en_loss": 3.851095676422119, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.3635, "eval_covost2-zh-en_samples_per_second": 2.996, "eval_covost2-zh-en_steps_per_second": 0.187, "step": 64000 }, { "epoch": 0.45426073501516273, "eval_covost2-en-zh_loss": 3.1495375633239746, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 23.0625, "eval_covost2-en-zh_samples_per_second": 2.775, "eval_covost2-en-zh_steps_per_second": 0.173, "step": 64000 }, { "epoch": 0.4543317132550089, "grad_norm": 0.087890625, "learning_rate": 0.001982389212064102, "loss": 0.2442, "step": 64010 }, { "epoch": 0.45440269149485496, "grad_norm": 0.09033203125, "learning_rate": 0.001982383640090383, "loss": 0.2335, "step": 64020 }, { "epoch": 0.4544736697347011, "grad_norm": 0.10693359375, "learning_rate": 0.0019823780672440414, "loss": 0.248, "step": 64030 }, { "epoch": 0.4545446479745472, "grad_norm": 0.10595703125, "learning_rate": 0.001982372493525083, "loss": 0.2682, "step": 64040 }, { "epoch": 0.45461562621439333, "grad_norm": 0.095703125, "learning_rate": 0.0019823669189335136, "loss": 0.2435, "step": 64050 }, { "epoch": 0.4546866044542394, "grad_norm": 0.109375, "learning_rate": 0.0019823613434693387, "loss": 0.2306, "step": 64060 }, { "epoch": 0.45475758269408556, "grad_norm": 0.07666015625, "learning_rate": 0.001982355767132564, "loss": 0.2399, "step": 64070 }, { "epoch": 0.4548285609339317, "grad_norm": 0.142578125, "learning_rate": 0.0019823501899231943, "loss": 0.2318, "step": 64080 }, { "epoch": 0.4548995391737778, "grad_norm": 0.1318359375, "learning_rate": 0.001982344611841236, "loss": 0.24, "step": 64090 }, { "epoch": 0.45497051741362393, "grad_norm": 0.1416015625, "learning_rate": 0.001982339032886694, "loss": 0.2421, "step": 64100 }, { "epoch": 0.45504149565347, "grad_norm": 0.1181640625, "learning_rate": 0.0019823334530595735, "loss": 0.2268, "step": 64110 }, { "epoch": 0.45511247389331616, "grad_norm": 0.083984375, "learning_rate": 0.0019823278723598813, "loss": 0.2276, "step": 64120 }, { "epoch": 0.4551834521331623, "grad_norm": 0.1005859375, "learning_rate": 0.0019823222907876217, "loss": 0.233, "step": 64130 }, { "epoch": 0.4552544303730084, "grad_norm": 0.0986328125, "learning_rate": 0.0019823167083428007, "loss": 0.2608, "step": 64140 }, { "epoch": 0.45532540861285453, "grad_norm": 0.078125, "learning_rate": 0.0019823111250254237, "loss": 0.2542, "step": 64150 }, { "epoch": 0.4553963868527006, "grad_norm": 0.111328125, "learning_rate": 0.0019823055408354964, "loss": 0.2319, "step": 64160 }, { "epoch": 0.45546736509254676, "grad_norm": 0.10498046875, "learning_rate": 0.0019822999557730244, "loss": 0.2579, "step": 64170 }, { "epoch": 0.45553834333239285, "grad_norm": 0.1025390625, "learning_rate": 0.001982294369838013, "loss": 0.2464, "step": 64180 }, { "epoch": 0.455609321572239, "grad_norm": 0.08740234375, "learning_rate": 0.0019822887830304677, "loss": 0.2501, "step": 64190 }, { "epoch": 0.45568029981208513, "grad_norm": 0.08544921875, "learning_rate": 0.001982283195350394, "loss": 0.2295, "step": 64200 }, { "epoch": 0.4557512780519312, "grad_norm": 0.0986328125, "learning_rate": 0.0019822776067977975, "loss": 0.2374, "step": 64210 }, { "epoch": 0.45582225629177736, "grad_norm": 0.15625, "learning_rate": 0.001982272017372684, "loss": 0.2416, "step": 64220 }, { "epoch": 0.45589323453162345, "grad_norm": 0.091796875, "learning_rate": 0.0019822664270750584, "loss": 0.2369, "step": 64230 }, { "epoch": 0.4559642127714696, "grad_norm": 0.107421875, "learning_rate": 0.0019822608359049265, "loss": 0.2449, "step": 64240 }, { "epoch": 0.45603519101131573, "grad_norm": 0.1328125, "learning_rate": 0.001982255243862294, "loss": 0.2665, "step": 64250 }, { "epoch": 0.4561061692511618, "grad_norm": 0.13671875, "learning_rate": 0.0019822496509471666, "loss": 0.2643, "step": 64260 }, { "epoch": 0.45617714749100796, "grad_norm": 0.140625, "learning_rate": 0.00198224405715955, "loss": 0.2504, "step": 64270 }, { "epoch": 0.45624812573085405, "grad_norm": 0.1376953125, "learning_rate": 0.0019822384624994483, "loss": 0.2479, "step": 64280 }, { "epoch": 0.4563191039707002, "grad_norm": 0.1728515625, "learning_rate": 0.001982232866966869, "loss": 0.2301, "step": 64290 }, { "epoch": 0.4563900822105463, "grad_norm": 0.08544921875, "learning_rate": 0.001982227270561816, "loss": 0.2608, "step": 64300 }, { "epoch": 0.4564610604503924, "grad_norm": 0.12060546875, "learning_rate": 0.001982221673284296, "loss": 0.2264, "step": 64310 }, { "epoch": 0.45653203869023856, "grad_norm": 0.1005859375, "learning_rate": 0.001982216075134314, "loss": 0.2446, "step": 64320 }, { "epoch": 0.45660301693008465, "grad_norm": 0.1064453125, "learning_rate": 0.0019822104761118755, "loss": 0.2318, "step": 64330 }, { "epoch": 0.4566739951699308, "grad_norm": 0.11474609375, "learning_rate": 0.001982204876216986, "loss": 0.2289, "step": 64340 }, { "epoch": 0.4567449734097769, "grad_norm": 0.10400390625, "learning_rate": 0.001982199275449651, "loss": 0.2492, "step": 64350 }, { "epoch": 0.456815951649623, "grad_norm": 0.07470703125, "learning_rate": 0.0019821936738098765, "loss": 0.2285, "step": 64360 }, { "epoch": 0.45688692988946916, "grad_norm": 0.1103515625, "learning_rate": 0.001982188071297668, "loss": 0.2475, "step": 64370 }, { "epoch": 0.45695790812931525, "grad_norm": 0.2265625, "learning_rate": 0.0019821824679130302, "loss": 0.2439, "step": 64380 }, { "epoch": 0.4570288863691614, "grad_norm": 0.07763671875, "learning_rate": 0.0019821768636559696, "loss": 0.2334, "step": 64390 }, { "epoch": 0.4570998646090075, "grad_norm": 0.1357421875, "learning_rate": 0.0019821712585264915, "loss": 0.2274, "step": 64400 }, { "epoch": 0.4571708428488536, "grad_norm": 0.126953125, "learning_rate": 0.001982165652524601, "loss": 0.238, "step": 64410 }, { "epoch": 0.4572418210886997, "grad_norm": 0.1328125, "learning_rate": 0.001982160045650304, "loss": 0.2294, "step": 64420 }, { "epoch": 0.45731279932854585, "grad_norm": 0.12060546875, "learning_rate": 0.0019821544379036065, "loss": 0.2238, "step": 64430 }, { "epoch": 0.457383777568392, "grad_norm": 0.064453125, "learning_rate": 0.001982148829284513, "loss": 0.2412, "step": 64440 }, { "epoch": 0.4574547558082381, "grad_norm": 0.09814453125, "learning_rate": 0.0019821432197930297, "loss": 0.2398, "step": 64450 }, { "epoch": 0.4575257340480842, "grad_norm": 0.0947265625, "learning_rate": 0.0019821376094291622, "loss": 0.2646, "step": 64460 }, { "epoch": 0.4575967122879303, "grad_norm": 0.111328125, "learning_rate": 0.0019821319981929157, "loss": 0.2437, "step": 64470 }, { "epoch": 0.45766769052777645, "grad_norm": 0.10546875, "learning_rate": 0.001982126386084296, "loss": 0.2659, "step": 64480 }, { "epoch": 0.4577386687676226, "grad_norm": 0.1044921875, "learning_rate": 0.0019821207731033088, "loss": 0.2447, "step": 64490 }, { "epoch": 0.4578096470074687, "grad_norm": 0.09228515625, "learning_rate": 0.0019821151592499587, "loss": 0.2752, "step": 64500 }, { "epoch": 0.4578806252473148, "grad_norm": 0.1240234375, "learning_rate": 0.0019821095445242526, "loss": 0.2221, "step": 64510 }, { "epoch": 0.4579516034871609, "grad_norm": 0.1240234375, "learning_rate": 0.0019821039289261953, "loss": 0.2352, "step": 64520 }, { "epoch": 0.45802258172700705, "grad_norm": 0.173828125, "learning_rate": 0.001982098312455793, "loss": 0.2347, "step": 64530 }, { "epoch": 0.45809355996685314, "grad_norm": 0.142578125, "learning_rate": 0.0019820926951130504, "loss": 0.2343, "step": 64540 }, { "epoch": 0.4581645382066993, "grad_norm": 0.1923828125, "learning_rate": 0.0019820870768979732, "loss": 0.2447, "step": 64550 }, { "epoch": 0.4582355164465454, "grad_norm": 0.08740234375, "learning_rate": 0.0019820814578105674, "loss": 0.2639, "step": 64560 }, { "epoch": 0.4583064946863915, "grad_norm": 0.095703125, "learning_rate": 0.001982075837850838, "loss": 0.2547, "step": 64570 }, { "epoch": 0.45837747292623765, "grad_norm": 0.095703125, "learning_rate": 0.0019820702170187912, "loss": 0.2451, "step": 64580 }, { "epoch": 0.45844845116608374, "grad_norm": 0.1533203125, "learning_rate": 0.0019820645953144323, "loss": 0.2316, "step": 64590 }, { "epoch": 0.4585194294059299, "grad_norm": 0.1806640625, "learning_rate": 0.0019820589727377663, "loss": 0.2326, "step": 64600 }, { "epoch": 0.458590407645776, "grad_norm": 0.1044921875, "learning_rate": 0.0019820533492888, "loss": 0.2376, "step": 64610 }, { "epoch": 0.4586613858856221, "grad_norm": 0.07763671875, "learning_rate": 0.0019820477249675376, "loss": 0.2492, "step": 64620 }, { "epoch": 0.45873236412546825, "grad_norm": 0.1748046875, "learning_rate": 0.0019820420997739858, "loss": 0.2374, "step": 64630 }, { "epoch": 0.45880334236531434, "grad_norm": 0.08154296875, "learning_rate": 0.001982036473708149, "loss": 0.2331, "step": 64640 }, { "epoch": 0.4588743206051605, "grad_norm": 0.10546875, "learning_rate": 0.001982030846770034, "loss": 0.2454, "step": 64650 }, { "epoch": 0.45894529884500657, "grad_norm": 0.08740234375, "learning_rate": 0.0019820252189596457, "loss": 0.243, "step": 64660 }, { "epoch": 0.4590162770848527, "grad_norm": 0.1376953125, "learning_rate": 0.0019820195902769894, "loss": 0.2311, "step": 64670 }, { "epoch": 0.45908725532469885, "grad_norm": 0.09375, "learning_rate": 0.0019820139607220713, "loss": 0.2282, "step": 64680 }, { "epoch": 0.45915823356454494, "grad_norm": 0.1162109375, "learning_rate": 0.0019820083302948964, "loss": 0.2278, "step": 64690 }, { "epoch": 0.4592292118043911, "grad_norm": 0.06689453125, "learning_rate": 0.0019820026989954705, "loss": 0.2328, "step": 64700 }, { "epoch": 0.45930019004423717, "grad_norm": 0.09765625, "learning_rate": 0.0019819970668237996, "loss": 0.2394, "step": 64710 }, { "epoch": 0.4593711682840833, "grad_norm": 0.126953125, "learning_rate": 0.0019819914337798885, "loss": 0.2541, "step": 64720 }, { "epoch": 0.45944214652392945, "grad_norm": 0.099609375, "learning_rate": 0.0019819857998637437, "loss": 0.2346, "step": 64730 }, { "epoch": 0.45951312476377554, "grad_norm": 0.1474609375, "learning_rate": 0.0019819801650753695, "loss": 0.2446, "step": 64740 }, { "epoch": 0.4595841030036217, "grad_norm": 0.09130859375, "learning_rate": 0.0019819745294147725, "loss": 0.2455, "step": 64750 }, { "epoch": 0.45965508124346777, "grad_norm": 0.07666015625, "learning_rate": 0.0019819688928819582, "loss": 0.2362, "step": 64760 }, { "epoch": 0.4597260594833139, "grad_norm": 0.1123046875, "learning_rate": 0.001981963255476932, "loss": 0.2349, "step": 64770 }, { "epoch": 0.45979703772316, "grad_norm": 0.0947265625, "learning_rate": 0.001981957617199699, "loss": 0.2309, "step": 64780 }, { "epoch": 0.45986801596300614, "grad_norm": 0.189453125, "learning_rate": 0.0019819519780502654, "loss": 0.2487, "step": 64790 }, { "epoch": 0.4599389942028523, "grad_norm": 0.150390625, "learning_rate": 0.0019819463380286364, "loss": 0.2365, "step": 64800 }, { "epoch": 0.46000997244269837, "grad_norm": 0.09912109375, "learning_rate": 0.001981940697134818, "loss": 0.2325, "step": 64810 }, { "epoch": 0.4600809506825445, "grad_norm": 0.09228515625, "learning_rate": 0.001981935055368815, "loss": 0.2289, "step": 64820 }, { "epoch": 0.4601519289223906, "grad_norm": 0.10546875, "learning_rate": 0.0019819294127306343, "loss": 0.2479, "step": 64830 }, { "epoch": 0.46022290716223674, "grad_norm": 0.1044921875, "learning_rate": 0.0019819237692202804, "loss": 0.2432, "step": 64840 }, { "epoch": 0.4602938854020829, "grad_norm": 0.08984375, "learning_rate": 0.001981918124837759, "loss": 0.2403, "step": 64850 }, { "epoch": 0.46036486364192897, "grad_norm": 0.08984375, "learning_rate": 0.0019819124795830757, "loss": 0.2506, "step": 64860 }, { "epoch": 0.4604358418817751, "grad_norm": 0.099609375, "learning_rate": 0.0019819068334562366, "loss": 0.2399, "step": 64870 }, { "epoch": 0.4605068201216212, "grad_norm": 0.134765625, "learning_rate": 0.0019819011864572467, "loss": 0.2325, "step": 64880 }, { "epoch": 0.46057779836146734, "grad_norm": 0.08349609375, "learning_rate": 0.001981895538586112, "loss": 0.2243, "step": 64890 }, { "epoch": 0.4606487766013134, "grad_norm": 0.1396484375, "learning_rate": 0.0019818898898428376, "loss": 0.2418, "step": 64900 }, { "epoch": 0.46071975484115957, "grad_norm": 0.1015625, "learning_rate": 0.0019818842402274297, "loss": 0.2345, "step": 64910 }, { "epoch": 0.4607907330810057, "grad_norm": 0.10546875, "learning_rate": 0.001981878589739893, "loss": 0.2567, "step": 64920 }, { "epoch": 0.4608617113208518, "grad_norm": 0.13671875, "learning_rate": 0.001981872938380234, "loss": 0.2545, "step": 64930 }, { "epoch": 0.46093268956069794, "grad_norm": 0.0791015625, "learning_rate": 0.0019818672861484582, "loss": 0.223, "step": 64940 }, { "epoch": 0.461003667800544, "grad_norm": 0.06884765625, "learning_rate": 0.001981861633044571, "loss": 0.2256, "step": 64950 }, { "epoch": 0.46107464604039017, "grad_norm": 0.1328125, "learning_rate": 0.0019818559790685773, "loss": 0.234, "step": 64960 }, { "epoch": 0.4611456242802363, "grad_norm": 0.09619140625, "learning_rate": 0.0019818503242204836, "loss": 0.2453, "step": 64970 }, { "epoch": 0.4612166025200824, "grad_norm": 0.11865234375, "learning_rate": 0.001981844668500295, "loss": 0.2445, "step": 64980 }, { "epoch": 0.46128758075992854, "grad_norm": 0.11279296875, "learning_rate": 0.0019818390119080175, "loss": 0.2604, "step": 64990 }, { "epoch": 0.4613585589997746, "grad_norm": 0.0966796875, "learning_rate": 0.001981833354443657, "loss": 0.2257, "step": 65000 }, { "epoch": 0.46142953723962077, "grad_norm": 0.09326171875, "learning_rate": 0.0019818276961072177, "loss": 0.2235, "step": 65010 }, { "epoch": 0.46150051547946686, "grad_norm": 0.1259765625, "learning_rate": 0.001981822036898706, "loss": 0.2476, "step": 65020 }, { "epoch": 0.461571493719313, "grad_norm": 0.09033203125, "learning_rate": 0.0019818163768181283, "loss": 0.2451, "step": 65030 }, { "epoch": 0.46164247195915914, "grad_norm": 0.068359375, "learning_rate": 0.0019818107158654894, "loss": 0.235, "step": 65040 }, { "epoch": 0.4617134501990052, "grad_norm": 0.07080078125, "learning_rate": 0.0019818050540407945, "loss": 0.2484, "step": 65050 }, { "epoch": 0.46178442843885137, "grad_norm": 0.11376953125, "learning_rate": 0.00198179939134405, "loss": 0.2292, "step": 65060 }, { "epoch": 0.46185540667869746, "grad_norm": 0.1123046875, "learning_rate": 0.001981793727775261, "loss": 0.2525, "step": 65070 }, { "epoch": 0.4619263849185436, "grad_norm": 0.119140625, "learning_rate": 0.0019817880633344336, "loss": 0.235, "step": 65080 }, { "epoch": 0.46199736315838974, "grad_norm": 0.0947265625, "learning_rate": 0.001981782398021573, "loss": 0.2214, "step": 65090 }, { "epoch": 0.4620683413982358, "grad_norm": 0.10693359375, "learning_rate": 0.0019817767318366843, "loss": 0.269, "step": 65100 }, { "epoch": 0.46213931963808197, "grad_norm": 0.08447265625, "learning_rate": 0.001981771064779774, "loss": 0.2301, "step": 65110 }, { "epoch": 0.46221029787792806, "grad_norm": 0.111328125, "learning_rate": 0.0019817653968508477, "loss": 0.2379, "step": 65120 }, { "epoch": 0.4622812761177742, "grad_norm": 0.1259765625, "learning_rate": 0.0019817597280499103, "loss": 0.2362, "step": 65130 }, { "epoch": 0.46235225435762034, "grad_norm": 0.06982421875, "learning_rate": 0.001981754058376968, "loss": 0.248, "step": 65140 }, { "epoch": 0.4624232325974664, "grad_norm": 0.11669921875, "learning_rate": 0.0019817483878320262, "loss": 0.2407, "step": 65150 }, { "epoch": 0.46249421083731257, "grad_norm": 0.11279296875, "learning_rate": 0.00198174271641509, "loss": 0.2481, "step": 65160 }, { "epoch": 0.46256518907715866, "grad_norm": 0.09130859375, "learning_rate": 0.001981737044126166, "loss": 0.2486, "step": 65170 }, { "epoch": 0.4626361673170048, "grad_norm": 0.08740234375, "learning_rate": 0.0019817313709652594, "loss": 0.2276, "step": 65180 }, { "epoch": 0.4627071455568509, "grad_norm": 0.12451171875, "learning_rate": 0.0019817256969323757, "loss": 0.2392, "step": 65190 }, { "epoch": 0.462778123796697, "grad_norm": 0.1494140625, "learning_rate": 0.0019817200220275206, "loss": 0.2465, "step": 65200 }, { "epoch": 0.46284910203654317, "grad_norm": 0.2001953125, "learning_rate": 0.0019817143462506994, "loss": 0.2377, "step": 65210 }, { "epoch": 0.46292008027638926, "grad_norm": 0.18359375, "learning_rate": 0.001981708669601918, "loss": 0.2503, "step": 65220 }, { "epoch": 0.4629910585162354, "grad_norm": 0.11181640625, "learning_rate": 0.001981702992081182, "loss": 0.2388, "step": 65230 }, { "epoch": 0.4630620367560815, "grad_norm": 0.08935546875, "learning_rate": 0.001981697313688497, "loss": 0.2405, "step": 65240 }, { "epoch": 0.4631330149959276, "grad_norm": 0.1015625, "learning_rate": 0.001981691634423869, "loss": 0.2417, "step": 65250 }, { "epoch": 0.46320399323577377, "grad_norm": 0.107421875, "learning_rate": 0.0019816859542873028, "loss": 0.237, "step": 65260 }, { "epoch": 0.46327497147561986, "grad_norm": 0.10693359375, "learning_rate": 0.0019816802732788045, "loss": 0.2227, "step": 65270 }, { "epoch": 0.463345949715466, "grad_norm": 0.0966796875, "learning_rate": 0.0019816745913983796, "loss": 0.2291, "step": 65280 }, { "epoch": 0.4634169279553121, "grad_norm": 0.0888671875, "learning_rate": 0.001981668908646034, "loss": 0.2449, "step": 65290 }, { "epoch": 0.46348790619515823, "grad_norm": 0.111328125, "learning_rate": 0.001981663225021773, "loss": 0.2478, "step": 65300 }, { "epoch": 0.4635588844350043, "grad_norm": 0.255859375, "learning_rate": 0.0019816575405256025, "loss": 0.2756, "step": 65310 }, { "epoch": 0.46362986267485046, "grad_norm": 0.0810546875, "learning_rate": 0.001981651855157528, "loss": 0.2522, "step": 65320 }, { "epoch": 0.4637008409146966, "grad_norm": 0.1455078125, "learning_rate": 0.001981646168917555, "loss": 0.2414, "step": 65330 }, { "epoch": 0.4637718191545427, "grad_norm": 0.08935546875, "learning_rate": 0.001981640481805689, "loss": 0.2474, "step": 65340 }, { "epoch": 0.46384279739438883, "grad_norm": 0.10400390625, "learning_rate": 0.001981634793821936, "loss": 0.2499, "step": 65350 }, { "epoch": 0.4639137756342349, "grad_norm": 0.07763671875, "learning_rate": 0.0019816291049663012, "loss": 0.243, "step": 65360 }, { "epoch": 0.46398475387408106, "grad_norm": 0.1064453125, "learning_rate": 0.0019816234152387905, "loss": 0.2454, "step": 65370 }, { "epoch": 0.4640557321139272, "grad_norm": 0.0859375, "learning_rate": 0.0019816177246394094, "loss": 0.2412, "step": 65380 }, { "epoch": 0.4641267103537733, "grad_norm": 0.068359375, "learning_rate": 0.001981612033168164, "loss": 0.2256, "step": 65390 }, { "epoch": 0.46419768859361943, "grad_norm": 0.09130859375, "learning_rate": 0.0019816063408250595, "loss": 0.2401, "step": 65400 }, { "epoch": 0.4642686668334655, "grad_norm": 0.0908203125, "learning_rate": 0.0019816006476101014, "loss": 0.2387, "step": 65410 }, { "epoch": 0.46433964507331166, "grad_norm": 0.0830078125, "learning_rate": 0.0019815949535232956, "loss": 0.2257, "step": 65420 }, { "epoch": 0.46441062331315774, "grad_norm": 0.09716796875, "learning_rate": 0.0019815892585646477, "loss": 0.2528, "step": 65430 }, { "epoch": 0.4644816015530039, "grad_norm": 0.11474609375, "learning_rate": 0.0019815835627341634, "loss": 0.2207, "step": 65440 }, { "epoch": 0.46455257979285003, "grad_norm": 0.09521484375, "learning_rate": 0.001981577866031848, "loss": 0.2277, "step": 65450 }, { "epoch": 0.4646235580326961, "grad_norm": 0.083984375, "learning_rate": 0.0019815721684577074, "loss": 0.2294, "step": 65460 }, { "epoch": 0.46469453627254226, "grad_norm": 0.1533203125, "learning_rate": 0.001981566470011747, "loss": 0.2268, "step": 65470 }, { "epoch": 0.46476551451238834, "grad_norm": 0.09033203125, "learning_rate": 0.001981560770693973, "loss": 0.2356, "step": 65480 }, { "epoch": 0.4648364927522345, "grad_norm": 0.061767578125, "learning_rate": 0.0019815550705043904, "loss": 0.2248, "step": 65490 }, { "epoch": 0.46490747099208063, "grad_norm": 0.1259765625, "learning_rate": 0.001981549369443005, "loss": 0.2531, "step": 65500 }, { "epoch": 0.4649784492319267, "grad_norm": 0.0869140625, "learning_rate": 0.0019815436675098223, "loss": 0.2417, "step": 65510 }, { "epoch": 0.46504942747177286, "grad_norm": 0.08740234375, "learning_rate": 0.001981537964704849, "loss": 0.228, "step": 65520 }, { "epoch": 0.46512040571161894, "grad_norm": 0.1259765625, "learning_rate": 0.0019815322610280887, "loss": 0.2218, "step": 65530 }, { "epoch": 0.4651913839514651, "grad_norm": 0.1279296875, "learning_rate": 0.0019815265564795495, "loss": 0.2383, "step": 65540 }, { "epoch": 0.4652623621913112, "grad_norm": 0.08251953125, "learning_rate": 0.001981520851059235, "loss": 0.2299, "step": 65550 }, { "epoch": 0.4653333404311573, "grad_norm": 0.103515625, "learning_rate": 0.001981515144767152, "loss": 0.2217, "step": 65560 }, { "epoch": 0.46540431867100346, "grad_norm": 0.103515625, "learning_rate": 0.0019815094376033054, "loss": 0.2429, "step": 65570 }, { "epoch": 0.46547529691084955, "grad_norm": 0.1025390625, "learning_rate": 0.0019815037295677013, "loss": 0.2501, "step": 65580 }, { "epoch": 0.4655462751506957, "grad_norm": 0.1103515625, "learning_rate": 0.0019814980206603453, "loss": 0.2435, "step": 65590 }, { "epoch": 0.4656172533905418, "grad_norm": 0.08544921875, "learning_rate": 0.0019814923108812436, "loss": 0.2502, "step": 65600 }, { "epoch": 0.4656882316303879, "grad_norm": 0.1279296875, "learning_rate": 0.0019814866002304003, "loss": 0.2383, "step": 65610 }, { "epoch": 0.46575920987023406, "grad_norm": 0.0849609375, "learning_rate": 0.0019814808887078225, "loss": 0.244, "step": 65620 }, { "epoch": 0.46583018811008015, "grad_norm": 0.1689453125, "learning_rate": 0.0019814751763135153, "loss": 0.2479, "step": 65630 }, { "epoch": 0.4659011663499263, "grad_norm": 0.0888671875, "learning_rate": 0.0019814694630474845, "loss": 0.2307, "step": 65640 }, { "epoch": 0.4659721445897724, "grad_norm": 0.0703125, "learning_rate": 0.0019814637489097356, "loss": 0.2351, "step": 65650 }, { "epoch": 0.4660431228296185, "grad_norm": 0.115234375, "learning_rate": 0.0019814580339002743, "loss": 0.2367, "step": 65660 }, { "epoch": 0.4661141010694646, "grad_norm": 0.07470703125, "learning_rate": 0.001981452318019106, "loss": 0.2616, "step": 65670 }, { "epoch": 0.46618507930931075, "grad_norm": 0.19140625, "learning_rate": 0.001981446601266237, "loss": 0.2315, "step": 65680 }, { "epoch": 0.4662560575491569, "grad_norm": 0.09423828125, "learning_rate": 0.001981440883641672, "loss": 0.2483, "step": 65690 }, { "epoch": 0.466327035789003, "grad_norm": 0.09765625, "learning_rate": 0.0019814351651454174, "loss": 0.2381, "step": 65700 }, { "epoch": 0.4663980140288491, "grad_norm": 0.1220703125, "learning_rate": 0.001981429445777479, "loss": 0.2381, "step": 65710 }, { "epoch": 0.4664689922686952, "grad_norm": 0.1337890625, "learning_rate": 0.001981423725537862, "loss": 0.2492, "step": 65720 }, { "epoch": 0.46653997050854135, "grad_norm": 0.130859375, "learning_rate": 0.0019814180044265724, "loss": 0.2355, "step": 65730 }, { "epoch": 0.4666109487483875, "grad_norm": 0.1904296875, "learning_rate": 0.001981412282443615, "loss": 0.2359, "step": 65740 }, { "epoch": 0.4666819269882336, "grad_norm": 0.10888671875, "learning_rate": 0.0019814065595889965, "loss": 0.2375, "step": 65750 }, { "epoch": 0.4667529052280797, "grad_norm": 0.09228515625, "learning_rate": 0.001981400835862722, "loss": 0.2311, "step": 65760 }, { "epoch": 0.4668238834679258, "grad_norm": 0.1640625, "learning_rate": 0.0019813951112647972, "loss": 0.2403, "step": 65770 }, { "epoch": 0.46689486170777195, "grad_norm": 0.11669921875, "learning_rate": 0.0019813893857952283, "loss": 0.2327, "step": 65780 }, { "epoch": 0.46696583994761803, "grad_norm": 0.1806640625, "learning_rate": 0.0019813836594540202, "loss": 0.2415, "step": 65790 }, { "epoch": 0.4670368181874642, "grad_norm": 0.15625, "learning_rate": 0.001981377932241179, "loss": 0.2357, "step": 65800 }, { "epoch": 0.4671077964273103, "grad_norm": 0.1630859375, "learning_rate": 0.00198137220415671, "loss": 0.2503, "step": 65810 }, { "epoch": 0.4671787746671564, "grad_norm": 0.103515625, "learning_rate": 0.0019813664752006198, "loss": 0.2593, "step": 65820 }, { "epoch": 0.46724975290700255, "grad_norm": 0.1279296875, "learning_rate": 0.001981360745372913, "loss": 0.2213, "step": 65830 }, { "epoch": 0.46732073114684863, "grad_norm": 0.11572265625, "learning_rate": 0.001981355014673596, "loss": 0.2449, "step": 65840 }, { "epoch": 0.4673917093866948, "grad_norm": 0.09326171875, "learning_rate": 0.0019813492831026736, "loss": 0.2464, "step": 65850 }, { "epoch": 0.4674626876265409, "grad_norm": 0.091796875, "learning_rate": 0.001981343550660152, "loss": 0.227, "step": 65860 }, { "epoch": 0.467533665866387, "grad_norm": 0.1044921875, "learning_rate": 0.001981337817346037, "loss": 0.2348, "step": 65870 }, { "epoch": 0.46760464410623315, "grad_norm": 0.125, "learning_rate": 0.0019813320831603342, "loss": 0.2292, "step": 65880 }, { "epoch": 0.46767562234607923, "grad_norm": 0.1552734375, "learning_rate": 0.0019813263481030493, "loss": 0.2141, "step": 65890 }, { "epoch": 0.4677466005859254, "grad_norm": 0.0849609375, "learning_rate": 0.0019813206121741876, "loss": 0.2334, "step": 65900 }, { "epoch": 0.46781757882577146, "grad_norm": 0.055908203125, "learning_rate": 0.0019813148753737553, "loss": 0.2227, "step": 65910 }, { "epoch": 0.4678885570656176, "grad_norm": 0.125, "learning_rate": 0.0019813091377017577, "loss": 0.2308, "step": 65920 }, { "epoch": 0.46795953530546375, "grad_norm": 0.0771484375, "learning_rate": 0.0019813033991582006, "loss": 0.2287, "step": 65930 }, { "epoch": 0.46803051354530983, "grad_norm": 0.08935546875, "learning_rate": 0.00198129765974309, "loss": 0.233, "step": 65940 }, { "epoch": 0.468101491785156, "grad_norm": 0.1962890625, "learning_rate": 0.0019812919194564307, "loss": 0.2268, "step": 65950 }, { "epoch": 0.46817247002500206, "grad_norm": 0.1123046875, "learning_rate": 0.0019812861782982296, "loss": 0.246, "step": 65960 }, { "epoch": 0.4682434482648482, "grad_norm": 0.10595703125, "learning_rate": 0.001981280436268491, "loss": 0.2419, "step": 65970 }, { "epoch": 0.46831442650469435, "grad_norm": 0.16015625, "learning_rate": 0.0019812746933672216, "loss": 0.2548, "step": 65980 }, { "epoch": 0.46838540474454043, "grad_norm": 0.08642578125, "learning_rate": 0.0019812689495944265, "loss": 0.2206, "step": 65990 }, { "epoch": 0.4684563829843866, "grad_norm": 0.0888671875, "learning_rate": 0.001981263204950112, "loss": 0.2495, "step": 66000 }, { "epoch": 0.4684563829843866, "eval_covost2-zh-en_loss": 3.8184216022491455, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.8271, "eval_covost2-zh-en_samples_per_second": 2.932, "eval_covost2-zh-en_steps_per_second": 0.183, "step": 66000 }, { "epoch": 0.4684563829843866, "eval_covost2-en-zh_loss": 3.1552274227142334, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.0254, "eval_covost2-en-zh_samples_per_second": 3.044, "eval_covost2-en-zh_steps_per_second": 0.19, "step": 66000 }, { "epoch": 0.46852736122423266, "grad_norm": 0.1005859375, "learning_rate": 0.0019812574594342837, "loss": 0.2514, "step": 66010 }, { "epoch": 0.4685983394640788, "grad_norm": 0.1337890625, "learning_rate": 0.0019812517130469464, "loss": 0.2251, "step": 66020 }, { "epoch": 0.4686693177039249, "grad_norm": 0.1533203125, "learning_rate": 0.0019812459657881066, "loss": 0.2311, "step": 66030 }, { "epoch": 0.46874029594377103, "grad_norm": 0.10986328125, "learning_rate": 0.00198124021765777, "loss": 0.2504, "step": 66040 }, { "epoch": 0.4688112741836172, "grad_norm": 0.1357421875, "learning_rate": 0.0019812344686559414, "loss": 0.2488, "step": 66050 }, { "epoch": 0.46888225242346326, "grad_norm": 0.09423828125, "learning_rate": 0.001981228718782628, "loss": 0.262, "step": 66060 }, { "epoch": 0.4689532306633094, "grad_norm": 0.07568359375, "learning_rate": 0.001981222968037834, "loss": 0.233, "step": 66070 }, { "epoch": 0.4690242089031555, "grad_norm": 0.10302734375, "learning_rate": 0.001981217216421566, "loss": 0.2287, "step": 66080 }, { "epoch": 0.46909518714300164, "grad_norm": 0.138671875, "learning_rate": 0.0019812114639338294, "loss": 0.2507, "step": 66090 }, { "epoch": 0.4691661653828478, "grad_norm": 0.0966796875, "learning_rate": 0.0019812057105746293, "loss": 0.2414, "step": 66100 }, { "epoch": 0.46923714362269386, "grad_norm": 0.1015625, "learning_rate": 0.001981199956343973, "loss": 0.2339, "step": 66110 }, { "epoch": 0.46930812186254, "grad_norm": 0.1259765625, "learning_rate": 0.0019811942012418642, "loss": 0.2294, "step": 66120 }, { "epoch": 0.4693791001023861, "grad_norm": 0.10888671875, "learning_rate": 0.00198118844526831, "loss": 0.2436, "step": 66130 }, { "epoch": 0.46945007834223224, "grad_norm": 0.12158203125, "learning_rate": 0.001981182688423316, "loss": 0.2543, "step": 66140 }, { "epoch": 0.4695210565820783, "grad_norm": 0.09228515625, "learning_rate": 0.0019811769307068873, "loss": 0.2478, "step": 66150 }, { "epoch": 0.46959203482192446, "grad_norm": 0.09423828125, "learning_rate": 0.0019811711721190294, "loss": 0.2546, "step": 66160 }, { "epoch": 0.4696630130617706, "grad_norm": 0.07763671875, "learning_rate": 0.0019811654126597493, "loss": 0.246, "step": 66170 }, { "epoch": 0.4697339913016167, "grad_norm": 0.16015625, "learning_rate": 0.0019811596523290516, "loss": 0.2469, "step": 66180 }, { "epoch": 0.46980496954146284, "grad_norm": 0.08837890625, "learning_rate": 0.0019811538911269416, "loss": 0.2266, "step": 66190 }, { "epoch": 0.4698759477813089, "grad_norm": 0.1630859375, "learning_rate": 0.0019811481290534258, "loss": 0.2506, "step": 66200 }, { "epoch": 0.46994692602115506, "grad_norm": 0.11572265625, "learning_rate": 0.0019811423661085102, "loss": 0.2381, "step": 66210 }, { "epoch": 0.4700179042610012, "grad_norm": 0.201171875, "learning_rate": 0.0019811366022921997, "loss": 0.2321, "step": 66220 }, { "epoch": 0.4700888825008473, "grad_norm": 0.09912109375, "learning_rate": 0.0019811308376045003, "loss": 0.2386, "step": 66230 }, { "epoch": 0.47015986074069344, "grad_norm": 0.0810546875, "learning_rate": 0.001981125072045418, "loss": 0.2401, "step": 66240 }, { "epoch": 0.4702308389805395, "grad_norm": 0.1337890625, "learning_rate": 0.0019811193056149578, "loss": 0.2383, "step": 66250 }, { "epoch": 0.47030181722038567, "grad_norm": 0.0751953125, "learning_rate": 0.001981113538313126, "loss": 0.2583, "step": 66260 }, { "epoch": 0.47037279546023175, "grad_norm": 0.1337890625, "learning_rate": 0.0019811077701399286, "loss": 0.2483, "step": 66270 }, { "epoch": 0.4704437737000779, "grad_norm": 0.0927734375, "learning_rate": 0.00198110200109537, "loss": 0.2447, "step": 66280 }, { "epoch": 0.47051475193992404, "grad_norm": 0.08837890625, "learning_rate": 0.0019810962311794574, "loss": 0.2354, "step": 66290 }, { "epoch": 0.4705857301797701, "grad_norm": 0.15234375, "learning_rate": 0.0019810904603921957, "loss": 0.2318, "step": 66300 }, { "epoch": 0.47065670841961627, "grad_norm": 0.1123046875, "learning_rate": 0.0019810846887335907, "loss": 0.2333, "step": 66310 }, { "epoch": 0.47072768665946235, "grad_norm": 0.140625, "learning_rate": 0.001981078916203648, "loss": 0.2119, "step": 66320 }, { "epoch": 0.4707986648993085, "grad_norm": 0.123046875, "learning_rate": 0.0019810731428023735, "loss": 0.2372, "step": 66330 }, { "epoch": 0.47086964313915464, "grad_norm": 0.09033203125, "learning_rate": 0.001981067368529773, "loss": 0.2182, "step": 66340 }, { "epoch": 0.4709406213790007, "grad_norm": 0.09375, "learning_rate": 0.001981061593385852, "loss": 0.2379, "step": 66350 }, { "epoch": 0.47101159961884687, "grad_norm": 0.134765625, "learning_rate": 0.0019810558173706167, "loss": 0.2344, "step": 66360 }, { "epoch": 0.47108257785869295, "grad_norm": 0.0966796875, "learning_rate": 0.001981050040484072, "loss": 0.2479, "step": 66370 }, { "epoch": 0.4711535560985391, "grad_norm": 0.083984375, "learning_rate": 0.001981044262726224, "loss": 0.2457, "step": 66380 }, { "epoch": 0.47122453433838524, "grad_norm": 0.20703125, "learning_rate": 0.001981038484097079, "loss": 0.2398, "step": 66390 }, { "epoch": 0.4712955125782313, "grad_norm": 0.111328125, "learning_rate": 0.001981032704596641, "loss": 0.2546, "step": 66400 }, { "epoch": 0.47136649081807747, "grad_norm": 0.205078125, "learning_rate": 0.001981026924224918, "loss": 0.2688, "step": 66410 }, { "epoch": 0.47143746905792355, "grad_norm": 0.07568359375, "learning_rate": 0.0019810211429819144, "loss": 0.249, "step": 66420 }, { "epoch": 0.4715084472977697, "grad_norm": 0.08837890625, "learning_rate": 0.0019810153608676357, "loss": 0.2351, "step": 66430 }, { "epoch": 0.4715794255376158, "grad_norm": 0.1044921875, "learning_rate": 0.0019810095778820883, "loss": 0.2462, "step": 66440 }, { "epoch": 0.4716504037774619, "grad_norm": 0.1201171875, "learning_rate": 0.0019810037940252777, "loss": 0.2259, "step": 66450 }, { "epoch": 0.47172138201730807, "grad_norm": 0.18359375, "learning_rate": 0.001980998009297209, "loss": 0.2482, "step": 66460 }, { "epoch": 0.47179236025715415, "grad_norm": 0.087890625, "learning_rate": 0.001980992223697889, "loss": 0.2313, "step": 66470 }, { "epoch": 0.4718633384970003, "grad_norm": 0.0830078125, "learning_rate": 0.001980986437227323, "loss": 0.2557, "step": 66480 }, { "epoch": 0.4719343167368464, "grad_norm": 0.10205078125, "learning_rate": 0.0019809806498855163, "loss": 0.2416, "step": 66490 }, { "epoch": 0.4720052949766925, "grad_norm": 0.107421875, "learning_rate": 0.0019809748616724755, "loss": 0.2348, "step": 66500 }, { "epoch": 0.47207627321653867, "grad_norm": 0.10595703125, "learning_rate": 0.0019809690725882054, "loss": 0.2356, "step": 66510 }, { "epoch": 0.47214725145638475, "grad_norm": 0.099609375, "learning_rate": 0.001980963282632712, "loss": 0.2431, "step": 66520 }, { "epoch": 0.4722182296962309, "grad_norm": 0.08935546875, "learning_rate": 0.001980957491806001, "loss": 0.2491, "step": 66530 }, { "epoch": 0.472289207936077, "grad_norm": 0.09228515625, "learning_rate": 0.0019809517001080786, "loss": 0.2439, "step": 66540 }, { "epoch": 0.4723601861759231, "grad_norm": 0.1005859375, "learning_rate": 0.00198094590753895, "loss": 0.2191, "step": 66550 }, { "epoch": 0.4724311644157692, "grad_norm": 0.1611328125, "learning_rate": 0.0019809401140986212, "loss": 0.2288, "step": 66560 }, { "epoch": 0.47250214265561535, "grad_norm": 0.08935546875, "learning_rate": 0.001980934319787098, "loss": 0.2333, "step": 66570 }, { "epoch": 0.4725731208954615, "grad_norm": 0.1103515625, "learning_rate": 0.001980928524604386, "loss": 0.2353, "step": 66580 }, { "epoch": 0.4726440991353076, "grad_norm": 0.09814453125, "learning_rate": 0.0019809227285504903, "loss": 0.2381, "step": 66590 }, { "epoch": 0.4727150773751537, "grad_norm": 0.09423828125, "learning_rate": 0.0019809169316254175, "loss": 0.2339, "step": 66600 }, { "epoch": 0.4727860556149998, "grad_norm": 0.1142578125, "learning_rate": 0.0019809111338291736, "loss": 0.2427, "step": 66610 }, { "epoch": 0.47285703385484595, "grad_norm": 0.109375, "learning_rate": 0.001980905335161763, "loss": 0.2523, "step": 66620 }, { "epoch": 0.4729280120946921, "grad_norm": 0.1123046875, "learning_rate": 0.001980899535623193, "loss": 0.231, "step": 66630 }, { "epoch": 0.4729989903345382, "grad_norm": 0.126953125, "learning_rate": 0.001980893735213468, "loss": 0.2442, "step": 66640 }, { "epoch": 0.4730699685743843, "grad_norm": 0.1171875, "learning_rate": 0.0019808879339325946, "loss": 0.2466, "step": 66650 }, { "epoch": 0.4731409468142304, "grad_norm": 0.091796875, "learning_rate": 0.001980882131780578, "loss": 0.2388, "step": 66660 }, { "epoch": 0.47321192505407655, "grad_norm": 0.10009765625, "learning_rate": 0.0019808763287574244, "loss": 0.2383, "step": 66670 }, { "epoch": 0.47328290329392264, "grad_norm": 0.10009765625, "learning_rate": 0.001980870524863139, "loss": 0.2456, "step": 66680 }, { "epoch": 0.4733538815337688, "grad_norm": 0.1484375, "learning_rate": 0.001980864720097728, "loss": 0.2431, "step": 66690 }, { "epoch": 0.4734248597736149, "grad_norm": 0.1142578125, "learning_rate": 0.001980858914461197, "loss": 0.2224, "step": 66700 }, { "epoch": 0.473495838013461, "grad_norm": 0.11572265625, "learning_rate": 0.0019808531079535518, "loss": 0.2293, "step": 66710 }, { "epoch": 0.47356681625330715, "grad_norm": 0.279296875, "learning_rate": 0.001980847300574798, "loss": 0.2548, "step": 66720 }, { "epoch": 0.47363779449315324, "grad_norm": 0.08544921875, "learning_rate": 0.0019808414923249418, "loss": 0.2407, "step": 66730 }, { "epoch": 0.4737087727329994, "grad_norm": 0.11376953125, "learning_rate": 0.0019808356832039884, "loss": 0.2588, "step": 66740 }, { "epoch": 0.4737797509728455, "grad_norm": 0.06201171875, "learning_rate": 0.001980829873211943, "loss": 0.2298, "step": 66750 }, { "epoch": 0.4738507292126916, "grad_norm": 0.1044921875, "learning_rate": 0.001980824062348813, "loss": 0.2441, "step": 66760 }, { "epoch": 0.47392170745253775, "grad_norm": 0.1025390625, "learning_rate": 0.001980818250614603, "loss": 0.2222, "step": 66770 }, { "epoch": 0.47399268569238384, "grad_norm": 0.091796875, "learning_rate": 0.001980812438009319, "loss": 0.2546, "step": 66780 }, { "epoch": 0.47406366393223, "grad_norm": 0.08203125, "learning_rate": 0.0019808066245329665, "loss": 0.2448, "step": 66790 }, { "epoch": 0.47413464217207607, "grad_norm": 0.10546875, "learning_rate": 0.001980800810185552, "loss": 0.2517, "step": 66800 }, { "epoch": 0.4742056204119222, "grad_norm": 0.1259765625, "learning_rate": 0.0019807949949670798, "loss": 0.2255, "step": 66810 }, { "epoch": 0.47427659865176836, "grad_norm": 0.111328125, "learning_rate": 0.001980789178877557, "loss": 0.2585, "step": 66820 }, { "epoch": 0.47434757689161444, "grad_norm": 0.13671875, "learning_rate": 0.001980783361916989, "loss": 0.2364, "step": 66830 }, { "epoch": 0.4744185551314606, "grad_norm": 0.10107421875, "learning_rate": 0.0019807775440853813, "loss": 0.228, "step": 66840 }, { "epoch": 0.47448953337130667, "grad_norm": 0.2138671875, "learning_rate": 0.0019807717253827396, "loss": 0.2406, "step": 66850 }, { "epoch": 0.4745605116111528, "grad_norm": 0.134765625, "learning_rate": 0.0019807659058090703, "loss": 0.2382, "step": 66860 }, { "epoch": 0.47463148985099896, "grad_norm": 0.2041015625, "learning_rate": 0.001980760085364378, "loss": 0.2322, "step": 66870 }, { "epoch": 0.47470246809084504, "grad_norm": 0.2421875, "learning_rate": 0.00198075426404867, "loss": 0.2459, "step": 66880 }, { "epoch": 0.4747734463306912, "grad_norm": 0.1474609375, "learning_rate": 0.0019807484418619507, "loss": 0.2448, "step": 66890 }, { "epoch": 0.47484442457053727, "grad_norm": 0.09326171875, "learning_rate": 0.001980742618804227, "loss": 0.242, "step": 66900 }, { "epoch": 0.4749154028103834, "grad_norm": 0.185546875, "learning_rate": 0.0019807367948755034, "loss": 0.2406, "step": 66910 }, { "epoch": 0.4749863810502295, "grad_norm": 0.12158203125, "learning_rate": 0.0019807309700757866, "loss": 0.2337, "step": 66920 }, { "epoch": 0.47505735929007564, "grad_norm": 0.08154296875, "learning_rate": 0.001980725144405082, "loss": 0.2432, "step": 66930 }, { "epoch": 0.4751283375299218, "grad_norm": 0.1025390625, "learning_rate": 0.0019807193178633955, "loss": 0.2393, "step": 66940 }, { "epoch": 0.47519931576976787, "grad_norm": 0.0869140625, "learning_rate": 0.001980713490450733, "loss": 0.2349, "step": 66950 }, { "epoch": 0.475270294009614, "grad_norm": 0.142578125, "learning_rate": 0.0019807076621670993, "loss": 0.2289, "step": 66960 }, { "epoch": 0.4753412722494601, "grad_norm": 0.10009765625, "learning_rate": 0.001980701833012502, "loss": 0.241, "step": 66970 }, { "epoch": 0.47541225048930624, "grad_norm": 0.0927734375, "learning_rate": 0.001980696002986945, "loss": 0.2209, "step": 66980 }, { "epoch": 0.4754832287291524, "grad_norm": 0.10888671875, "learning_rate": 0.001980690172090435, "loss": 0.2413, "step": 66990 }, { "epoch": 0.47555420696899847, "grad_norm": 0.091796875, "learning_rate": 0.0019806843403229776, "loss": 0.2519, "step": 67000 }, { "epoch": 0.4756251852088446, "grad_norm": 0.107421875, "learning_rate": 0.001980678507684579, "loss": 0.2266, "step": 67010 }, { "epoch": 0.4756961634486907, "grad_norm": 0.0966796875, "learning_rate": 0.001980672674175244, "loss": 0.243, "step": 67020 }, { "epoch": 0.47576714168853684, "grad_norm": 0.11572265625, "learning_rate": 0.001980666839794979, "loss": 0.2492, "step": 67030 }, { "epoch": 0.47583811992838293, "grad_norm": 0.08837890625, "learning_rate": 0.00198066100454379, "loss": 0.2523, "step": 67040 }, { "epoch": 0.47590909816822907, "grad_norm": 0.0869140625, "learning_rate": 0.0019806551684216824, "loss": 0.2351, "step": 67050 }, { "epoch": 0.4759800764080752, "grad_norm": 0.10693359375, "learning_rate": 0.001980649331428662, "loss": 0.2162, "step": 67060 }, { "epoch": 0.4760510546479213, "grad_norm": 0.1396484375, "learning_rate": 0.0019806434935647345, "loss": 0.2406, "step": 67070 }, { "epoch": 0.47612203288776744, "grad_norm": 0.08642578125, "learning_rate": 0.001980637654829906, "loss": 0.2295, "step": 67080 }, { "epoch": 0.47619301112761353, "grad_norm": 0.09033203125, "learning_rate": 0.0019806318152241816, "loss": 0.2365, "step": 67090 }, { "epoch": 0.4762639893674597, "grad_norm": 0.12109375, "learning_rate": 0.0019806259747475676, "loss": 0.2425, "step": 67100 }, { "epoch": 0.4763349676073058, "grad_norm": 0.1015625, "learning_rate": 0.00198062013340007, "loss": 0.2292, "step": 67110 }, { "epoch": 0.4764059458471519, "grad_norm": 0.0966796875, "learning_rate": 0.001980614291181694, "loss": 0.2343, "step": 67120 }, { "epoch": 0.47647692408699804, "grad_norm": 0.091796875, "learning_rate": 0.001980608448092446, "loss": 0.2358, "step": 67130 }, { "epoch": 0.47654790232684413, "grad_norm": 0.12109375, "learning_rate": 0.0019806026041323315, "loss": 0.2258, "step": 67140 }, { "epoch": 0.4766188805666903, "grad_norm": 0.07763671875, "learning_rate": 0.001980596759301356, "loss": 0.2292, "step": 67150 }, { "epoch": 0.47668985880653636, "grad_norm": 0.083984375, "learning_rate": 0.0019805909135995254, "loss": 0.2329, "step": 67160 }, { "epoch": 0.4767608370463825, "grad_norm": 0.06787109375, "learning_rate": 0.001980585067026846, "loss": 0.2311, "step": 67170 }, { "epoch": 0.47683181528622864, "grad_norm": 0.1630859375, "learning_rate": 0.001980579219583322, "loss": 0.2297, "step": 67180 }, { "epoch": 0.47690279352607473, "grad_norm": 0.142578125, "learning_rate": 0.0019805733712689616, "loss": 0.2359, "step": 67190 }, { "epoch": 0.4769737717659209, "grad_norm": 0.1416015625, "learning_rate": 0.001980567522083769, "loss": 0.235, "step": 67200 }, { "epoch": 0.47704475000576696, "grad_norm": 0.12060546875, "learning_rate": 0.00198056167202775, "loss": 0.2498, "step": 67210 }, { "epoch": 0.4771157282456131, "grad_norm": 0.1171875, "learning_rate": 0.001980555821100911, "loss": 0.2263, "step": 67220 }, { "epoch": 0.47718670648545924, "grad_norm": 0.08349609375, "learning_rate": 0.0019805499693032573, "loss": 0.249, "step": 67230 }, { "epoch": 0.47725768472530533, "grad_norm": 0.1416015625, "learning_rate": 0.001980544116634795, "loss": 0.236, "step": 67240 }, { "epoch": 0.4773286629651515, "grad_norm": 0.08056640625, "learning_rate": 0.0019805382630955297, "loss": 0.2235, "step": 67250 }, { "epoch": 0.47739964120499756, "grad_norm": 0.12255859375, "learning_rate": 0.001980532408685467, "loss": 0.2301, "step": 67260 }, { "epoch": 0.4774706194448437, "grad_norm": 0.0947265625, "learning_rate": 0.001980526553404613, "loss": 0.237, "step": 67270 }, { "epoch": 0.4775415976846898, "grad_norm": 0.0947265625, "learning_rate": 0.001980520697252974, "loss": 0.2661, "step": 67280 }, { "epoch": 0.47761257592453593, "grad_norm": 0.08935546875, "learning_rate": 0.001980514840230555, "loss": 0.2358, "step": 67290 }, { "epoch": 0.4776835541643821, "grad_norm": 0.10400390625, "learning_rate": 0.001980508982337362, "loss": 0.2198, "step": 67300 }, { "epoch": 0.47775453240422816, "grad_norm": 0.2353515625, "learning_rate": 0.0019805031235734, "loss": 0.2379, "step": 67310 }, { "epoch": 0.4778255106440743, "grad_norm": 0.1318359375, "learning_rate": 0.0019804972639386764, "loss": 0.2275, "step": 67320 }, { "epoch": 0.4778964888839204, "grad_norm": 0.1201171875, "learning_rate": 0.001980491403433196, "loss": 0.2388, "step": 67330 }, { "epoch": 0.47796746712376653, "grad_norm": 0.10302734375, "learning_rate": 0.001980485542056965, "loss": 0.2477, "step": 67340 }, { "epoch": 0.4780384453636127, "grad_norm": 0.07177734375, "learning_rate": 0.0019804796798099885, "loss": 0.2578, "step": 67350 }, { "epoch": 0.47810942360345876, "grad_norm": 0.11962890625, "learning_rate": 0.0019804738166922732, "loss": 0.2191, "step": 67360 }, { "epoch": 0.4781804018433049, "grad_norm": 0.07763671875, "learning_rate": 0.0019804679527038243, "loss": 0.2369, "step": 67370 }, { "epoch": 0.478251380083151, "grad_norm": 0.0908203125, "learning_rate": 0.001980462087844648, "loss": 0.2522, "step": 67380 }, { "epoch": 0.47832235832299713, "grad_norm": 0.09326171875, "learning_rate": 0.0019804562221147496, "loss": 0.2545, "step": 67390 }, { "epoch": 0.4783933365628432, "grad_norm": 0.119140625, "learning_rate": 0.001980450355514135, "loss": 0.229, "step": 67400 }, { "epoch": 0.47846431480268936, "grad_norm": 0.1015625, "learning_rate": 0.0019804444880428106, "loss": 0.2271, "step": 67410 }, { "epoch": 0.4785352930425355, "grad_norm": 0.2265625, "learning_rate": 0.0019804386197007817, "loss": 0.2251, "step": 67420 }, { "epoch": 0.4786062712823816, "grad_norm": 0.1396484375, "learning_rate": 0.0019804327504880544, "loss": 0.2422, "step": 67430 }, { "epoch": 0.47867724952222773, "grad_norm": 0.125, "learning_rate": 0.0019804268804046337, "loss": 0.2514, "step": 67440 }, { "epoch": 0.4787482277620738, "grad_norm": 0.0712890625, "learning_rate": 0.0019804210094505263, "loss": 0.227, "step": 67450 }, { "epoch": 0.47881920600191996, "grad_norm": 0.1064453125, "learning_rate": 0.0019804151376257378, "loss": 0.2297, "step": 67460 }, { "epoch": 0.4788901842417661, "grad_norm": 0.216796875, "learning_rate": 0.0019804092649302737, "loss": 0.2325, "step": 67470 }, { "epoch": 0.4789611624816122, "grad_norm": 0.09423828125, "learning_rate": 0.0019804033913641403, "loss": 0.2473, "step": 67480 }, { "epoch": 0.47903214072145833, "grad_norm": 0.17578125, "learning_rate": 0.001980397516927343, "loss": 0.2272, "step": 67490 }, { "epoch": 0.4791031189613044, "grad_norm": 0.15234375, "learning_rate": 0.0019803916416198876, "loss": 0.2465, "step": 67500 }, { "epoch": 0.47917409720115056, "grad_norm": 0.08154296875, "learning_rate": 0.00198038576544178, "loss": 0.2344, "step": 67510 }, { "epoch": 0.4792450754409967, "grad_norm": 0.12890625, "learning_rate": 0.0019803798883930262, "loss": 0.2272, "step": 67520 }, { "epoch": 0.4793160536808428, "grad_norm": 0.17578125, "learning_rate": 0.001980374010473632, "loss": 0.244, "step": 67530 }, { "epoch": 0.47938703192068893, "grad_norm": 0.09375, "learning_rate": 0.001980368131683603, "loss": 0.2184, "step": 67540 }, { "epoch": 0.479458010160535, "grad_norm": 0.09814453125, "learning_rate": 0.001980362252022945, "loss": 0.2423, "step": 67550 }, { "epoch": 0.47952898840038116, "grad_norm": 0.09326171875, "learning_rate": 0.0019803563714916638, "loss": 0.2399, "step": 67560 }, { "epoch": 0.47959996664022725, "grad_norm": 0.11669921875, "learning_rate": 0.0019803504900897657, "loss": 0.2228, "step": 67570 }, { "epoch": 0.4796709448800734, "grad_norm": 0.09521484375, "learning_rate": 0.001980344607817256, "loss": 0.2414, "step": 67580 }, { "epoch": 0.47974192311991953, "grad_norm": 0.169921875, "learning_rate": 0.0019803387246741405, "loss": 0.2681, "step": 67590 }, { "epoch": 0.4798129013597656, "grad_norm": 0.08544921875, "learning_rate": 0.001980332840660425, "loss": 0.2316, "step": 67600 }, { "epoch": 0.47988387959961176, "grad_norm": 0.10009765625, "learning_rate": 0.001980326955776116, "loss": 0.2396, "step": 67610 }, { "epoch": 0.47995485783945785, "grad_norm": 0.15625, "learning_rate": 0.0019803210700212186, "loss": 0.2293, "step": 67620 }, { "epoch": 0.480025836079304, "grad_norm": 0.11669921875, "learning_rate": 0.0019803151833957386, "loss": 0.2207, "step": 67630 }, { "epoch": 0.48009681431915013, "grad_norm": 0.11962890625, "learning_rate": 0.001980309295899682, "loss": 0.2451, "step": 67640 }, { "epoch": 0.4801677925589962, "grad_norm": 0.06396484375, "learning_rate": 0.001980303407533055, "loss": 0.2402, "step": 67650 }, { "epoch": 0.48023877079884236, "grad_norm": 0.09375, "learning_rate": 0.001980297518295863, "loss": 0.2491, "step": 67660 }, { "epoch": 0.48030974903868845, "grad_norm": 0.0849609375, "learning_rate": 0.001980291628188112, "loss": 0.2219, "step": 67670 }, { "epoch": 0.4803807272785346, "grad_norm": 0.11279296875, "learning_rate": 0.001980285737209808, "loss": 0.2384, "step": 67680 }, { "epoch": 0.4804517055183807, "grad_norm": 0.08447265625, "learning_rate": 0.001980279845360956, "loss": 0.2552, "step": 67690 }, { "epoch": 0.4805226837582268, "grad_norm": 0.12255859375, "learning_rate": 0.0019802739526415627, "loss": 0.2388, "step": 67700 }, { "epoch": 0.48059366199807296, "grad_norm": 0.1396484375, "learning_rate": 0.0019802680590516337, "loss": 0.2372, "step": 67710 }, { "epoch": 0.48066464023791905, "grad_norm": 0.09716796875, "learning_rate": 0.001980262164591175, "loss": 0.2516, "step": 67720 }, { "epoch": 0.4807356184777652, "grad_norm": 0.1376953125, "learning_rate": 0.0019802562692601915, "loss": 0.2459, "step": 67730 }, { "epoch": 0.4808065967176113, "grad_norm": 0.09619140625, "learning_rate": 0.00198025037305869, "loss": 0.2419, "step": 67740 }, { "epoch": 0.4808775749574574, "grad_norm": 0.09765625, "learning_rate": 0.0019802444759866762, "loss": 0.2373, "step": 67750 }, { "epoch": 0.48094855319730356, "grad_norm": 0.11376953125, "learning_rate": 0.001980238578044156, "loss": 0.2421, "step": 67760 }, { "epoch": 0.48101953143714965, "grad_norm": 0.11083984375, "learning_rate": 0.0019802326792311346, "loss": 0.2545, "step": 67770 }, { "epoch": 0.4810905096769958, "grad_norm": 0.09423828125, "learning_rate": 0.0019802267795476186, "loss": 0.2384, "step": 67780 }, { "epoch": 0.4811614879168419, "grad_norm": 0.3671875, "learning_rate": 0.001980220878993613, "loss": 0.2482, "step": 67790 }, { "epoch": 0.481232466156688, "grad_norm": 0.08544921875, "learning_rate": 0.0019802149775691243, "loss": 0.2456, "step": 67800 }, { "epoch": 0.4813034443965341, "grad_norm": 0.11962890625, "learning_rate": 0.0019802090752741586, "loss": 0.2233, "step": 67810 }, { "epoch": 0.48137442263638025, "grad_norm": 0.10888671875, "learning_rate": 0.0019802031721087208, "loss": 0.2343, "step": 67820 }, { "epoch": 0.4814454008762264, "grad_norm": 0.2197265625, "learning_rate": 0.001980197268072817, "loss": 0.2531, "step": 67830 }, { "epoch": 0.4815163791160725, "grad_norm": 0.1875, "learning_rate": 0.001980191363166454, "loss": 0.2254, "step": 67840 }, { "epoch": 0.4815873573559186, "grad_norm": 0.09375, "learning_rate": 0.0019801854573896363, "loss": 0.259, "step": 67850 }, { "epoch": 0.4816583355957647, "grad_norm": 0.09228515625, "learning_rate": 0.0019801795507423704, "loss": 0.2311, "step": 67860 }, { "epoch": 0.48172931383561085, "grad_norm": 0.12890625, "learning_rate": 0.0019801736432246623, "loss": 0.2432, "step": 67870 }, { "epoch": 0.481800292075457, "grad_norm": 0.0810546875, "learning_rate": 0.0019801677348365177, "loss": 0.2333, "step": 67880 }, { "epoch": 0.4818712703153031, "grad_norm": 0.10400390625, "learning_rate": 0.001980161825577942, "loss": 0.2333, "step": 67890 }, { "epoch": 0.4819422485551492, "grad_norm": 0.09619140625, "learning_rate": 0.001980155915448942, "loss": 0.2488, "step": 67900 }, { "epoch": 0.4820132267949953, "grad_norm": 0.10400390625, "learning_rate": 0.0019801500044495227, "loss": 0.2282, "step": 67910 }, { "epoch": 0.48208420503484145, "grad_norm": 0.1572265625, "learning_rate": 0.00198014409257969, "loss": 0.2454, "step": 67920 }, { "epoch": 0.48215518327468754, "grad_norm": 0.08154296875, "learning_rate": 0.0019801381798394504, "loss": 0.2231, "step": 67930 }, { "epoch": 0.4822261615145337, "grad_norm": 0.08251953125, "learning_rate": 0.001980132266228809, "loss": 0.2457, "step": 67940 }, { "epoch": 0.4822971397543798, "grad_norm": 0.12890625, "learning_rate": 0.001980126351747772, "loss": 0.2416, "step": 67950 }, { "epoch": 0.4823681179942259, "grad_norm": 0.0986328125, "learning_rate": 0.001980120436396345, "loss": 0.2394, "step": 67960 }, { "epoch": 0.48243909623407205, "grad_norm": 0.1337890625, "learning_rate": 0.0019801145201745344, "loss": 0.2446, "step": 67970 }, { "epoch": 0.48251007447391814, "grad_norm": 0.1025390625, "learning_rate": 0.0019801086030823453, "loss": 0.2259, "step": 67980 }, { "epoch": 0.4825810527137643, "grad_norm": 0.1875, "learning_rate": 0.001980102685119784, "loss": 0.2276, "step": 67990 }, { "epoch": 0.4826520309536104, "grad_norm": 0.109375, "learning_rate": 0.0019800967662868567, "loss": 0.2301, "step": 68000 }, { "epoch": 0.4826520309536104, "eval_covost2-zh-en_loss": 3.9423513412475586, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.0761, "eval_covost2-zh-en_samples_per_second": 3.037, "eval_covost2-zh-en_steps_per_second": 0.19, "step": 68000 }, { "epoch": 0.4826520309536104, "eval_covost2-en-zh_loss": 3.1478779315948486, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.3815, "eval_covost2-en-zh_samples_per_second": 3.302, "eval_covost2-en-zh_steps_per_second": 0.206, "step": 68000 }, { "epoch": 0.4827230091934565, "grad_norm": 0.08984375, "learning_rate": 0.0019800908465835683, "loss": 0.2311, "step": 68010 }, { "epoch": 0.48279398743330265, "grad_norm": 0.0888671875, "learning_rate": 0.001980084926009926, "loss": 0.2256, "step": 68020 }, { "epoch": 0.48286496567314874, "grad_norm": 0.083984375, "learning_rate": 0.0019800790045659345, "loss": 0.2411, "step": 68030 }, { "epoch": 0.4829359439129949, "grad_norm": 0.11328125, "learning_rate": 0.0019800730822516, "loss": 0.2366, "step": 68040 }, { "epoch": 0.48300692215284097, "grad_norm": 0.0732421875, "learning_rate": 0.001980067159066928, "loss": 0.233, "step": 68050 }, { "epoch": 0.4830779003926871, "grad_norm": 0.146484375, "learning_rate": 0.001980061235011925, "loss": 0.2311, "step": 68060 }, { "epoch": 0.48314887863253325, "grad_norm": 0.10693359375, "learning_rate": 0.0019800553100865967, "loss": 0.2478, "step": 68070 }, { "epoch": 0.48321985687237934, "grad_norm": 0.05859375, "learning_rate": 0.0019800493842909487, "loss": 0.2476, "step": 68080 }, { "epoch": 0.4832908351122255, "grad_norm": 0.1572265625, "learning_rate": 0.001980043457624987, "loss": 0.2254, "step": 68090 }, { "epoch": 0.48336181335207157, "grad_norm": 0.1298828125, "learning_rate": 0.0019800375300887177, "loss": 0.2731, "step": 68100 }, { "epoch": 0.4834327915919177, "grad_norm": 0.0732421875, "learning_rate": 0.0019800316016821465, "loss": 0.2279, "step": 68110 }, { "epoch": 0.48350376983176385, "grad_norm": 0.1240234375, "learning_rate": 0.001980025672405279, "loss": 0.2333, "step": 68120 }, { "epoch": 0.48357474807160994, "grad_norm": 0.08251953125, "learning_rate": 0.0019800197422581212, "loss": 0.2377, "step": 68130 }, { "epoch": 0.4836457263114561, "grad_norm": 0.1318359375, "learning_rate": 0.0019800138112406793, "loss": 0.2597, "step": 68140 }, { "epoch": 0.48371670455130217, "grad_norm": 0.119140625, "learning_rate": 0.0019800078793529587, "loss": 0.2479, "step": 68150 }, { "epoch": 0.4837876827911483, "grad_norm": 0.0830078125, "learning_rate": 0.001980001946594965, "loss": 0.2293, "step": 68160 }, { "epoch": 0.4838586610309944, "grad_norm": 0.11474609375, "learning_rate": 0.0019799960129667053, "loss": 0.2563, "step": 68170 }, { "epoch": 0.48392963927084054, "grad_norm": 0.09228515625, "learning_rate": 0.0019799900784681843, "loss": 0.2241, "step": 68180 }, { "epoch": 0.4840006175106867, "grad_norm": 0.1279296875, "learning_rate": 0.0019799841430994084, "loss": 0.2466, "step": 68190 }, { "epoch": 0.48407159575053277, "grad_norm": 0.09521484375, "learning_rate": 0.0019799782068603833, "loss": 0.2119, "step": 68200 }, { "epoch": 0.4841425739903789, "grad_norm": 0.2109375, "learning_rate": 0.001979972269751115, "loss": 0.2573, "step": 68210 }, { "epoch": 0.484213552230225, "grad_norm": 0.076171875, "learning_rate": 0.0019799663317716094, "loss": 0.233, "step": 68220 }, { "epoch": 0.48428453047007114, "grad_norm": 0.08154296875, "learning_rate": 0.0019799603929218715, "loss": 0.232, "step": 68230 }, { "epoch": 0.4843555087099173, "grad_norm": 0.0830078125, "learning_rate": 0.0019799544532019086, "loss": 0.2362, "step": 68240 }, { "epoch": 0.48442648694976337, "grad_norm": 0.07470703125, "learning_rate": 0.001979948512611726, "loss": 0.2355, "step": 68250 }, { "epoch": 0.4844974651896095, "grad_norm": 0.12890625, "learning_rate": 0.001979942571151329, "loss": 0.2374, "step": 68260 }, { "epoch": 0.4845684434294556, "grad_norm": 0.11962890625, "learning_rate": 0.0019799366288207245, "loss": 0.2428, "step": 68270 }, { "epoch": 0.48463942166930174, "grad_norm": 0.12255859375, "learning_rate": 0.001979930685619917, "loss": 0.254, "step": 68280 }, { "epoch": 0.4847103999091478, "grad_norm": 0.0849609375, "learning_rate": 0.001979924741548914, "loss": 0.2287, "step": 68290 }, { "epoch": 0.48478137814899397, "grad_norm": 0.0927734375, "learning_rate": 0.00197991879660772, "loss": 0.234, "step": 68300 }, { "epoch": 0.4848523563888401, "grad_norm": 0.0830078125, "learning_rate": 0.001979912850796342, "loss": 0.2273, "step": 68310 }, { "epoch": 0.4849233346286862, "grad_norm": 0.1337890625, "learning_rate": 0.0019799069041147852, "loss": 0.2327, "step": 68320 }, { "epoch": 0.48499431286853234, "grad_norm": 0.09130859375, "learning_rate": 0.0019799009565630555, "loss": 0.2372, "step": 68330 }, { "epoch": 0.4850652911083784, "grad_norm": 0.1142578125, "learning_rate": 0.001979895008141159, "loss": 0.2423, "step": 68340 }, { "epoch": 0.48513626934822457, "grad_norm": 0.1474609375, "learning_rate": 0.0019798890588491012, "loss": 0.2551, "step": 68350 }, { "epoch": 0.4852072475880707, "grad_norm": 0.103515625, "learning_rate": 0.0019798831086868885, "loss": 0.215, "step": 68360 }, { "epoch": 0.4852782258279168, "grad_norm": 0.1806640625, "learning_rate": 0.0019798771576545263, "loss": 0.2409, "step": 68370 }, { "epoch": 0.48534920406776294, "grad_norm": 0.0654296875, "learning_rate": 0.001979871205752021, "loss": 0.2491, "step": 68380 }, { "epoch": 0.485420182307609, "grad_norm": 0.11865234375, "learning_rate": 0.0019798652529793783, "loss": 0.237, "step": 68390 }, { "epoch": 0.48549116054745517, "grad_norm": 0.119140625, "learning_rate": 0.0019798592993366037, "loss": 0.217, "step": 68400 }, { "epoch": 0.48556213878730126, "grad_norm": 0.11669921875, "learning_rate": 0.0019798533448237036, "loss": 0.2585, "step": 68410 }, { "epoch": 0.4856331170271474, "grad_norm": 0.0869140625, "learning_rate": 0.0019798473894406836, "loss": 0.222, "step": 68420 }, { "epoch": 0.48570409526699354, "grad_norm": 0.091796875, "learning_rate": 0.0019798414331875496, "loss": 0.2252, "step": 68430 }, { "epoch": 0.4857750735068396, "grad_norm": 0.1328125, "learning_rate": 0.001979835476064308, "loss": 0.2354, "step": 68440 }, { "epoch": 0.48584605174668577, "grad_norm": 0.11279296875, "learning_rate": 0.001979829518070964, "loss": 0.2341, "step": 68450 }, { "epoch": 0.48591702998653186, "grad_norm": 0.0966796875, "learning_rate": 0.0019798235592075235, "loss": 0.223, "step": 68460 }, { "epoch": 0.485988008226378, "grad_norm": 0.10595703125, "learning_rate": 0.0019798175994739926, "loss": 0.2324, "step": 68470 }, { "epoch": 0.48605898646622414, "grad_norm": 0.109375, "learning_rate": 0.0019798116388703778, "loss": 0.2358, "step": 68480 }, { "epoch": 0.48612996470607023, "grad_norm": 0.11181640625, "learning_rate": 0.0019798056773966837, "loss": 0.2532, "step": 68490 }, { "epoch": 0.48620094294591637, "grad_norm": 0.09033203125, "learning_rate": 0.001979799715052918, "loss": 0.2349, "step": 68500 }, { "epoch": 0.48627192118576246, "grad_norm": 0.1484375, "learning_rate": 0.0019797937518390846, "loss": 0.2279, "step": 68510 }, { "epoch": 0.4863428994256086, "grad_norm": 0.11962890625, "learning_rate": 0.0019797877877551903, "loss": 0.2524, "step": 68520 }, { "epoch": 0.4864138776654547, "grad_norm": 0.138671875, "learning_rate": 0.0019797818228012415, "loss": 0.2247, "step": 68530 }, { "epoch": 0.48648485590530083, "grad_norm": 0.1171875, "learning_rate": 0.0019797758569772435, "loss": 0.2347, "step": 68540 }, { "epoch": 0.48655583414514697, "grad_norm": 0.150390625, "learning_rate": 0.0019797698902832023, "loss": 0.2284, "step": 68550 }, { "epoch": 0.48662681238499306, "grad_norm": 0.12255859375, "learning_rate": 0.0019797639227191236, "loss": 0.2394, "step": 68560 }, { "epoch": 0.4866977906248392, "grad_norm": 0.07861328125, "learning_rate": 0.0019797579542850134, "loss": 0.2367, "step": 68570 }, { "epoch": 0.4867687688646853, "grad_norm": 0.072265625, "learning_rate": 0.001979751984980878, "loss": 0.2348, "step": 68580 }, { "epoch": 0.48683974710453143, "grad_norm": 0.2333984375, "learning_rate": 0.001979746014806723, "loss": 0.2339, "step": 68590 }, { "epoch": 0.48691072534437757, "grad_norm": 0.07958984375, "learning_rate": 0.001979740043762554, "loss": 0.2278, "step": 68600 }, { "epoch": 0.48698170358422366, "grad_norm": 0.1103515625, "learning_rate": 0.001979734071848378, "loss": 0.239, "step": 68610 }, { "epoch": 0.4870526818240698, "grad_norm": 0.08984375, "learning_rate": 0.0019797280990641996, "loss": 0.234, "step": 68620 }, { "epoch": 0.4871236600639159, "grad_norm": 0.0830078125, "learning_rate": 0.0019797221254100254, "loss": 0.2498, "step": 68630 }, { "epoch": 0.48719463830376203, "grad_norm": 0.15625, "learning_rate": 0.0019797161508858614, "loss": 0.2222, "step": 68640 }, { "epoch": 0.48726561654360817, "grad_norm": 0.0693359375, "learning_rate": 0.0019797101754917127, "loss": 0.2315, "step": 68650 }, { "epoch": 0.48733659478345426, "grad_norm": 0.08837890625, "learning_rate": 0.0019797041992275864, "loss": 0.2262, "step": 68660 }, { "epoch": 0.4874075730233004, "grad_norm": 0.11767578125, "learning_rate": 0.001979698222093487, "loss": 0.2262, "step": 68670 }, { "epoch": 0.4874785512631465, "grad_norm": 0.08837890625, "learning_rate": 0.0019796922440894218, "loss": 0.2181, "step": 68680 }, { "epoch": 0.48754952950299263, "grad_norm": 0.087890625, "learning_rate": 0.001979686265215396, "loss": 0.2439, "step": 68690 }, { "epoch": 0.4876205077428387, "grad_norm": 0.212890625, "learning_rate": 0.0019796802854714153, "loss": 0.2382, "step": 68700 }, { "epoch": 0.48769148598268486, "grad_norm": 0.154296875, "learning_rate": 0.0019796743048574863, "loss": 0.2444, "step": 68710 }, { "epoch": 0.487762464222531, "grad_norm": 0.1669921875, "learning_rate": 0.0019796683233736143, "loss": 0.247, "step": 68720 }, { "epoch": 0.4878334424623771, "grad_norm": 0.11181640625, "learning_rate": 0.0019796623410198054, "loss": 0.2459, "step": 68730 }, { "epoch": 0.48790442070222323, "grad_norm": 0.1142578125, "learning_rate": 0.0019796563577960664, "loss": 0.2393, "step": 68740 }, { "epoch": 0.4879753989420693, "grad_norm": 0.08935546875, "learning_rate": 0.0019796503737024014, "loss": 0.2343, "step": 68750 }, { "epoch": 0.48804637718191546, "grad_norm": 0.09375, "learning_rate": 0.0019796443887388176, "loss": 0.2168, "step": 68760 }, { "epoch": 0.4881173554217616, "grad_norm": 0.087890625, "learning_rate": 0.001979638402905321, "loss": 0.2316, "step": 68770 }, { "epoch": 0.4881883336616077, "grad_norm": 0.09814453125, "learning_rate": 0.0019796324162019165, "loss": 0.2182, "step": 68780 }, { "epoch": 0.48825931190145383, "grad_norm": 0.1279296875, "learning_rate": 0.001979626428628611, "loss": 0.2345, "step": 68790 }, { "epoch": 0.4883302901412999, "grad_norm": 0.10400390625, "learning_rate": 0.00197962044018541, "loss": 0.2391, "step": 68800 }, { "epoch": 0.48840126838114606, "grad_norm": 0.123046875, "learning_rate": 0.00197961445087232, "loss": 0.2351, "step": 68810 }, { "epoch": 0.48847224662099215, "grad_norm": 0.10302734375, "learning_rate": 0.001979608460689346, "loss": 0.223, "step": 68820 }, { "epoch": 0.4885432248608383, "grad_norm": 0.1376953125, "learning_rate": 0.0019796024696364946, "loss": 0.2402, "step": 68830 }, { "epoch": 0.48861420310068443, "grad_norm": 0.10400390625, "learning_rate": 0.001979596477713771, "loss": 0.2345, "step": 68840 }, { "epoch": 0.4886851813405305, "grad_norm": 0.1943359375, "learning_rate": 0.001979590484921182, "loss": 0.2603, "step": 68850 }, { "epoch": 0.48875615958037666, "grad_norm": 0.09912109375, "learning_rate": 0.0019795844912587333, "loss": 0.2325, "step": 68860 }, { "epoch": 0.48882713782022275, "grad_norm": 0.10791015625, "learning_rate": 0.00197957849672643, "loss": 0.258, "step": 68870 }, { "epoch": 0.4888981160600689, "grad_norm": 0.11279296875, "learning_rate": 0.001979572501324279, "loss": 0.2276, "step": 68880 }, { "epoch": 0.48896909429991503, "grad_norm": 0.09326171875, "learning_rate": 0.0019795665050522863, "loss": 0.2436, "step": 68890 }, { "epoch": 0.4890400725397611, "grad_norm": 0.0986328125, "learning_rate": 0.001979560507910457, "loss": 0.2254, "step": 68900 }, { "epoch": 0.48911105077960726, "grad_norm": 0.107421875, "learning_rate": 0.001979554509898798, "loss": 0.2518, "step": 68910 }, { "epoch": 0.48918202901945335, "grad_norm": 0.11767578125, "learning_rate": 0.001979548511017314, "loss": 0.2349, "step": 68920 }, { "epoch": 0.4892530072592995, "grad_norm": 0.09912109375, "learning_rate": 0.001979542511266012, "loss": 0.2344, "step": 68930 }, { "epoch": 0.4893239854991456, "grad_norm": 0.1015625, "learning_rate": 0.001979536510644898, "loss": 0.2537, "step": 68940 }, { "epoch": 0.4893949637389917, "grad_norm": 0.125, "learning_rate": 0.001979530509153977, "loss": 0.2206, "step": 68950 }, { "epoch": 0.48946594197883786, "grad_norm": 0.1748046875, "learning_rate": 0.0019795245067932557, "loss": 0.246, "step": 68960 }, { "epoch": 0.48953692021868395, "grad_norm": 0.099609375, "learning_rate": 0.0019795185035627397, "loss": 0.2341, "step": 68970 }, { "epoch": 0.4896078984585301, "grad_norm": 0.1318359375, "learning_rate": 0.0019795124994624346, "loss": 0.2289, "step": 68980 }, { "epoch": 0.4896788766983762, "grad_norm": 0.09228515625, "learning_rate": 0.0019795064944923475, "loss": 0.2308, "step": 68990 }, { "epoch": 0.4897498549382223, "grad_norm": 0.11279296875, "learning_rate": 0.001979500488652483, "loss": 0.2367, "step": 69000 }, { "epoch": 0.48982083317806846, "grad_norm": 0.1142578125, "learning_rate": 0.001979494481942848, "loss": 0.2314, "step": 69010 }, { "epoch": 0.48989181141791455, "grad_norm": 0.11767578125, "learning_rate": 0.001979488474363448, "loss": 0.2257, "step": 69020 }, { "epoch": 0.4899627896577607, "grad_norm": 0.07958984375, "learning_rate": 0.001979482465914289, "loss": 0.2236, "step": 69030 }, { "epoch": 0.4900337678976068, "grad_norm": 0.1611328125, "learning_rate": 0.001979476456595377, "loss": 0.2385, "step": 69040 }, { "epoch": 0.4901047461374529, "grad_norm": 0.1474609375, "learning_rate": 0.001979470446406718, "loss": 0.2612, "step": 69050 }, { "epoch": 0.490175724377299, "grad_norm": 0.11669921875, "learning_rate": 0.0019794644353483175, "loss": 0.2197, "step": 69060 }, { "epoch": 0.49024670261714515, "grad_norm": 0.13671875, "learning_rate": 0.0019794584234201816, "loss": 0.2334, "step": 69070 }, { "epoch": 0.4903176808569913, "grad_norm": 0.11572265625, "learning_rate": 0.001979452410622317, "loss": 0.2264, "step": 69080 }, { "epoch": 0.4903886590968374, "grad_norm": 0.12158203125, "learning_rate": 0.0019794463969547292, "loss": 0.2493, "step": 69090 }, { "epoch": 0.4904596373366835, "grad_norm": 0.11865234375, "learning_rate": 0.001979440382417424, "loss": 0.2243, "step": 69100 }, { "epoch": 0.4905306155765296, "grad_norm": 0.07861328125, "learning_rate": 0.001979434367010407, "loss": 0.2212, "step": 69110 }, { "epoch": 0.49060159381637575, "grad_norm": 0.1181640625, "learning_rate": 0.0019794283507336844, "loss": 0.2465, "step": 69120 }, { "epoch": 0.4906725720562219, "grad_norm": 0.150390625, "learning_rate": 0.0019794223335872626, "loss": 0.2307, "step": 69130 }, { "epoch": 0.490743550296068, "grad_norm": 0.115234375, "learning_rate": 0.0019794163155711472, "loss": 0.2368, "step": 69140 }, { "epoch": 0.4908145285359141, "grad_norm": 0.07861328125, "learning_rate": 0.0019794102966853443, "loss": 0.2385, "step": 69150 }, { "epoch": 0.4908855067757602, "grad_norm": 0.08203125, "learning_rate": 0.0019794042769298594, "loss": 0.2325, "step": 69160 }, { "epoch": 0.49095648501560635, "grad_norm": 0.142578125, "learning_rate": 0.0019793982563046987, "loss": 0.2373, "step": 69170 }, { "epoch": 0.49102746325545243, "grad_norm": 0.099609375, "learning_rate": 0.001979392234809869, "loss": 0.2478, "step": 69180 }, { "epoch": 0.4910984414952986, "grad_norm": 0.09619140625, "learning_rate": 0.0019793862124453747, "loss": 0.2489, "step": 69190 }, { "epoch": 0.4911694197351447, "grad_norm": 0.09423828125, "learning_rate": 0.001979380189211223, "loss": 0.2312, "step": 69200 }, { "epoch": 0.4912403979749908, "grad_norm": 0.1552734375, "learning_rate": 0.001979374165107419, "loss": 0.237, "step": 69210 }, { "epoch": 0.49131137621483695, "grad_norm": 0.10400390625, "learning_rate": 0.0019793681401339695, "loss": 0.2608, "step": 69220 }, { "epoch": 0.49138235445468303, "grad_norm": 0.08740234375, "learning_rate": 0.00197936211429088, "loss": 0.2266, "step": 69230 }, { "epoch": 0.4914533326945292, "grad_norm": 0.09228515625, "learning_rate": 0.0019793560875781564, "loss": 0.2435, "step": 69240 }, { "epoch": 0.4915243109343753, "grad_norm": 0.1044921875, "learning_rate": 0.0019793500599958047, "loss": 0.2305, "step": 69250 }, { "epoch": 0.4915952891742214, "grad_norm": 0.123046875, "learning_rate": 0.0019793440315438306, "loss": 0.2179, "step": 69260 }, { "epoch": 0.49166626741406755, "grad_norm": 0.16015625, "learning_rate": 0.0019793380022222404, "loss": 0.2407, "step": 69270 }, { "epoch": 0.49173724565391363, "grad_norm": 0.078125, "learning_rate": 0.0019793319720310407, "loss": 0.2235, "step": 69280 }, { "epoch": 0.4918082238937598, "grad_norm": 0.059326171875, "learning_rate": 0.0019793259409702363, "loss": 0.2301, "step": 69290 }, { "epoch": 0.49187920213360586, "grad_norm": 0.1181640625, "learning_rate": 0.0019793199090398336, "loss": 0.2463, "step": 69300 }, { "epoch": 0.491950180373452, "grad_norm": 0.12451171875, "learning_rate": 0.0019793138762398384, "loss": 0.2464, "step": 69310 }, { "epoch": 0.49202115861329815, "grad_norm": 0.1845703125, "learning_rate": 0.0019793078425702575, "loss": 0.2231, "step": 69320 }, { "epoch": 0.49209213685314424, "grad_norm": 0.08740234375, "learning_rate": 0.001979301808031096, "loss": 0.2333, "step": 69330 }, { "epoch": 0.4921631150929904, "grad_norm": 0.1318359375, "learning_rate": 0.00197929577262236, "loss": 0.2328, "step": 69340 }, { "epoch": 0.49223409333283646, "grad_norm": 0.12451171875, "learning_rate": 0.001979289736344056, "loss": 0.2232, "step": 69350 }, { "epoch": 0.4923050715726826, "grad_norm": 0.08447265625, "learning_rate": 0.0019792836991961893, "loss": 0.2471, "step": 69360 }, { "epoch": 0.49237604981252875, "grad_norm": 0.09375, "learning_rate": 0.001979277661178766, "loss": 0.2312, "step": 69370 }, { "epoch": 0.49244702805237484, "grad_norm": 0.13671875, "learning_rate": 0.001979271622291792, "loss": 0.224, "step": 69380 }, { "epoch": 0.492518006292221, "grad_norm": 0.166015625, "learning_rate": 0.001979265582535274, "loss": 0.2183, "step": 69390 }, { "epoch": 0.49258898453206706, "grad_norm": 0.0927734375, "learning_rate": 0.0019792595419092173, "loss": 0.2212, "step": 69400 }, { "epoch": 0.4926599627719132, "grad_norm": 0.09521484375, "learning_rate": 0.001979253500413628, "loss": 0.2346, "step": 69410 }, { "epoch": 0.4927309410117593, "grad_norm": 0.1015625, "learning_rate": 0.001979247458048512, "loss": 0.2343, "step": 69420 }, { "epoch": 0.49280191925160544, "grad_norm": 0.09619140625, "learning_rate": 0.001979241414813875, "loss": 0.2255, "step": 69430 }, { "epoch": 0.4928728974914516, "grad_norm": 0.1220703125, "learning_rate": 0.001979235370709724, "loss": 0.2184, "step": 69440 }, { "epoch": 0.49294387573129766, "grad_norm": 0.10498046875, "learning_rate": 0.001979229325736064, "loss": 0.2358, "step": 69450 }, { "epoch": 0.4930148539711438, "grad_norm": 0.058349609375, "learning_rate": 0.0019792232798929014, "loss": 0.2138, "step": 69460 }, { "epoch": 0.4930858322109899, "grad_norm": 0.140625, "learning_rate": 0.0019792172331802417, "loss": 0.2431, "step": 69470 }, { "epoch": 0.49315681045083604, "grad_norm": 0.1142578125, "learning_rate": 0.001979211185598092, "loss": 0.2329, "step": 69480 }, { "epoch": 0.4932277886906822, "grad_norm": 0.1083984375, "learning_rate": 0.001979205137146457, "loss": 0.2479, "step": 69490 }, { "epoch": 0.49329876693052827, "grad_norm": 0.10888671875, "learning_rate": 0.0019791990878253434, "loss": 0.2465, "step": 69500 }, { "epoch": 0.4933697451703744, "grad_norm": 0.099609375, "learning_rate": 0.001979193037634757, "loss": 0.2302, "step": 69510 }, { "epoch": 0.4934407234102205, "grad_norm": 0.1103515625, "learning_rate": 0.0019791869865747034, "loss": 0.2256, "step": 69520 }, { "epoch": 0.49351170165006664, "grad_norm": 0.12109375, "learning_rate": 0.0019791809346451895, "loss": 0.2213, "step": 69530 }, { "epoch": 0.4935826798899127, "grad_norm": 0.115234375, "learning_rate": 0.001979174881846221, "loss": 0.2393, "step": 69540 }, { "epoch": 0.49365365812975887, "grad_norm": 0.103515625, "learning_rate": 0.001979168828177803, "loss": 0.2469, "step": 69550 }, { "epoch": 0.493724636369605, "grad_norm": 0.10791015625, "learning_rate": 0.001979162773639942, "loss": 0.2201, "step": 69560 }, { "epoch": 0.4937956146094511, "grad_norm": 0.109375, "learning_rate": 0.0019791567182326445, "loss": 0.2292, "step": 69570 }, { "epoch": 0.49386659284929724, "grad_norm": 0.08837890625, "learning_rate": 0.001979150661955916, "loss": 0.2339, "step": 69580 }, { "epoch": 0.4939375710891433, "grad_norm": 0.1484375, "learning_rate": 0.001979144604809763, "loss": 0.2055, "step": 69590 }, { "epoch": 0.49400854932898947, "grad_norm": 0.07763671875, "learning_rate": 0.0019791385467941907, "loss": 0.2178, "step": 69600 }, { "epoch": 0.4940795275688356, "grad_norm": 0.08154296875, "learning_rate": 0.0019791324879092054, "loss": 0.2279, "step": 69610 }, { "epoch": 0.4941505058086817, "grad_norm": 0.08544921875, "learning_rate": 0.0019791264281548136, "loss": 0.2466, "step": 69620 }, { "epoch": 0.49422148404852784, "grad_norm": 0.1357421875, "learning_rate": 0.0019791203675310204, "loss": 0.2325, "step": 69630 }, { "epoch": 0.4942924622883739, "grad_norm": 0.1396484375, "learning_rate": 0.0019791143060378324, "loss": 0.2351, "step": 69640 }, { "epoch": 0.49436344052822007, "grad_norm": 0.09765625, "learning_rate": 0.0019791082436752556, "loss": 0.2282, "step": 69650 }, { "epoch": 0.49443441876806615, "grad_norm": 0.07958984375, "learning_rate": 0.0019791021804432957, "loss": 0.242, "step": 69660 }, { "epoch": 0.4945053970079123, "grad_norm": 0.1005859375, "learning_rate": 0.0019790961163419587, "loss": 0.2455, "step": 69670 }, { "epoch": 0.49457637524775844, "grad_norm": 0.07470703125, "learning_rate": 0.001979090051371251, "loss": 0.2356, "step": 69680 }, { "epoch": 0.4946473534876045, "grad_norm": 0.1357421875, "learning_rate": 0.0019790839855311783, "loss": 0.2192, "step": 69690 }, { "epoch": 0.49471833172745067, "grad_norm": 0.177734375, "learning_rate": 0.0019790779188217466, "loss": 0.2404, "step": 69700 }, { "epoch": 0.49478930996729675, "grad_norm": 0.11572265625, "learning_rate": 0.0019790718512429618, "loss": 0.2433, "step": 69710 }, { "epoch": 0.4948602882071429, "grad_norm": 0.08740234375, "learning_rate": 0.00197906578279483, "loss": 0.2289, "step": 69720 }, { "epoch": 0.49493126644698904, "grad_norm": 0.10888671875, "learning_rate": 0.0019790597134773575, "loss": 0.24, "step": 69730 }, { "epoch": 0.4950022446868351, "grad_norm": 0.1005859375, "learning_rate": 0.00197905364329055, "loss": 0.2364, "step": 69740 }, { "epoch": 0.49507322292668127, "grad_norm": 0.11962890625, "learning_rate": 0.0019790475722344133, "loss": 0.2222, "step": 69750 }, { "epoch": 0.49514420116652735, "grad_norm": 0.06201171875, "learning_rate": 0.001979041500308954, "loss": 0.2362, "step": 69760 }, { "epoch": 0.4952151794063735, "grad_norm": 0.0986328125, "learning_rate": 0.0019790354275141774, "loss": 0.2432, "step": 69770 }, { "epoch": 0.4952861576462196, "grad_norm": 0.07373046875, "learning_rate": 0.00197902935385009, "loss": 0.2414, "step": 69780 }, { "epoch": 0.4953571358860657, "grad_norm": 0.0849609375, "learning_rate": 0.0019790232793166977, "loss": 0.2403, "step": 69790 }, { "epoch": 0.49542811412591187, "grad_norm": 0.06640625, "learning_rate": 0.001979017203914007, "loss": 0.2243, "step": 69800 }, { "epoch": 0.49549909236575795, "grad_norm": 0.1123046875, "learning_rate": 0.001979011127642022, "loss": 0.2236, "step": 69810 }, { "epoch": 0.4955700706056041, "grad_norm": 0.0869140625, "learning_rate": 0.001979005050500751, "loss": 0.2395, "step": 69820 }, { "epoch": 0.4956410488454502, "grad_norm": 0.099609375, "learning_rate": 0.001978998972490199, "loss": 0.2257, "step": 69830 }, { "epoch": 0.4957120270852963, "grad_norm": 0.1279296875, "learning_rate": 0.001978992893610372, "loss": 0.2337, "step": 69840 }, { "epoch": 0.49578300532514247, "grad_norm": 0.103515625, "learning_rate": 0.0019789868138612764, "loss": 0.2345, "step": 69850 }, { "epoch": 0.49585398356498855, "grad_norm": 0.1259765625, "learning_rate": 0.0019789807332429177, "loss": 0.2352, "step": 69860 }, { "epoch": 0.4959249618048347, "grad_norm": 0.10595703125, "learning_rate": 0.001978974651755302, "loss": 0.2345, "step": 69870 }, { "epoch": 0.4959959400446808, "grad_norm": 0.12890625, "learning_rate": 0.0019789685693984357, "loss": 0.2451, "step": 69880 }, { "epoch": 0.4960669182845269, "grad_norm": 0.103515625, "learning_rate": 0.001978962486172325, "loss": 0.2285, "step": 69890 }, { "epoch": 0.49613789652437307, "grad_norm": 0.1962890625, "learning_rate": 0.0019789564020769746, "loss": 0.2391, "step": 69900 }, { "epoch": 0.49620887476421915, "grad_norm": 0.14453125, "learning_rate": 0.001978950317112392, "loss": 0.2276, "step": 69910 }, { "epoch": 0.4962798530040653, "grad_norm": 0.080078125, "learning_rate": 0.0019789442312785825, "loss": 0.232, "step": 69920 }, { "epoch": 0.4963508312439114, "grad_norm": 0.1123046875, "learning_rate": 0.001978938144575552, "loss": 0.2316, "step": 69930 }, { "epoch": 0.4964218094837575, "grad_norm": 0.064453125, "learning_rate": 0.001978932057003307, "loss": 0.233, "step": 69940 }, { "epoch": 0.4964927877236036, "grad_norm": 0.109375, "learning_rate": 0.0019789259685618534, "loss": 0.2425, "step": 69950 }, { "epoch": 0.49656376596344975, "grad_norm": 0.09326171875, "learning_rate": 0.001978919879251197, "loss": 0.2596, "step": 69960 }, { "epoch": 0.4966347442032959, "grad_norm": 0.0830078125, "learning_rate": 0.0019789137890713435, "loss": 0.2337, "step": 69970 }, { "epoch": 0.496705722443142, "grad_norm": 0.109375, "learning_rate": 0.0019789076980223, "loss": 0.2338, "step": 69980 }, { "epoch": 0.4967767006829881, "grad_norm": 0.08203125, "learning_rate": 0.0019789016061040714, "loss": 0.2358, "step": 69990 }, { "epoch": 0.4968476789228342, "grad_norm": 0.0849609375, "learning_rate": 0.0019788955133166644, "loss": 0.2385, "step": 70000 }, { "epoch": 0.4968476789228342, "eval_covost2-zh-en_loss": 3.815560817718506, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.4236, "eval_covost2-zh-en_samples_per_second": 3.134, "eval_covost2-zh-en_steps_per_second": 0.196, "step": 70000 }, { "epoch": 0.4968476789228342, "eval_covost2-en-zh_loss": 3.1480448246002197, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.9607, "eval_covost2-en-zh_samples_per_second": 3.206, "eval_covost2-en-zh_steps_per_second": 0.2, "step": 70000 }, { "epoch": 0.49691865716268036, "grad_norm": 0.142578125, "learning_rate": 0.0019788894196600846, "loss": 0.2317, "step": 70010 }, { "epoch": 0.4969896354025265, "grad_norm": 0.1484375, "learning_rate": 0.0019788833251343386, "loss": 0.2306, "step": 70020 }, { "epoch": 0.4970606136423726, "grad_norm": 0.15234375, "learning_rate": 0.001978877229739432, "loss": 0.2274, "step": 70030 }, { "epoch": 0.4971315918822187, "grad_norm": 0.07470703125, "learning_rate": 0.0019788711334753706, "loss": 0.2326, "step": 70040 }, { "epoch": 0.4972025701220648, "grad_norm": 0.20703125, "learning_rate": 0.0019788650363421613, "loss": 0.2395, "step": 70050 }, { "epoch": 0.49727354836191096, "grad_norm": 0.08935546875, "learning_rate": 0.001978858938339809, "loss": 0.2256, "step": 70060 }, { "epoch": 0.49734452660175704, "grad_norm": 0.1298828125, "learning_rate": 0.001978852839468321, "loss": 0.2365, "step": 70070 }, { "epoch": 0.4974155048416032, "grad_norm": 0.1396484375, "learning_rate": 0.0019788467397277017, "loss": 0.2252, "step": 70080 }, { "epoch": 0.4974864830814493, "grad_norm": 0.12060546875, "learning_rate": 0.0019788406391179586, "loss": 0.245, "step": 70090 }, { "epoch": 0.4975574613212954, "grad_norm": 0.146484375, "learning_rate": 0.001978834537639097, "loss": 0.2243, "step": 70100 }, { "epoch": 0.49762843956114156, "grad_norm": 0.2373046875, "learning_rate": 0.001978828435291123, "loss": 0.2473, "step": 70110 }, { "epoch": 0.49769941780098764, "grad_norm": 0.10302734375, "learning_rate": 0.001978822332074043, "loss": 0.227, "step": 70120 }, { "epoch": 0.4977703960408338, "grad_norm": 0.11328125, "learning_rate": 0.001978816227987863, "loss": 0.2498, "step": 70130 }, { "epoch": 0.4978413742806799, "grad_norm": 0.150390625, "learning_rate": 0.001978810123032588, "loss": 0.2331, "step": 70140 }, { "epoch": 0.497912352520526, "grad_norm": 0.0966796875, "learning_rate": 0.001978804017208226, "loss": 0.2214, "step": 70150 }, { "epoch": 0.49798333076037216, "grad_norm": 0.107421875, "learning_rate": 0.001978797910514781, "loss": 0.2306, "step": 70160 }, { "epoch": 0.49805430900021824, "grad_norm": 0.0849609375, "learning_rate": 0.0019787918029522604, "loss": 0.2277, "step": 70170 }, { "epoch": 0.4981252872400644, "grad_norm": 0.12890625, "learning_rate": 0.0019787856945206695, "loss": 0.2196, "step": 70180 }, { "epoch": 0.49819626547991047, "grad_norm": 0.11376953125, "learning_rate": 0.001978779585220015, "loss": 0.2358, "step": 70190 }, { "epoch": 0.4982672437197566, "grad_norm": 0.1962890625, "learning_rate": 0.0019787734750503026, "loss": 0.2288, "step": 70200 }, { "epoch": 0.49833822195960276, "grad_norm": 0.09912109375, "learning_rate": 0.0019787673640115377, "loss": 0.2204, "step": 70210 }, { "epoch": 0.49840920019944884, "grad_norm": 0.0830078125, "learning_rate": 0.0019787612521037275, "loss": 0.2448, "step": 70220 }, { "epoch": 0.498480178439295, "grad_norm": 0.06591796875, "learning_rate": 0.0019787551393268773, "loss": 0.2267, "step": 70230 }, { "epoch": 0.49855115667914107, "grad_norm": 0.0869140625, "learning_rate": 0.0019787490256809935, "loss": 0.2344, "step": 70240 }, { "epoch": 0.4986221349189872, "grad_norm": 0.07470703125, "learning_rate": 0.0019787429111660817, "loss": 0.2272, "step": 70250 }, { "epoch": 0.49869311315883336, "grad_norm": 0.09423828125, "learning_rate": 0.0019787367957821485, "loss": 0.2394, "step": 70260 }, { "epoch": 0.49876409139867944, "grad_norm": 0.150390625, "learning_rate": 0.0019787306795291996, "loss": 0.2376, "step": 70270 }, { "epoch": 0.4988350696385256, "grad_norm": 0.380859375, "learning_rate": 0.001978724562407241, "loss": 0.2297, "step": 70280 }, { "epoch": 0.4989060478783717, "grad_norm": 0.13671875, "learning_rate": 0.001978718444416279, "loss": 0.2513, "step": 70290 }, { "epoch": 0.4989770261182178, "grad_norm": 0.12060546875, "learning_rate": 0.0019787123255563194, "loss": 0.239, "step": 70300 }, { "epoch": 0.4990480043580639, "grad_norm": 0.10791015625, "learning_rate": 0.001978706205827369, "loss": 0.2377, "step": 70310 }, { "epoch": 0.49911898259791004, "grad_norm": 0.20703125, "learning_rate": 0.0019787000852294324, "loss": 0.2336, "step": 70320 }, { "epoch": 0.4991899608377562, "grad_norm": 0.16015625, "learning_rate": 0.001978693963762517, "loss": 0.2361, "step": 70330 }, { "epoch": 0.4992609390776023, "grad_norm": 0.12890625, "learning_rate": 0.001978687841426628, "loss": 0.2271, "step": 70340 }, { "epoch": 0.4993319173174484, "grad_norm": 0.0791015625, "learning_rate": 0.001978681718221772, "loss": 0.2523, "step": 70350 }, { "epoch": 0.4994028955572945, "grad_norm": 0.1015625, "learning_rate": 0.0019786755941479544, "loss": 0.235, "step": 70360 }, { "epoch": 0.49947387379714064, "grad_norm": 0.2001953125, "learning_rate": 0.0019786694692051827, "loss": 0.2163, "step": 70370 }, { "epoch": 0.4995448520369868, "grad_norm": 0.08935546875, "learning_rate": 0.001978663343393461, "loss": 0.2373, "step": 70380 }, { "epoch": 0.4996158302768329, "grad_norm": 0.330078125, "learning_rate": 0.001978657216712797, "loss": 0.2309, "step": 70390 }, { "epoch": 0.499686808516679, "grad_norm": 0.11083984375, "learning_rate": 0.0019786510891631956, "loss": 0.2323, "step": 70400 }, { "epoch": 0.4997577867565251, "grad_norm": 0.244140625, "learning_rate": 0.001978644960744664, "loss": 0.2279, "step": 70410 }, { "epoch": 0.49982876499637124, "grad_norm": 0.09619140625, "learning_rate": 0.0019786388314572065, "loss": 0.2579, "step": 70420 }, { "epoch": 0.49989974323621733, "grad_norm": 0.15234375, "learning_rate": 0.0019786327013008313, "loss": 0.2414, "step": 70430 }, { "epoch": 0.4999707214760635, "grad_norm": 0.09814453125, "learning_rate": 0.0019786265702755428, "loss": 0.2471, "step": 70440 }, { "epoch": 0.5000416997159096, "grad_norm": 0.1513671875, "learning_rate": 0.001978620438381348, "loss": 0.2432, "step": 70450 }, { "epoch": 0.5001126779557558, "grad_norm": 0.16015625, "learning_rate": 0.0019786143056182526, "loss": 0.237, "step": 70460 }, { "epoch": 0.5001836561956018, "grad_norm": 0.09619140625, "learning_rate": 0.0019786081719862627, "loss": 0.2212, "step": 70470 }, { "epoch": 0.5002546344354479, "grad_norm": 0.09765625, "learning_rate": 0.0019786020374853845, "loss": 0.2371, "step": 70480 }, { "epoch": 0.5003256126752941, "grad_norm": 0.111328125, "learning_rate": 0.0019785959021156236, "loss": 0.233, "step": 70490 }, { "epoch": 0.5003965909151402, "grad_norm": 0.09765625, "learning_rate": 0.0019785897658769866, "loss": 0.2448, "step": 70500 }, { "epoch": 0.5004675691549864, "grad_norm": 0.1171875, "learning_rate": 0.0019785836287694795, "loss": 0.233, "step": 70510 }, { "epoch": 0.5005385473948324, "grad_norm": 0.09375, "learning_rate": 0.0019785774907931084, "loss": 0.2395, "step": 70520 }, { "epoch": 0.5006095256346785, "grad_norm": 0.08056640625, "learning_rate": 0.0019785713519478785, "loss": 0.2214, "step": 70530 }, { "epoch": 0.5006805038745247, "grad_norm": 0.12255859375, "learning_rate": 0.001978565212233797, "loss": 0.2389, "step": 70540 }, { "epoch": 0.5007514821143708, "grad_norm": 0.0478515625, "learning_rate": 0.00197855907165087, "loss": 0.2218, "step": 70550 }, { "epoch": 0.500822460354217, "grad_norm": 0.1982421875, "learning_rate": 0.001978552930199103, "loss": 0.2272, "step": 70560 }, { "epoch": 0.500893438594063, "grad_norm": 0.1513671875, "learning_rate": 0.001978546787878502, "loss": 0.2301, "step": 70570 }, { "epoch": 0.5009644168339091, "grad_norm": 0.109375, "learning_rate": 0.001978540644689073, "loss": 0.2459, "step": 70580 }, { "epoch": 0.5010353950737553, "grad_norm": 0.09912109375, "learning_rate": 0.001978534500630823, "loss": 0.2296, "step": 70590 }, { "epoch": 0.5011063733136014, "grad_norm": 0.09521484375, "learning_rate": 0.001978528355703757, "loss": 0.2401, "step": 70600 }, { "epoch": 0.5011773515534474, "grad_norm": 0.140625, "learning_rate": 0.001978522209907882, "loss": 0.2437, "step": 70610 }, { "epoch": 0.5012483297932936, "grad_norm": 0.11865234375, "learning_rate": 0.001978516063243203, "loss": 0.2279, "step": 70620 }, { "epoch": 0.5013193080331397, "grad_norm": 0.11083984375, "learning_rate": 0.0019785099157097266, "loss": 0.2274, "step": 70630 }, { "epoch": 0.5013902862729859, "grad_norm": 0.181640625, "learning_rate": 0.0019785037673074597, "loss": 0.2466, "step": 70640 }, { "epoch": 0.501461264512832, "grad_norm": 0.10498046875, "learning_rate": 0.001978497618036407, "loss": 0.2403, "step": 70650 }, { "epoch": 0.501532242752678, "grad_norm": 0.10107421875, "learning_rate": 0.0019784914678965754, "loss": 0.2275, "step": 70660 }, { "epoch": 0.5016032209925242, "grad_norm": 0.10791015625, "learning_rate": 0.001978485316887971, "loss": 0.2478, "step": 70670 }, { "epoch": 0.5016741992323703, "grad_norm": 0.09619140625, "learning_rate": 0.0019784791650105994, "loss": 0.2316, "step": 70680 }, { "epoch": 0.5017451774722165, "grad_norm": 0.0791015625, "learning_rate": 0.001978473012264467, "loss": 0.23, "step": 70690 }, { "epoch": 0.5018161557120626, "grad_norm": 0.123046875, "learning_rate": 0.00197846685864958, "loss": 0.2421, "step": 70700 }, { "epoch": 0.5018871339519086, "grad_norm": 0.1318359375, "learning_rate": 0.001978460704165944, "loss": 0.2205, "step": 70710 }, { "epoch": 0.5019581121917548, "grad_norm": 0.1044921875, "learning_rate": 0.0019784545488135655, "loss": 0.231, "step": 70720 }, { "epoch": 0.5020290904316009, "grad_norm": 0.125, "learning_rate": 0.001978448392592451, "loss": 0.2394, "step": 70730 }, { "epoch": 0.5021000686714471, "grad_norm": 0.0849609375, "learning_rate": 0.0019784422355026057, "loss": 0.2264, "step": 70740 }, { "epoch": 0.5021710469112932, "grad_norm": 0.1318359375, "learning_rate": 0.001978436077544036, "loss": 0.2105, "step": 70750 }, { "epoch": 0.5022420251511392, "grad_norm": 0.07666015625, "learning_rate": 0.0019784299187167484, "loss": 0.234, "step": 70760 }, { "epoch": 0.5023130033909854, "grad_norm": 0.1015625, "learning_rate": 0.001978423759020748, "loss": 0.229, "step": 70770 }, { "epoch": 0.5023839816308315, "grad_norm": 0.0947265625, "learning_rate": 0.001978417598456042, "loss": 0.2384, "step": 70780 }, { "epoch": 0.5024549598706777, "grad_norm": 0.099609375, "learning_rate": 0.0019784114370226366, "loss": 0.2289, "step": 70790 }, { "epoch": 0.5025259381105238, "grad_norm": 0.0986328125, "learning_rate": 0.0019784052747205366, "loss": 0.234, "step": 70800 }, { "epoch": 0.5025969163503698, "grad_norm": 0.1953125, "learning_rate": 0.001978399111549749, "loss": 0.2604, "step": 70810 }, { "epoch": 0.502667894590216, "grad_norm": 0.1767578125, "learning_rate": 0.00197839294751028, "loss": 0.2462, "step": 70820 }, { "epoch": 0.5027388728300621, "grad_norm": 0.1015625, "learning_rate": 0.001978386782602135, "loss": 0.2233, "step": 70830 }, { "epoch": 0.5028098510699083, "grad_norm": 0.1064453125, "learning_rate": 0.001978380616825321, "loss": 0.217, "step": 70840 }, { "epoch": 0.5028808293097543, "grad_norm": 0.1396484375, "learning_rate": 0.0019783744501798432, "loss": 0.267, "step": 70850 }, { "epoch": 0.5029518075496004, "grad_norm": 0.07470703125, "learning_rate": 0.0019783682826657084, "loss": 0.2482, "step": 70860 }, { "epoch": 0.5030227857894466, "grad_norm": 0.07763671875, "learning_rate": 0.001978362114282922, "loss": 0.2321, "step": 70870 }, { "epoch": 0.5030937640292927, "grad_norm": 0.10546875, "learning_rate": 0.001978355945031491, "loss": 0.2436, "step": 70880 }, { "epoch": 0.5031647422691389, "grad_norm": 0.1142578125, "learning_rate": 0.0019783497749114214, "loss": 0.2178, "step": 70890 }, { "epoch": 0.5032357205089849, "grad_norm": 0.119140625, "learning_rate": 0.001978343603922718, "loss": 0.2352, "step": 70900 }, { "epoch": 0.503306698748831, "grad_norm": 0.10009765625, "learning_rate": 0.0019783374320653884, "loss": 0.2435, "step": 70910 }, { "epoch": 0.5033776769886772, "grad_norm": 0.08251953125, "learning_rate": 0.001978331259339438, "loss": 0.234, "step": 70920 }, { "epoch": 0.5034486552285233, "grad_norm": 0.1337890625, "learning_rate": 0.0019783250857448725, "loss": 0.235, "step": 70930 }, { "epoch": 0.5035196334683695, "grad_norm": 0.1005859375, "learning_rate": 0.001978318911281699, "loss": 0.232, "step": 70940 }, { "epoch": 0.5035906117082155, "grad_norm": 0.109375, "learning_rate": 0.0019783127359499233, "loss": 0.2335, "step": 70950 }, { "epoch": 0.5036615899480616, "grad_norm": 0.09228515625, "learning_rate": 0.0019783065597495514, "loss": 0.237, "step": 70960 }, { "epoch": 0.5037325681879078, "grad_norm": 0.10693359375, "learning_rate": 0.001978300382680589, "loss": 0.2409, "step": 70970 }, { "epoch": 0.5038035464277539, "grad_norm": 0.16015625, "learning_rate": 0.0019782942047430426, "loss": 0.2363, "step": 70980 }, { "epoch": 0.5038745246676001, "grad_norm": 0.0595703125, "learning_rate": 0.0019782880259369183, "loss": 0.2137, "step": 70990 }, { "epoch": 0.5039455029074461, "grad_norm": 0.08984375, "learning_rate": 0.0019782818462622223, "loss": 0.237, "step": 71000 }, { "epoch": 0.5040164811472923, "grad_norm": 0.251953125, "learning_rate": 0.0019782756657189607, "loss": 0.2395, "step": 71010 }, { "epoch": 0.5040874593871384, "grad_norm": 0.126953125, "learning_rate": 0.0019782694843071395, "loss": 0.2331, "step": 71020 }, { "epoch": 0.5041584376269845, "grad_norm": 0.095703125, "learning_rate": 0.0019782633020267648, "loss": 0.2413, "step": 71030 }, { "epoch": 0.5042294158668307, "grad_norm": 0.10400390625, "learning_rate": 0.0019782571188778427, "loss": 0.2417, "step": 71040 }, { "epoch": 0.5043003941066767, "grad_norm": 0.08349609375, "learning_rate": 0.0019782509348603796, "loss": 0.2266, "step": 71050 }, { "epoch": 0.5043713723465229, "grad_norm": 0.103515625, "learning_rate": 0.001978244749974381, "loss": 0.2362, "step": 71060 }, { "epoch": 0.504442350586369, "grad_norm": 0.126953125, "learning_rate": 0.0019782385642198533, "loss": 0.2197, "step": 71070 }, { "epoch": 0.5045133288262151, "grad_norm": 0.09375, "learning_rate": 0.0019782323775968027, "loss": 0.234, "step": 71080 }, { "epoch": 0.5045843070660613, "grad_norm": 0.11669921875, "learning_rate": 0.001978226190105236, "loss": 0.2374, "step": 71090 }, { "epoch": 0.5046552853059073, "grad_norm": 0.07275390625, "learning_rate": 0.0019782200017451576, "loss": 0.2312, "step": 71100 }, { "epoch": 0.5047262635457535, "grad_norm": 0.09912109375, "learning_rate": 0.0019782138125165752, "loss": 0.2203, "step": 71110 }, { "epoch": 0.5047972417855996, "grad_norm": 0.0810546875, "learning_rate": 0.0019782076224194945, "loss": 0.2388, "step": 71120 }, { "epoch": 0.5048682200254457, "grad_norm": 0.1064453125, "learning_rate": 0.0019782014314539214, "loss": 0.2413, "step": 71130 }, { "epoch": 0.5049391982652918, "grad_norm": 0.09375, "learning_rate": 0.001978195239619862, "loss": 0.2145, "step": 71140 }, { "epoch": 0.5050101765051379, "grad_norm": 0.10888671875, "learning_rate": 0.001978189046917323, "loss": 0.2337, "step": 71150 }, { "epoch": 0.505081154744984, "grad_norm": 0.197265625, "learning_rate": 0.001978182853346309, "loss": 0.2433, "step": 71160 }, { "epoch": 0.5051521329848302, "grad_norm": 0.1083984375, "learning_rate": 0.0019781766589068283, "loss": 0.2361, "step": 71170 }, { "epoch": 0.5052231112246763, "grad_norm": 0.2578125, "learning_rate": 0.001978170463598886, "loss": 0.2455, "step": 71180 }, { "epoch": 0.5052940894645224, "grad_norm": 0.1328125, "learning_rate": 0.0019781642674224875, "loss": 0.2451, "step": 71190 }, { "epoch": 0.5053650677043685, "grad_norm": 0.0888671875, "learning_rate": 0.0019781580703776397, "loss": 0.2291, "step": 71200 }, { "epoch": 0.5054360459442147, "grad_norm": 0.10107421875, "learning_rate": 0.0019781518724643486, "loss": 0.2179, "step": 71210 }, { "epoch": 0.5055070241840608, "grad_norm": 0.09521484375, "learning_rate": 0.0019781456736826203, "loss": 0.2252, "step": 71220 }, { "epoch": 0.5055780024239069, "grad_norm": 0.1484375, "learning_rate": 0.001978139474032461, "loss": 0.2434, "step": 71230 }, { "epoch": 0.505648980663753, "grad_norm": 0.09521484375, "learning_rate": 0.001978133273513877, "loss": 0.2384, "step": 71240 }, { "epoch": 0.5057199589035991, "grad_norm": 0.0986328125, "learning_rate": 0.0019781270721268744, "loss": 0.2363, "step": 71250 }, { "epoch": 0.5057909371434453, "grad_norm": 0.11328125, "learning_rate": 0.0019781208698714586, "loss": 0.2413, "step": 71260 }, { "epoch": 0.5058619153832914, "grad_norm": 0.1103515625, "learning_rate": 0.0019781146667476367, "loss": 0.2302, "step": 71270 }, { "epoch": 0.5059328936231375, "grad_norm": 0.10400390625, "learning_rate": 0.0019781084627554144, "loss": 0.2355, "step": 71280 }, { "epoch": 0.5060038718629836, "grad_norm": 0.08447265625, "learning_rate": 0.0019781022578947978, "loss": 0.2186, "step": 71290 }, { "epoch": 0.5060748501028297, "grad_norm": 0.0693359375, "learning_rate": 0.001978096052165793, "loss": 0.2085, "step": 71300 }, { "epoch": 0.5061458283426759, "grad_norm": 0.08154296875, "learning_rate": 0.001978089845568406, "loss": 0.2393, "step": 71310 }, { "epoch": 0.506216806582522, "grad_norm": 0.1474609375, "learning_rate": 0.0019780836381026435, "loss": 0.2245, "step": 71320 }, { "epoch": 0.5062877848223681, "grad_norm": 0.1201171875, "learning_rate": 0.0019780774297685115, "loss": 0.255, "step": 71330 }, { "epoch": 0.5063587630622142, "grad_norm": 0.10498046875, "learning_rate": 0.0019780712205660156, "loss": 0.2536, "step": 71340 }, { "epoch": 0.5064297413020603, "grad_norm": 0.130859375, "learning_rate": 0.0019780650104951627, "loss": 0.248, "step": 71350 }, { "epoch": 0.5065007195419065, "grad_norm": 0.1083984375, "learning_rate": 0.001978058799555958, "loss": 0.2327, "step": 71360 }, { "epoch": 0.5065716977817526, "grad_norm": 0.1298828125, "learning_rate": 0.0019780525877484086, "loss": 0.2638, "step": 71370 }, { "epoch": 0.5066426760215986, "grad_norm": 0.11669921875, "learning_rate": 0.00197804637507252, "loss": 0.2349, "step": 71380 }, { "epoch": 0.5067136542614448, "grad_norm": 0.10498046875, "learning_rate": 0.001978040161528299, "loss": 0.2353, "step": 71390 }, { "epoch": 0.5067846325012909, "grad_norm": 0.134765625, "learning_rate": 0.0019780339471157505, "loss": 0.2323, "step": 71400 }, { "epoch": 0.506855610741137, "grad_norm": 0.1357421875, "learning_rate": 0.0019780277318348824, "loss": 0.2272, "step": 71410 }, { "epoch": 0.5069265889809832, "grad_norm": 0.091796875, "learning_rate": 0.0019780215156856994, "loss": 0.2324, "step": 71420 }, { "epoch": 0.5069975672208292, "grad_norm": 0.10888671875, "learning_rate": 0.001978015298668208, "loss": 0.2235, "step": 71430 }, { "epoch": 0.5070685454606754, "grad_norm": 0.07421875, "learning_rate": 0.001978009080782415, "loss": 0.2416, "step": 71440 }, { "epoch": 0.5071395237005215, "grad_norm": 0.103515625, "learning_rate": 0.0019780028620283254, "loss": 0.2401, "step": 71450 }, { "epoch": 0.5072105019403677, "grad_norm": 0.06298828125, "learning_rate": 0.0019779966424059462, "loss": 0.2297, "step": 71460 }, { "epoch": 0.5072814801802138, "grad_norm": 0.125, "learning_rate": 0.0019779904219152833, "loss": 0.2344, "step": 71470 }, { "epoch": 0.5073524584200598, "grad_norm": 0.08544921875, "learning_rate": 0.001977984200556343, "loss": 0.234, "step": 71480 }, { "epoch": 0.507423436659906, "grad_norm": 0.10107421875, "learning_rate": 0.001977977978329132, "loss": 0.2238, "step": 71490 }, { "epoch": 0.5074944148997521, "grad_norm": 0.07275390625, "learning_rate": 0.001977971755233655, "loss": 0.2214, "step": 71500 }, { "epoch": 0.5075653931395983, "grad_norm": 0.08056640625, "learning_rate": 0.001977965531269919, "loss": 0.2395, "step": 71510 }, { "epoch": 0.5076363713794444, "grad_norm": 0.11279296875, "learning_rate": 0.0019779593064379306, "loss": 0.2407, "step": 71520 }, { "epoch": 0.5077073496192904, "grad_norm": 0.1083984375, "learning_rate": 0.001977953080737695, "loss": 0.2173, "step": 71530 }, { "epoch": 0.5077783278591366, "grad_norm": 0.091796875, "learning_rate": 0.001977946854169219, "loss": 0.2337, "step": 71540 }, { "epoch": 0.5078493060989827, "grad_norm": 0.1240234375, "learning_rate": 0.0019779406267325084, "loss": 0.2352, "step": 71550 }, { "epoch": 0.5079202843388289, "grad_norm": 0.1630859375, "learning_rate": 0.00197793439842757, "loss": 0.2446, "step": 71560 }, { "epoch": 0.507991262578675, "grad_norm": 0.11669921875, "learning_rate": 0.001977928169254409, "loss": 0.2269, "step": 71570 }, { "epoch": 0.508062240818521, "grad_norm": 0.07080078125, "learning_rate": 0.001977921939213032, "loss": 0.2399, "step": 71580 }, { "epoch": 0.5081332190583672, "grad_norm": 0.1455078125, "learning_rate": 0.001977915708303446, "loss": 0.2356, "step": 71590 }, { "epoch": 0.5082041972982133, "grad_norm": 0.06982421875, "learning_rate": 0.0019779094765256557, "loss": 0.2187, "step": 71600 }, { "epoch": 0.5082751755380595, "grad_norm": 0.07470703125, "learning_rate": 0.0019779032438796682, "loss": 0.2434, "step": 71610 }, { "epoch": 0.5083461537779055, "grad_norm": 0.07861328125, "learning_rate": 0.0019778970103654894, "loss": 0.2319, "step": 71620 }, { "epoch": 0.5084171320177516, "grad_norm": 0.08935546875, "learning_rate": 0.0019778907759831254, "loss": 0.2372, "step": 71630 }, { "epoch": 0.5084881102575978, "grad_norm": 0.06494140625, "learning_rate": 0.0019778845407325825, "loss": 0.245, "step": 71640 }, { "epoch": 0.5085590884974439, "grad_norm": 0.255859375, "learning_rate": 0.0019778783046138664, "loss": 0.2411, "step": 71650 }, { "epoch": 0.50863006673729, "grad_norm": 0.259765625, "learning_rate": 0.0019778720676269846, "loss": 0.2481, "step": 71660 }, { "epoch": 0.5087010449771361, "grad_norm": 0.16796875, "learning_rate": 0.001977865829771942, "loss": 0.231, "step": 71670 }, { "epoch": 0.5087720232169822, "grad_norm": 0.10888671875, "learning_rate": 0.001977859591048745, "loss": 0.2353, "step": 71680 }, { "epoch": 0.5088430014568284, "grad_norm": 0.11767578125, "learning_rate": 0.0019778533514573995, "loss": 0.2371, "step": 71690 }, { "epoch": 0.5089139796966745, "grad_norm": 0.095703125, "learning_rate": 0.0019778471109979123, "loss": 0.2179, "step": 71700 }, { "epoch": 0.5089849579365207, "grad_norm": 0.08544921875, "learning_rate": 0.0019778408696702896, "loss": 0.2242, "step": 71710 }, { "epoch": 0.5090559361763667, "grad_norm": 0.138671875, "learning_rate": 0.001977834627474537, "loss": 0.2461, "step": 71720 }, { "epoch": 0.5091269144162128, "grad_norm": 0.12060546875, "learning_rate": 0.0019778283844106613, "loss": 0.2356, "step": 71730 }, { "epoch": 0.509197892656059, "grad_norm": 0.08349609375, "learning_rate": 0.001977822140478668, "loss": 0.24, "step": 71740 }, { "epoch": 0.5092688708959051, "grad_norm": 0.10595703125, "learning_rate": 0.0019778158956785634, "loss": 0.2257, "step": 71750 }, { "epoch": 0.5093398491357513, "grad_norm": 0.10986328125, "learning_rate": 0.001977809650010354, "loss": 0.2299, "step": 71760 }, { "epoch": 0.5094108273755973, "grad_norm": 0.07666015625, "learning_rate": 0.0019778034034740465, "loss": 0.2336, "step": 71770 }, { "epoch": 0.5094818056154434, "grad_norm": 0.115234375, "learning_rate": 0.0019777971560696462, "loss": 0.24, "step": 71780 }, { "epoch": 0.5095527838552896, "grad_norm": 0.0947265625, "learning_rate": 0.001977790907797159, "loss": 0.2429, "step": 71790 }, { "epoch": 0.5096237620951357, "grad_norm": 0.078125, "learning_rate": 0.001977784658656592, "loss": 0.2246, "step": 71800 }, { "epoch": 0.5096947403349819, "grad_norm": 0.1201171875, "learning_rate": 0.001977778408647951, "loss": 0.2296, "step": 71810 }, { "epoch": 0.5097657185748279, "grad_norm": 0.1826171875, "learning_rate": 0.0019777721577712423, "loss": 0.2217, "step": 71820 }, { "epoch": 0.509836696814674, "grad_norm": 0.1435546875, "learning_rate": 0.001977765906026472, "loss": 0.2451, "step": 71830 }, { "epoch": 0.5099076750545202, "grad_norm": 0.10009765625, "learning_rate": 0.0019777596534136456, "loss": 0.2415, "step": 71840 }, { "epoch": 0.5099786532943663, "grad_norm": 0.1259765625, "learning_rate": 0.0019777533999327706, "loss": 0.2306, "step": 71850 }, { "epoch": 0.5100496315342123, "grad_norm": 0.146484375, "learning_rate": 0.0019777471455838525, "loss": 0.2205, "step": 71860 }, { "epoch": 0.5101206097740585, "grad_norm": 0.10302734375, "learning_rate": 0.0019777408903668974, "loss": 0.2528, "step": 71870 }, { "epoch": 0.5101915880139046, "grad_norm": 0.126953125, "learning_rate": 0.001977734634281911, "loss": 0.2475, "step": 71880 }, { "epoch": 0.5102625662537508, "grad_norm": 0.07177734375, "learning_rate": 0.0019777283773289005, "loss": 0.2386, "step": 71890 }, { "epoch": 0.5103335444935969, "grad_norm": 0.138671875, "learning_rate": 0.001977722119507872, "loss": 0.239, "step": 71900 }, { "epoch": 0.510404522733443, "grad_norm": 0.12890625, "learning_rate": 0.0019777158608188304, "loss": 0.2254, "step": 71910 }, { "epoch": 0.5104755009732891, "grad_norm": 0.08642578125, "learning_rate": 0.0019777096012617837, "loss": 0.228, "step": 71920 }, { "epoch": 0.5105464792131352, "grad_norm": 0.060791015625, "learning_rate": 0.001977703340836737, "loss": 0.2266, "step": 71930 }, { "epoch": 0.5106174574529814, "grad_norm": 0.08642578125, "learning_rate": 0.0019776970795436963, "loss": 0.226, "step": 71940 }, { "epoch": 0.5106884356928275, "grad_norm": 0.0830078125, "learning_rate": 0.0019776908173826683, "loss": 0.2232, "step": 71950 }, { "epoch": 0.5107594139326735, "grad_norm": 0.09521484375, "learning_rate": 0.0019776845543536593, "loss": 0.2124, "step": 71960 }, { "epoch": 0.5108303921725197, "grad_norm": 0.07958984375, "learning_rate": 0.0019776782904566753, "loss": 0.2191, "step": 71970 }, { "epoch": 0.5109013704123658, "grad_norm": 0.09326171875, "learning_rate": 0.0019776720256917224, "loss": 0.2209, "step": 71980 }, { "epoch": 0.510972348652212, "grad_norm": 0.09033203125, "learning_rate": 0.0019776657600588067, "loss": 0.2466, "step": 71990 }, { "epoch": 0.5110433268920581, "grad_norm": 0.142578125, "learning_rate": 0.0019776594935579346, "loss": 0.2221, "step": 72000 }, { "epoch": 0.5110433268920581, "eval_covost2-zh-en_loss": 3.823176860809326, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.8559, "eval_covost2-zh-en_samples_per_second": 3.069, "eval_covost2-zh-en_steps_per_second": 0.192, "step": 72000 }, { "epoch": 0.5110433268920581, "eval_covost2-en-zh_loss": 3.1206769943237305, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.9365, "eval_covost2-en-zh_samples_per_second": 2.918, "eval_covost2-en-zh_steps_per_second": 0.182, "step": 72000 }, { "epoch": 0.5111143051319041, "grad_norm": 0.1630859375, "learning_rate": 0.001977653226189113, "loss": 0.2313, "step": 72010 }, { "epoch": 0.5111852833717503, "grad_norm": 0.126953125, "learning_rate": 0.001977646957952347, "loss": 0.2376, "step": 72020 }, { "epoch": 0.5112562616115964, "grad_norm": 0.09130859375, "learning_rate": 0.0019776406888476427, "loss": 0.2544, "step": 72030 }, { "epoch": 0.5113272398514426, "grad_norm": 0.123046875, "learning_rate": 0.001977634418875007, "loss": 0.2226, "step": 72040 }, { "epoch": 0.5113982180912887, "grad_norm": 0.1455078125, "learning_rate": 0.0019776281480344458, "loss": 0.234, "step": 72050 }, { "epoch": 0.5114691963311347, "grad_norm": 0.09912109375, "learning_rate": 0.0019776218763259655, "loss": 0.2537, "step": 72060 }, { "epoch": 0.5115401745709809, "grad_norm": 0.1044921875, "learning_rate": 0.001977615603749572, "loss": 0.2341, "step": 72070 }, { "epoch": 0.511611152810827, "grad_norm": 0.1591796875, "learning_rate": 0.001977609330305272, "loss": 0.2341, "step": 72080 }, { "epoch": 0.5116821310506732, "grad_norm": 0.1318359375, "learning_rate": 0.001977603055993071, "loss": 0.2285, "step": 72090 }, { "epoch": 0.5117531092905192, "grad_norm": 0.10791015625, "learning_rate": 0.0019775967808129758, "loss": 0.2289, "step": 72100 }, { "epoch": 0.5118240875303653, "grad_norm": 0.09326171875, "learning_rate": 0.0019775905047649924, "loss": 0.2486, "step": 72110 }, { "epoch": 0.5118950657702115, "grad_norm": 0.09130859375, "learning_rate": 0.001977584227849127, "loss": 0.2319, "step": 72120 }, { "epoch": 0.5119660440100576, "grad_norm": 0.10205078125, "learning_rate": 0.001977577950065386, "loss": 0.2255, "step": 72130 }, { "epoch": 0.5120370222499038, "grad_norm": 0.08447265625, "learning_rate": 0.001977571671413775, "loss": 0.2442, "step": 72140 }, { "epoch": 0.5121080004897498, "grad_norm": 0.125, "learning_rate": 0.001977565391894301, "loss": 0.2523, "step": 72150 }, { "epoch": 0.512178978729596, "grad_norm": 0.099609375, "learning_rate": 0.0019775591115069693, "loss": 0.2449, "step": 72160 }, { "epoch": 0.5122499569694421, "grad_norm": 0.08642578125, "learning_rate": 0.001977552830251787, "loss": 0.222, "step": 72170 }, { "epoch": 0.5123209352092882, "grad_norm": 0.103515625, "learning_rate": 0.00197754654812876, "loss": 0.2285, "step": 72180 }, { "epoch": 0.5123919134491344, "grad_norm": 0.09326171875, "learning_rate": 0.001977540265137894, "loss": 0.2335, "step": 72190 }, { "epoch": 0.5124628916889804, "grad_norm": 0.1796875, "learning_rate": 0.001977533981279196, "loss": 0.2223, "step": 72200 }, { "epoch": 0.5125338699288265, "grad_norm": 0.107421875, "learning_rate": 0.001977527696552672, "loss": 0.2344, "step": 72210 }, { "epoch": 0.5126048481686727, "grad_norm": 0.11181640625, "learning_rate": 0.001977521410958328, "loss": 0.2307, "step": 72220 }, { "epoch": 0.5126758264085188, "grad_norm": 0.076171875, "learning_rate": 0.0019775151244961703, "loss": 0.2181, "step": 72230 }, { "epoch": 0.512746804648365, "grad_norm": 0.15625, "learning_rate": 0.0019775088371662055, "loss": 0.2389, "step": 72240 }, { "epoch": 0.512817782888211, "grad_norm": 0.07958984375, "learning_rate": 0.001977502548968439, "loss": 0.2414, "step": 72250 }, { "epoch": 0.5128887611280571, "grad_norm": 0.0810546875, "learning_rate": 0.0019774962599028775, "loss": 0.224, "step": 72260 }, { "epoch": 0.5129597393679033, "grad_norm": 0.1201171875, "learning_rate": 0.0019774899699695273, "loss": 0.266, "step": 72270 }, { "epoch": 0.5130307176077494, "grad_norm": 0.09375, "learning_rate": 0.0019774836791683946, "loss": 0.2301, "step": 72280 }, { "epoch": 0.5131016958475956, "grad_norm": 0.1103515625, "learning_rate": 0.0019774773874994857, "loss": 0.2193, "step": 72290 }, { "epoch": 0.5131726740874416, "grad_norm": 0.10888671875, "learning_rate": 0.0019774710949628065, "loss": 0.2312, "step": 72300 }, { "epoch": 0.5132436523272877, "grad_norm": 0.1572265625, "learning_rate": 0.001977464801558363, "loss": 0.2498, "step": 72310 }, { "epoch": 0.5133146305671339, "grad_norm": 0.146484375, "learning_rate": 0.0019774585072861624, "loss": 0.2332, "step": 72320 }, { "epoch": 0.51338560880698, "grad_norm": 0.1044921875, "learning_rate": 0.0019774522121462097, "loss": 0.2368, "step": 72330 }, { "epoch": 0.5134565870468262, "grad_norm": 0.1083984375, "learning_rate": 0.0019774459161385123, "loss": 0.2378, "step": 72340 }, { "epoch": 0.5135275652866722, "grad_norm": 0.07421875, "learning_rate": 0.001977439619263076, "loss": 0.2208, "step": 72350 }, { "epoch": 0.5135985435265183, "grad_norm": 0.0751953125, "learning_rate": 0.001977433321519906, "loss": 0.2251, "step": 72360 }, { "epoch": 0.5136695217663645, "grad_norm": 0.2138671875, "learning_rate": 0.0019774270229090105, "loss": 0.2465, "step": 72370 }, { "epoch": 0.5137405000062106, "grad_norm": 0.078125, "learning_rate": 0.001977420723430394, "loss": 0.2336, "step": 72380 }, { "epoch": 0.5138114782460567, "grad_norm": 0.10302734375, "learning_rate": 0.0019774144230840637, "loss": 0.2458, "step": 72390 }, { "epoch": 0.5138824564859028, "grad_norm": 0.07861328125, "learning_rate": 0.0019774081218700253, "loss": 0.2345, "step": 72400 }, { "epoch": 0.513953434725749, "grad_norm": 0.10400390625, "learning_rate": 0.001977401819788285, "loss": 0.2149, "step": 72410 }, { "epoch": 0.5140244129655951, "grad_norm": 0.10107421875, "learning_rate": 0.00197739551683885, "loss": 0.2298, "step": 72420 }, { "epoch": 0.5140953912054412, "grad_norm": 0.1123046875, "learning_rate": 0.0019773892130217257, "loss": 0.2431, "step": 72430 }, { "epoch": 0.5141663694452873, "grad_norm": 0.10986328125, "learning_rate": 0.001977382908336918, "loss": 0.2267, "step": 72440 }, { "epoch": 0.5142373476851334, "grad_norm": 0.09619140625, "learning_rate": 0.001977376602784434, "loss": 0.2234, "step": 72450 }, { "epoch": 0.5143083259249795, "grad_norm": 0.09423828125, "learning_rate": 0.001977370296364279, "loss": 0.2329, "step": 72460 }, { "epoch": 0.5143793041648257, "grad_norm": 0.12353515625, "learning_rate": 0.0019773639890764603, "loss": 0.23, "step": 72470 }, { "epoch": 0.5144502824046718, "grad_norm": 0.11669921875, "learning_rate": 0.0019773576809209836, "loss": 0.2394, "step": 72480 }, { "epoch": 0.5145212606445179, "grad_norm": 0.08203125, "learning_rate": 0.001977351371897855, "loss": 0.233, "step": 72490 }, { "epoch": 0.514592238884364, "grad_norm": 0.1630859375, "learning_rate": 0.001977345062007081, "loss": 0.2402, "step": 72500 }, { "epoch": 0.5146632171242101, "grad_norm": 0.138671875, "learning_rate": 0.0019773387512486674, "loss": 0.2595, "step": 72510 }, { "epoch": 0.5147341953640563, "grad_norm": 0.11865234375, "learning_rate": 0.001977332439622621, "loss": 0.2311, "step": 72520 }, { "epoch": 0.5148051736039024, "grad_norm": 0.10498046875, "learning_rate": 0.0019773261271289477, "loss": 0.254, "step": 72530 }, { "epoch": 0.5148761518437485, "grad_norm": 0.10693359375, "learning_rate": 0.001977319813767654, "loss": 0.2313, "step": 72540 }, { "epoch": 0.5149471300835946, "grad_norm": 0.09716796875, "learning_rate": 0.0019773134995387455, "loss": 0.2464, "step": 72550 }, { "epoch": 0.5150181083234407, "grad_norm": 0.07421875, "learning_rate": 0.0019773071844422295, "loss": 0.241, "step": 72560 }, { "epoch": 0.5150890865632869, "grad_norm": 0.08837890625, "learning_rate": 0.0019773008684781115, "loss": 0.2266, "step": 72570 }, { "epoch": 0.515160064803133, "grad_norm": 0.1259765625, "learning_rate": 0.001977294551646398, "loss": 0.2387, "step": 72580 }, { "epoch": 0.5152310430429791, "grad_norm": 0.1357421875, "learning_rate": 0.0019772882339470954, "loss": 0.2285, "step": 72590 }, { "epoch": 0.5153020212828252, "grad_norm": 0.1484375, "learning_rate": 0.001977281915380209, "loss": 0.2444, "step": 72600 }, { "epoch": 0.5153729995226713, "grad_norm": 0.126953125, "learning_rate": 0.0019772755959457464, "loss": 0.2272, "step": 72610 }, { "epoch": 0.5154439777625175, "grad_norm": 0.12890625, "learning_rate": 0.001977269275643713, "loss": 0.2279, "step": 72620 }, { "epoch": 0.5155149560023635, "grad_norm": 0.103515625, "learning_rate": 0.0019772629544741152, "loss": 0.2282, "step": 72630 }, { "epoch": 0.5155859342422097, "grad_norm": 0.12353515625, "learning_rate": 0.0019772566324369593, "loss": 0.2409, "step": 72640 }, { "epoch": 0.5156569124820558, "grad_norm": 0.12109375, "learning_rate": 0.0019772503095322517, "loss": 0.2341, "step": 72650 }, { "epoch": 0.515727890721902, "grad_norm": 0.06884765625, "learning_rate": 0.0019772439857599984, "loss": 0.2414, "step": 72660 }, { "epoch": 0.5157988689617481, "grad_norm": 0.12451171875, "learning_rate": 0.001977237661120206, "loss": 0.2377, "step": 72670 }, { "epoch": 0.5158698472015941, "grad_norm": 0.054931640625, "learning_rate": 0.0019772313356128802, "loss": 0.2173, "step": 72680 }, { "epoch": 0.5159408254414403, "grad_norm": 0.08935546875, "learning_rate": 0.001977225009238028, "loss": 0.2285, "step": 72690 }, { "epoch": 0.5160118036812864, "grad_norm": 0.07080078125, "learning_rate": 0.001977218681995655, "loss": 0.2157, "step": 72700 }, { "epoch": 0.5160827819211325, "grad_norm": 0.08251953125, "learning_rate": 0.001977212353885768, "loss": 0.2346, "step": 72710 }, { "epoch": 0.5161537601609787, "grad_norm": 0.10888671875, "learning_rate": 0.0019772060249083723, "loss": 0.2268, "step": 72720 }, { "epoch": 0.5162247384008247, "grad_norm": 0.12255859375, "learning_rate": 0.0019771996950634753, "loss": 0.2342, "step": 72730 }, { "epoch": 0.5162957166406709, "grad_norm": 0.13671875, "learning_rate": 0.001977193364351083, "loss": 0.237, "step": 72740 }, { "epoch": 0.516366694880517, "grad_norm": 0.1123046875, "learning_rate": 0.0019771870327712007, "loss": 0.2313, "step": 72750 }, { "epoch": 0.5164376731203631, "grad_norm": 0.1455078125, "learning_rate": 0.001977180700323836, "loss": 0.2188, "step": 72760 }, { "epoch": 0.5165086513602093, "grad_norm": 0.11669921875, "learning_rate": 0.0019771743670089943, "loss": 0.2297, "step": 72770 }, { "epoch": 0.5165796296000553, "grad_norm": 0.087890625, "learning_rate": 0.0019771680328266824, "loss": 0.2229, "step": 72780 }, { "epoch": 0.5166506078399015, "grad_norm": 0.1494140625, "learning_rate": 0.0019771616977769064, "loss": 0.2326, "step": 72790 }, { "epoch": 0.5167215860797476, "grad_norm": 0.0966796875, "learning_rate": 0.001977155361859672, "loss": 0.2276, "step": 72800 }, { "epoch": 0.5167925643195938, "grad_norm": 0.1279296875, "learning_rate": 0.001977149025074986, "loss": 0.2358, "step": 72810 }, { "epoch": 0.5168635425594399, "grad_norm": 0.08984375, "learning_rate": 0.001977142687422855, "loss": 0.2014, "step": 72820 }, { "epoch": 0.5169345207992859, "grad_norm": 0.138671875, "learning_rate": 0.0019771363489032843, "loss": 0.2313, "step": 72830 }, { "epoch": 0.5170054990391321, "grad_norm": 0.0751953125, "learning_rate": 0.0019771300095162807, "loss": 0.2435, "step": 72840 }, { "epoch": 0.5170764772789782, "grad_norm": 0.07666015625, "learning_rate": 0.001977123669261851, "loss": 0.2283, "step": 72850 }, { "epoch": 0.5171474555188244, "grad_norm": 0.1484375, "learning_rate": 0.001977117328140001, "loss": 0.2445, "step": 72860 }, { "epoch": 0.5172184337586704, "grad_norm": 0.0849609375, "learning_rate": 0.001977110986150736, "loss": 0.2391, "step": 72870 }, { "epoch": 0.5172894119985165, "grad_norm": 0.08154296875, "learning_rate": 0.001977104643294064, "loss": 0.2276, "step": 72880 }, { "epoch": 0.5173603902383627, "grad_norm": 0.08203125, "learning_rate": 0.001977098299569991, "loss": 0.2327, "step": 72890 }, { "epoch": 0.5174313684782088, "grad_norm": 0.125, "learning_rate": 0.001977091954978522, "loss": 0.2212, "step": 72900 }, { "epoch": 0.517502346718055, "grad_norm": 0.1044921875, "learning_rate": 0.001977085609519664, "loss": 0.238, "step": 72910 }, { "epoch": 0.517573324957901, "grad_norm": 0.10107421875, "learning_rate": 0.0019770792631934233, "loss": 0.2302, "step": 72920 }, { "epoch": 0.5176443031977471, "grad_norm": 0.0673828125, "learning_rate": 0.0019770729159998062, "loss": 0.243, "step": 72930 }, { "epoch": 0.5177152814375933, "grad_norm": 0.1123046875, "learning_rate": 0.0019770665679388192, "loss": 0.2382, "step": 72940 }, { "epoch": 0.5177862596774394, "grad_norm": 0.11474609375, "learning_rate": 0.0019770602190104683, "loss": 0.2435, "step": 72950 }, { "epoch": 0.5178572379172856, "grad_norm": 0.09423828125, "learning_rate": 0.0019770538692147595, "loss": 0.2306, "step": 72960 }, { "epoch": 0.5179282161571316, "grad_norm": 0.1796875, "learning_rate": 0.0019770475185516994, "loss": 0.2237, "step": 72970 }, { "epoch": 0.5179991943969777, "grad_norm": 0.087890625, "learning_rate": 0.0019770411670212945, "loss": 0.2403, "step": 72980 }, { "epoch": 0.5180701726368239, "grad_norm": 0.12158203125, "learning_rate": 0.0019770348146235513, "loss": 0.2265, "step": 72990 }, { "epoch": 0.51814115087667, "grad_norm": 0.11328125, "learning_rate": 0.001977028461358475, "loss": 0.2315, "step": 73000 }, { "epoch": 0.5182121291165162, "grad_norm": 0.1474609375, "learning_rate": 0.0019770221072260724, "loss": 0.252, "step": 73010 }, { "epoch": 0.5182831073563622, "grad_norm": 0.171875, "learning_rate": 0.0019770157522263506, "loss": 0.2266, "step": 73020 }, { "epoch": 0.5183540855962083, "grad_norm": 0.265625, "learning_rate": 0.001977009396359315, "loss": 0.2244, "step": 73030 }, { "epoch": 0.5184250638360545, "grad_norm": 0.09765625, "learning_rate": 0.0019770030396249715, "loss": 0.2293, "step": 73040 }, { "epoch": 0.5184960420759006, "grad_norm": 0.10693359375, "learning_rate": 0.001976996682023327, "loss": 0.2399, "step": 73050 }, { "epoch": 0.5185670203157468, "grad_norm": 0.09521484375, "learning_rate": 0.0019769903235543887, "loss": 0.2487, "step": 73060 }, { "epoch": 0.5186379985555928, "grad_norm": 0.1484375, "learning_rate": 0.001976983964218161, "loss": 0.2341, "step": 73070 }, { "epoch": 0.5187089767954389, "grad_norm": 0.08251953125, "learning_rate": 0.0019769776040146512, "loss": 0.2119, "step": 73080 }, { "epoch": 0.5187799550352851, "grad_norm": 0.251953125, "learning_rate": 0.0019769712429438657, "loss": 0.2472, "step": 73090 }, { "epoch": 0.5188509332751312, "grad_norm": 0.08544921875, "learning_rate": 0.001976964881005811, "loss": 0.229, "step": 73100 }, { "epoch": 0.5189219115149772, "grad_norm": 0.10595703125, "learning_rate": 0.0019769585182004925, "loss": 0.2314, "step": 73110 }, { "epoch": 0.5189928897548234, "grad_norm": 0.1181640625, "learning_rate": 0.001976952154527917, "loss": 0.2342, "step": 73120 }, { "epoch": 0.5190638679946695, "grad_norm": 0.07861328125, "learning_rate": 0.001976945789988091, "loss": 0.2321, "step": 73130 }, { "epoch": 0.5191348462345157, "grad_norm": 0.1865234375, "learning_rate": 0.0019769394245810206, "loss": 0.2283, "step": 73140 }, { "epoch": 0.5192058244743618, "grad_norm": 0.0947265625, "learning_rate": 0.0019769330583067116, "loss": 0.2161, "step": 73150 }, { "epoch": 0.5192768027142078, "grad_norm": 0.107421875, "learning_rate": 0.0019769266911651715, "loss": 0.2353, "step": 73160 }, { "epoch": 0.519347780954054, "grad_norm": 0.0869140625, "learning_rate": 0.0019769203231564053, "loss": 0.2254, "step": 73170 }, { "epoch": 0.5194187591939001, "grad_norm": 0.1328125, "learning_rate": 0.00197691395428042, "loss": 0.2362, "step": 73180 }, { "epoch": 0.5194897374337463, "grad_norm": 0.0859375, "learning_rate": 0.0019769075845372215, "loss": 0.2368, "step": 73190 }, { "epoch": 0.5195607156735924, "grad_norm": 0.0830078125, "learning_rate": 0.001976901213926817, "loss": 0.2303, "step": 73200 }, { "epoch": 0.5196316939134384, "grad_norm": 0.0849609375, "learning_rate": 0.001976894842449212, "loss": 0.2122, "step": 73210 }, { "epoch": 0.5197026721532846, "grad_norm": 0.1171875, "learning_rate": 0.0019768884701044124, "loss": 0.235, "step": 73220 }, { "epoch": 0.5197736503931307, "grad_norm": 0.09619140625, "learning_rate": 0.0019768820968924256, "loss": 0.2283, "step": 73230 }, { "epoch": 0.5198446286329769, "grad_norm": 0.11083984375, "learning_rate": 0.001976875722813257, "loss": 0.2248, "step": 73240 }, { "epoch": 0.519915606872823, "grad_norm": 0.1494140625, "learning_rate": 0.0019768693478669137, "loss": 0.2276, "step": 73250 }, { "epoch": 0.519986585112669, "grad_norm": 0.11376953125, "learning_rate": 0.001976862972053401, "loss": 0.2389, "step": 73260 }, { "epoch": 0.5200575633525152, "grad_norm": 0.09228515625, "learning_rate": 0.0019768565953727263, "loss": 0.2381, "step": 73270 }, { "epoch": 0.5201285415923613, "grad_norm": 0.123046875, "learning_rate": 0.0019768502178248953, "loss": 0.2519, "step": 73280 }, { "epoch": 0.5201995198322075, "grad_norm": 0.1142578125, "learning_rate": 0.0019768438394099142, "loss": 0.2365, "step": 73290 }, { "epoch": 0.5202704980720536, "grad_norm": 0.1201171875, "learning_rate": 0.0019768374601277896, "loss": 0.2226, "step": 73300 }, { "epoch": 0.5203414763118996, "grad_norm": 0.240234375, "learning_rate": 0.0019768310799785278, "loss": 0.2358, "step": 73310 }, { "epoch": 0.5204124545517458, "grad_norm": 0.0830078125, "learning_rate": 0.001976824698962135, "loss": 0.2281, "step": 73320 }, { "epoch": 0.5204834327915919, "grad_norm": 0.0751953125, "learning_rate": 0.0019768183170786172, "loss": 0.2364, "step": 73330 }, { "epoch": 0.5205544110314381, "grad_norm": 0.10888671875, "learning_rate": 0.0019768119343279815, "loss": 0.2366, "step": 73340 }, { "epoch": 0.5206253892712842, "grad_norm": 0.10400390625, "learning_rate": 0.0019768055507102334, "loss": 0.2296, "step": 73350 }, { "epoch": 0.5206963675111302, "grad_norm": 0.11767578125, "learning_rate": 0.00197679916622538, "loss": 0.2484, "step": 73360 }, { "epoch": 0.5207673457509764, "grad_norm": 0.09326171875, "learning_rate": 0.0019767927808734265, "loss": 0.23, "step": 73370 }, { "epoch": 0.5208383239908225, "grad_norm": 0.1884765625, "learning_rate": 0.0019767863946543803, "loss": 0.2102, "step": 73380 }, { "epoch": 0.5209093022306687, "grad_norm": 0.0830078125, "learning_rate": 0.0019767800075682473, "loss": 0.2358, "step": 73390 }, { "epoch": 0.5209802804705147, "grad_norm": 0.1591796875, "learning_rate": 0.001976773619615034, "loss": 0.2462, "step": 73400 }, { "epoch": 0.5210512587103608, "grad_norm": 0.09423828125, "learning_rate": 0.001976767230794746, "loss": 0.2242, "step": 73410 }, { "epoch": 0.521122236950207, "grad_norm": 0.0966796875, "learning_rate": 0.0019767608411073905, "loss": 0.2474, "step": 73420 }, { "epoch": 0.5211932151900531, "grad_norm": 0.09521484375, "learning_rate": 0.0019767544505529737, "loss": 0.2375, "step": 73430 }, { "epoch": 0.5212641934298993, "grad_norm": 0.08740234375, "learning_rate": 0.0019767480591315013, "loss": 0.2451, "step": 73440 }, { "epoch": 0.5213351716697453, "grad_norm": 0.14453125, "learning_rate": 0.00197674166684298, "loss": 0.2383, "step": 73450 }, { "epoch": 0.5214061499095914, "grad_norm": 0.09033203125, "learning_rate": 0.0019767352736874168, "loss": 0.2306, "step": 73460 }, { "epoch": 0.5214771281494376, "grad_norm": 0.0908203125, "learning_rate": 0.0019767288796648163, "loss": 0.2411, "step": 73470 }, { "epoch": 0.5215481063892837, "grad_norm": 0.08447265625, "learning_rate": 0.0019767224847751868, "loss": 0.2466, "step": 73480 }, { "epoch": 0.5216190846291299, "grad_norm": 0.1259765625, "learning_rate": 0.0019767160890185333, "loss": 0.2349, "step": 73490 }, { "epoch": 0.5216900628689759, "grad_norm": 0.126953125, "learning_rate": 0.001976709692394863, "loss": 0.2294, "step": 73500 }, { "epoch": 0.521761041108822, "grad_norm": 0.11572265625, "learning_rate": 0.001976703294904181, "loss": 0.2251, "step": 73510 }, { "epoch": 0.5218320193486682, "grad_norm": 0.08642578125, "learning_rate": 0.0019766968965464946, "loss": 0.236, "step": 73520 }, { "epoch": 0.5219029975885143, "grad_norm": 0.1142578125, "learning_rate": 0.00197669049732181, "loss": 0.2436, "step": 73530 }, { "epoch": 0.5219739758283605, "grad_norm": 0.125, "learning_rate": 0.001976684097230134, "loss": 0.2232, "step": 73540 }, { "epoch": 0.5220449540682065, "grad_norm": 0.08154296875, "learning_rate": 0.001976677696271472, "loss": 0.2242, "step": 73550 }, { "epoch": 0.5221159323080526, "grad_norm": 0.0849609375, "learning_rate": 0.0019766712944458303, "loss": 0.2132, "step": 73560 }, { "epoch": 0.5221869105478988, "grad_norm": 0.10498046875, "learning_rate": 0.001976664891753216, "loss": 0.2309, "step": 73570 }, { "epoch": 0.5222578887877449, "grad_norm": 0.1484375, "learning_rate": 0.001976658488193635, "loss": 0.227, "step": 73580 }, { "epoch": 0.5223288670275911, "grad_norm": 0.07421875, "learning_rate": 0.0019766520837670937, "loss": 0.2331, "step": 73590 }, { "epoch": 0.5223998452674371, "grad_norm": 0.091796875, "learning_rate": 0.0019766456784735984, "loss": 0.2515, "step": 73600 }, { "epoch": 0.5224708235072832, "grad_norm": 0.134765625, "learning_rate": 0.0019766392723131556, "loss": 0.2514, "step": 73610 }, { "epoch": 0.5225418017471294, "grad_norm": 0.1640625, "learning_rate": 0.0019766328652857713, "loss": 0.2309, "step": 73620 }, { "epoch": 0.5226127799869755, "grad_norm": 0.1337890625, "learning_rate": 0.0019766264573914524, "loss": 0.2314, "step": 73630 }, { "epoch": 0.5226837582268216, "grad_norm": 0.064453125, "learning_rate": 0.0019766200486302046, "loss": 0.2493, "step": 73640 }, { "epoch": 0.5227547364666677, "grad_norm": 0.0908203125, "learning_rate": 0.0019766136390020343, "loss": 0.2317, "step": 73650 }, { "epoch": 0.5228257147065138, "grad_norm": 0.072265625, "learning_rate": 0.0019766072285069486, "loss": 0.2325, "step": 73660 }, { "epoch": 0.52289669294636, "grad_norm": 0.1083984375, "learning_rate": 0.0019766008171449526, "loss": 0.2342, "step": 73670 }, { "epoch": 0.5229676711862061, "grad_norm": 0.1533203125, "learning_rate": 0.0019765944049160537, "loss": 0.2359, "step": 73680 }, { "epoch": 0.5230386494260522, "grad_norm": 0.142578125, "learning_rate": 0.001976587991820258, "loss": 0.2318, "step": 73690 }, { "epoch": 0.5231096276658983, "grad_norm": 0.08740234375, "learning_rate": 0.001976581577857572, "loss": 0.2502, "step": 73700 }, { "epoch": 0.5231806059057444, "grad_norm": 0.10888671875, "learning_rate": 0.001976575163028001, "loss": 0.2255, "step": 73710 }, { "epoch": 0.5232515841455906, "grad_norm": 0.1015625, "learning_rate": 0.0019765687473315523, "loss": 0.2323, "step": 73720 }, { "epoch": 0.5233225623854367, "grad_norm": 0.109375, "learning_rate": 0.0019765623307682325, "loss": 0.2258, "step": 73730 }, { "epoch": 0.5233935406252828, "grad_norm": 0.07958984375, "learning_rate": 0.001976555913338047, "loss": 0.2348, "step": 73740 }, { "epoch": 0.5234645188651289, "grad_norm": 0.146484375, "learning_rate": 0.0019765494950410027, "loss": 0.2303, "step": 73750 }, { "epoch": 0.523535497104975, "grad_norm": 0.091796875, "learning_rate": 0.001976543075877106, "loss": 0.2423, "step": 73760 }, { "epoch": 0.5236064753448212, "grad_norm": 0.130859375, "learning_rate": 0.0019765366558463634, "loss": 0.2218, "step": 73770 }, { "epoch": 0.5236774535846673, "grad_norm": 0.10009765625, "learning_rate": 0.0019765302349487807, "loss": 0.2355, "step": 73780 }, { "epoch": 0.5237484318245134, "grad_norm": 0.07373046875, "learning_rate": 0.001976523813184365, "loss": 0.2412, "step": 73790 }, { "epoch": 0.5238194100643595, "grad_norm": 0.07861328125, "learning_rate": 0.0019765173905531216, "loss": 0.2172, "step": 73800 }, { "epoch": 0.5238903883042056, "grad_norm": 0.1025390625, "learning_rate": 0.0019765109670550575, "loss": 0.2622, "step": 73810 }, { "epoch": 0.5239613665440518, "grad_norm": 0.1162109375, "learning_rate": 0.0019765045426901793, "loss": 0.2413, "step": 73820 }, { "epoch": 0.5240323447838979, "grad_norm": 0.10205078125, "learning_rate": 0.0019764981174584923, "loss": 0.2281, "step": 73830 }, { "epoch": 0.524103323023744, "grad_norm": 0.08447265625, "learning_rate": 0.0019764916913600043, "loss": 0.2219, "step": 73840 }, { "epoch": 0.5241743012635901, "grad_norm": 0.095703125, "learning_rate": 0.001976485264394721, "loss": 0.2263, "step": 73850 }, { "epoch": 0.5242452795034362, "grad_norm": 0.0869140625, "learning_rate": 0.0019764788365626484, "loss": 0.2277, "step": 73860 }, { "epoch": 0.5243162577432824, "grad_norm": 0.0986328125, "learning_rate": 0.001976472407863793, "loss": 0.2271, "step": 73870 }, { "epoch": 0.5243872359831284, "grad_norm": 0.0927734375, "learning_rate": 0.001976465978298162, "loss": 0.2427, "step": 73880 }, { "epoch": 0.5244582142229746, "grad_norm": 0.1630859375, "learning_rate": 0.0019764595478657604, "loss": 0.2238, "step": 73890 }, { "epoch": 0.5245291924628207, "grad_norm": 0.1357421875, "learning_rate": 0.0019764531165665953, "loss": 0.2361, "step": 73900 }, { "epoch": 0.5246001707026668, "grad_norm": 0.10498046875, "learning_rate": 0.0019764466844006735, "loss": 0.2265, "step": 73910 }, { "epoch": 0.524671148942513, "grad_norm": 0.1240234375, "learning_rate": 0.0019764402513680005, "loss": 0.2307, "step": 73920 }, { "epoch": 0.524742127182359, "grad_norm": 0.09326171875, "learning_rate": 0.0019764338174685834, "loss": 0.2265, "step": 73930 }, { "epoch": 0.5248131054222052, "grad_norm": 0.1328125, "learning_rate": 0.0019764273827024277, "loss": 0.2309, "step": 73940 }, { "epoch": 0.5248840836620513, "grad_norm": 0.07275390625, "learning_rate": 0.0019764209470695404, "loss": 0.2246, "step": 73950 }, { "epoch": 0.5249550619018974, "grad_norm": 0.1328125, "learning_rate": 0.0019764145105699276, "loss": 0.2412, "step": 73960 }, { "epoch": 0.5250260401417436, "grad_norm": 0.08349609375, "learning_rate": 0.0019764080732035958, "loss": 0.2548, "step": 73970 }, { "epoch": 0.5250970183815896, "grad_norm": 0.12060546875, "learning_rate": 0.0019764016349705514, "loss": 0.2271, "step": 73980 }, { "epoch": 0.5251679966214358, "grad_norm": 0.1181640625, "learning_rate": 0.001976395195870801, "loss": 0.2204, "step": 73990 }, { "epoch": 0.5252389748612819, "grad_norm": 0.1005859375, "learning_rate": 0.00197638875590435, "loss": 0.2404, "step": 74000 }, { "epoch": 0.5252389748612819, "eval_covost2-zh-en_loss": 3.7821946144104004, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.7501, "eval_covost2-zh-en_samples_per_second": 2.943, "eval_covost2-zh-en_steps_per_second": 0.184, "step": 74000 }, { "epoch": 0.5252389748612819, "eval_covost2-en-zh_loss": 3.134957790374756, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.5459, "eval_covost2-en-zh_samples_per_second": 3.115, "eval_covost2-en-zh_steps_per_second": 0.195, "step": 74000 }, { "epoch": 0.525309953101128, "grad_norm": 0.087890625, "learning_rate": 0.0019763823150712057, "loss": 0.2311, "step": 74010 }, { "epoch": 0.5253809313409742, "grad_norm": 0.083984375, "learning_rate": 0.0019763758733713746, "loss": 0.2317, "step": 74020 }, { "epoch": 0.5254519095808202, "grad_norm": 0.072265625, "learning_rate": 0.0019763694308048627, "loss": 0.2096, "step": 74030 }, { "epoch": 0.5255228878206664, "grad_norm": 0.09814453125, "learning_rate": 0.001976362987371676, "loss": 0.2244, "step": 74040 }, { "epoch": 0.5255938660605125, "grad_norm": 0.10498046875, "learning_rate": 0.0019763565430718213, "loss": 0.2401, "step": 74050 }, { "epoch": 0.5256648443003586, "grad_norm": 0.1669921875, "learning_rate": 0.001976350097905305, "loss": 0.2559, "step": 74060 }, { "epoch": 0.5257358225402048, "grad_norm": 0.287109375, "learning_rate": 0.0019763436518721333, "loss": 0.2123, "step": 74070 }, { "epoch": 0.5258068007800508, "grad_norm": 0.08837890625, "learning_rate": 0.001976337204972313, "loss": 0.2408, "step": 74080 }, { "epoch": 0.525877779019897, "grad_norm": 0.248046875, "learning_rate": 0.0019763307572058498, "loss": 0.2441, "step": 74090 }, { "epoch": 0.5259487572597431, "grad_norm": 0.115234375, "learning_rate": 0.00197632430857275, "loss": 0.228, "step": 74100 }, { "epoch": 0.5260197354995892, "grad_norm": 0.0869140625, "learning_rate": 0.001976317859073021, "loss": 0.2414, "step": 74110 }, { "epoch": 0.5260907137394353, "grad_norm": 0.220703125, "learning_rate": 0.0019763114087066685, "loss": 0.2367, "step": 74120 }, { "epoch": 0.5261616919792814, "grad_norm": 0.1103515625, "learning_rate": 0.0019763049574736988, "loss": 0.227, "step": 74130 }, { "epoch": 0.5262326702191276, "grad_norm": 0.076171875, "learning_rate": 0.0019762985053741185, "loss": 0.2298, "step": 74140 }, { "epoch": 0.5263036484589737, "grad_norm": 0.07421875, "learning_rate": 0.0019762920524079336, "loss": 0.2415, "step": 74150 }, { "epoch": 0.5263746266988198, "grad_norm": 0.11474609375, "learning_rate": 0.001976285598575151, "loss": 0.2319, "step": 74160 }, { "epoch": 0.5264456049386659, "grad_norm": 0.080078125, "learning_rate": 0.001976279143875777, "loss": 0.2337, "step": 74170 }, { "epoch": 0.526516583178512, "grad_norm": 0.1376953125, "learning_rate": 0.001976272688309818, "loss": 0.2335, "step": 74180 }, { "epoch": 0.5265875614183582, "grad_norm": 0.1025390625, "learning_rate": 0.00197626623187728, "loss": 0.2271, "step": 74190 }, { "epoch": 0.5266585396582043, "grad_norm": 0.07568359375, "learning_rate": 0.001976259774578169, "loss": 0.231, "step": 74200 }, { "epoch": 0.5267295178980504, "grad_norm": 0.080078125, "learning_rate": 0.001976253316412493, "loss": 0.2241, "step": 74210 }, { "epoch": 0.5268004961378965, "grad_norm": 0.0673828125, "learning_rate": 0.0019762468573802567, "loss": 0.2212, "step": 74220 }, { "epoch": 0.5268714743777426, "grad_norm": 0.07373046875, "learning_rate": 0.0019762403974814676, "loss": 0.2304, "step": 74230 }, { "epoch": 0.5269424526175888, "grad_norm": 0.1337890625, "learning_rate": 0.0019762339367161313, "loss": 0.2279, "step": 74240 }, { "epoch": 0.5270134308574349, "grad_norm": 0.1787109375, "learning_rate": 0.001976227475084255, "loss": 0.2305, "step": 74250 }, { "epoch": 0.527084409097281, "grad_norm": 0.119140625, "learning_rate": 0.001976221012585844, "loss": 0.2659, "step": 74260 }, { "epoch": 0.5271553873371271, "grad_norm": 0.06884765625, "learning_rate": 0.001976214549220906, "loss": 0.2293, "step": 74270 }, { "epoch": 0.5272263655769732, "grad_norm": 0.1611328125, "learning_rate": 0.001976208084989446, "loss": 0.2418, "step": 74280 }, { "epoch": 0.5272973438168194, "grad_norm": 0.11328125, "learning_rate": 0.0019762016198914723, "loss": 0.2298, "step": 74290 }, { "epoch": 0.5273683220566655, "grad_norm": 0.09423828125, "learning_rate": 0.0019761951539269892, "loss": 0.2419, "step": 74300 }, { "epoch": 0.5274393002965116, "grad_norm": 0.09912109375, "learning_rate": 0.001976188687096004, "loss": 0.2284, "step": 74310 }, { "epoch": 0.5275102785363577, "grad_norm": 0.09423828125, "learning_rate": 0.0019761822193985236, "loss": 0.2271, "step": 74320 }, { "epoch": 0.5275812567762038, "grad_norm": 0.08984375, "learning_rate": 0.0019761757508345535, "loss": 0.2349, "step": 74330 }, { "epoch": 0.52765223501605, "grad_norm": 0.06982421875, "learning_rate": 0.0019761692814041004, "loss": 0.2496, "step": 74340 }, { "epoch": 0.5277232132558961, "grad_norm": 0.119140625, "learning_rate": 0.001976162811107171, "loss": 0.243, "step": 74350 }, { "epoch": 0.5277941914957421, "grad_norm": 0.0869140625, "learning_rate": 0.0019761563399437717, "loss": 0.2358, "step": 74360 }, { "epoch": 0.5278651697355883, "grad_norm": 0.09033203125, "learning_rate": 0.0019761498679139087, "loss": 0.2473, "step": 74370 }, { "epoch": 0.5279361479754344, "grad_norm": 0.09912109375, "learning_rate": 0.0019761433950175877, "loss": 0.2389, "step": 74380 }, { "epoch": 0.5280071262152806, "grad_norm": 0.0654296875, "learning_rate": 0.0019761369212548162, "loss": 0.2316, "step": 74390 }, { "epoch": 0.5280781044551267, "grad_norm": 0.1416015625, "learning_rate": 0.0019761304466256006, "loss": 0.2309, "step": 74400 }, { "epoch": 0.5281490826949727, "grad_norm": 0.091796875, "learning_rate": 0.001976123971129946, "loss": 0.2341, "step": 74410 }, { "epoch": 0.5282200609348189, "grad_norm": 0.1484375, "learning_rate": 0.0019761174947678607, "loss": 0.2383, "step": 74420 }, { "epoch": 0.528291039174665, "grad_norm": 0.0849609375, "learning_rate": 0.0019761110175393498, "loss": 0.2346, "step": 74430 }, { "epoch": 0.5283620174145112, "grad_norm": 0.09228515625, "learning_rate": 0.00197610453944442, "loss": 0.2229, "step": 74440 }, { "epoch": 0.5284329956543573, "grad_norm": 0.09423828125, "learning_rate": 0.0019760980604830772, "loss": 0.2331, "step": 74450 }, { "epoch": 0.5285039738942033, "grad_norm": 0.1201171875, "learning_rate": 0.0019760915806553286, "loss": 0.2398, "step": 74460 }, { "epoch": 0.5285749521340495, "grad_norm": 0.1259765625, "learning_rate": 0.0019760850999611806, "loss": 0.2324, "step": 74470 }, { "epoch": 0.5286459303738956, "grad_norm": 0.09228515625, "learning_rate": 0.0019760786184006393, "loss": 0.2298, "step": 74480 }, { "epoch": 0.5287169086137418, "grad_norm": 0.099609375, "learning_rate": 0.001976072135973711, "loss": 0.2165, "step": 74490 }, { "epoch": 0.5287878868535879, "grad_norm": 0.1240234375, "learning_rate": 0.0019760656526804023, "loss": 0.2261, "step": 74500 }, { "epoch": 0.5288588650934339, "grad_norm": 0.1044921875, "learning_rate": 0.0019760591685207196, "loss": 0.2256, "step": 74510 }, { "epoch": 0.5289298433332801, "grad_norm": 0.07568359375, "learning_rate": 0.0019760526834946696, "loss": 0.2276, "step": 74520 }, { "epoch": 0.5290008215731262, "grad_norm": 0.0791015625, "learning_rate": 0.001976046197602258, "loss": 0.2248, "step": 74530 }, { "epoch": 0.5290717998129724, "grad_norm": 0.13671875, "learning_rate": 0.001976039710843492, "loss": 0.228, "step": 74540 }, { "epoch": 0.5291427780528185, "grad_norm": 0.10498046875, "learning_rate": 0.0019760332232183772, "loss": 0.2285, "step": 74550 }, { "epoch": 0.5292137562926645, "grad_norm": 0.0712890625, "learning_rate": 0.0019760267347269204, "loss": 0.2204, "step": 74560 }, { "epoch": 0.5292847345325107, "grad_norm": 0.212890625, "learning_rate": 0.0019760202453691284, "loss": 0.2419, "step": 74570 }, { "epoch": 0.5293557127723568, "grad_norm": 0.08056640625, "learning_rate": 0.001976013755145007, "loss": 0.2411, "step": 74580 }, { "epoch": 0.529426691012203, "grad_norm": 0.08056640625, "learning_rate": 0.001976007264054563, "loss": 0.2329, "step": 74590 }, { "epoch": 0.5294976692520491, "grad_norm": 0.08447265625, "learning_rate": 0.0019760007720978027, "loss": 0.2485, "step": 74600 }, { "epoch": 0.5295686474918951, "grad_norm": 0.095703125, "learning_rate": 0.0019759942792747327, "loss": 0.2283, "step": 74610 }, { "epoch": 0.5296396257317413, "grad_norm": 0.07275390625, "learning_rate": 0.001975987785585359, "loss": 0.2341, "step": 74620 }, { "epoch": 0.5297106039715874, "grad_norm": 0.12451171875, "learning_rate": 0.0019759812910296885, "loss": 0.2263, "step": 74630 }, { "epoch": 0.5297815822114336, "grad_norm": 0.087890625, "learning_rate": 0.0019759747956077274, "loss": 0.2303, "step": 74640 }, { "epoch": 0.5298525604512796, "grad_norm": 0.0869140625, "learning_rate": 0.001975968299319482, "loss": 0.2175, "step": 74650 }, { "epoch": 0.5299235386911257, "grad_norm": 0.0673828125, "learning_rate": 0.0019759618021649586, "loss": 0.2201, "step": 74660 }, { "epoch": 0.5299945169309719, "grad_norm": 0.08837890625, "learning_rate": 0.001975955304144165, "loss": 0.2305, "step": 74670 }, { "epoch": 0.530065495170818, "grad_norm": 0.1953125, "learning_rate": 0.0019759488052571053, "loss": 0.2253, "step": 74680 }, { "epoch": 0.5301364734106642, "grad_norm": 0.10009765625, "learning_rate": 0.0019759423055037873, "loss": 0.2208, "step": 74690 }, { "epoch": 0.5302074516505102, "grad_norm": 0.11083984375, "learning_rate": 0.0019759358048842177, "loss": 0.2342, "step": 74700 }, { "epoch": 0.5302784298903563, "grad_norm": 0.062255859375, "learning_rate": 0.0019759293033984023, "loss": 0.244, "step": 74710 }, { "epoch": 0.5303494081302025, "grad_norm": 0.08349609375, "learning_rate": 0.0019759228010463475, "loss": 0.2303, "step": 74720 }, { "epoch": 0.5304203863700486, "grad_norm": 0.1015625, "learning_rate": 0.0019759162978280603, "loss": 0.2339, "step": 74730 }, { "epoch": 0.5304913646098948, "grad_norm": 0.11376953125, "learning_rate": 0.0019759097937435467, "loss": 0.2408, "step": 74740 }, { "epoch": 0.5305623428497408, "grad_norm": 0.1435546875, "learning_rate": 0.001975903288792813, "loss": 0.233, "step": 74750 }, { "epoch": 0.5306333210895869, "grad_norm": 0.10595703125, "learning_rate": 0.001975896782975866, "loss": 0.2345, "step": 74760 }, { "epoch": 0.5307042993294331, "grad_norm": 0.1103515625, "learning_rate": 0.001975890276292712, "loss": 0.2505, "step": 74770 }, { "epoch": 0.5307752775692792, "grad_norm": 0.0927734375, "learning_rate": 0.0019758837687433575, "loss": 0.2208, "step": 74780 }, { "epoch": 0.5308462558091254, "grad_norm": 0.064453125, "learning_rate": 0.0019758772603278084, "loss": 0.2122, "step": 74790 }, { "epoch": 0.5309172340489714, "grad_norm": 0.11328125, "learning_rate": 0.001975870751046072, "loss": 0.2413, "step": 74800 }, { "epoch": 0.5309882122888175, "grad_norm": 0.12353515625, "learning_rate": 0.001975864240898154, "loss": 0.2235, "step": 74810 }, { "epoch": 0.5310591905286637, "grad_norm": 0.1162109375, "learning_rate": 0.0019758577298840617, "loss": 0.2414, "step": 74820 }, { "epoch": 0.5311301687685098, "grad_norm": 0.12060546875, "learning_rate": 0.0019758512180038006, "loss": 0.2278, "step": 74830 }, { "epoch": 0.531201147008356, "grad_norm": 0.115234375, "learning_rate": 0.0019758447052573777, "loss": 0.2267, "step": 74840 }, { "epoch": 0.531272125248202, "grad_norm": 0.0986328125, "learning_rate": 0.001975838191644799, "loss": 0.2357, "step": 74850 }, { "epoch": 0.5313431034880481, "grad_norm": 0.0751953125, "learning_rate": 0.0019758316771660714, "loss": 0.2335, "step": 74860 }, { "epoch": 0.5314140817278943, "grad_norm": 0.166015625, "learning_rate": 0.0019758251618212015, "loss": 0.2432, "step": 74870 }, { "epoch": 0.5314850599677404, "grad_norm": 0.0859375, "learning_rate": 0.001975818645610195, "loss": 0.2432, "step": 74880 }, { "epoch": 0.5315560382075865, "grad_norm": 0.111328125, "learning_rate": 0.0019758121285330587, "loss": 0.2268, "step": 74890 }, { "epoch": 0.5316270164474326, "grad_norm": 0.091796875, "learning_rate": 0.0019758056105897994, "loss": 0.23, "step": 74900 }, { "epoch": 0.5316979946872787, "grad_norm": 0.08349609375, "learning_rate": 0.001975799091780423, "loss": 0.2325, "step": 74910 }, { "epoch": 0.5317689729271249, "grad_norm": 0.09228515625, "learning_rate": 0.0019757925721049364, "loss": 0.2328, "step": 74920 }, { "epoch": 0.531839951166971, "grad_norm": 0.0869140625, "learning_rate": 0.001975786051563346, "loss": 0.223, "step": 74930 }, { "epoch": 0.5319109294068171, "grad_norm": 0.09375, "learning_rate": 0.001975779530155657, "loss": 0.2209, "step": 74940 }, { "epoch": 0.5319819076466632, "grad_norm": 0.09619140625, "learning_rate": 0.001975773007881878, "loss": 0.2272, "step": 74950 }, { "epoch": 0.5320528858865093, "grad_norm": 0.11962890625, "learning_rate": 0.0019757664847420142, "loss": 0.2352, "step": 74960 }, { "epoch": 0.5321238641263555, "grad_norm": 0.08935546875, "learning_rate": 0.001975759960736072, "loss": 0.2336, "step": 74970 }, { "epoch": 0.5321948423662016, "grad_norm": 0.1787109375, "learning_rate": 0.001975753435864058, "loss": 0.2407, "step": 74980 }, { "epoch": 0.5322658206060477, "grad_norm": 0.12451171875, "learning_rate": 0.0019757469101259794, "loss": 0.2274, "step": 74990 }, { "epoch": 0.5323367988458938, "grad_norm": 0.0859375, "learning_rate": 0.0019757403835218416, "loss": 0.2335, "step": 75000 }, { "epoch": 0.5324077770857399, "grad_norm": 0.08447265625, "learning_rate": 0.0019757338560516514, "loss": 0.237, "step": 75010 }, { "epoch": 0.5324787553255861, "grad_norm": 0.08935546875, "learning_rate": 0.001975727327715415, "loss": 0.2469, "step": 75020 }, { "epoch": 0.5325497335654322, "grad_norm": 0.1201171875, "learning_rate": 0.0019757207985131395, "loss": 0.2343, "step": 75030 }, { "epoch": 0.5326207118052783, "grad_norm": 0.08251953125, "learning_rate": 0.0019757142684448314, "loss": 0.2316, "step": 75040 }, { "epoch": 0.5326916900451244, "grad_norm": 0.07177734375, "learning_rate": 0.001975707737510496, "loss": 0.2319, "step": 75050 }, { "epoch": 0.5327626682849705, "grad_norm": 0.1064453125, "learning_rate": 0.001975701205710141, "loss": 0.2357, "step": 75060 }, { "epoch": 0.5328336465248167, "grad_norm": 0.11279296875, "learning_rate": 0.0019756946730437723, "loss": 0.2341, "step": 75070 }, { "epoch": 0.5329046247646628, "grad_norm": 0.1123046875, "learning_rate": 0.0019756881395113965, "loss": 0.215, "step": 75080 }, { "epoch": 0.5329756030045089, "grad_norm": 0.10498046875, "learning_rate": 0.00197568160511302, "loss": 0.2403, "step": 75090 }, { "epoch": 0.533046581244355, "grad_norm": 0.09423828125, "learning_rate": 0.001975675069848649, "loss": 0.2272, "step": 75100 }, { "epoch": 0.5331175594842011, "grad_norm": 0.0947265625, "learning_rate": 0.0019756685337182904, "loss": 0.2234, "step": 75110 }, { "epoch": 0.5331885377240473, "grad_norm": 0.09619140625, "learning_rate": 0.0019756619967219507, "loss": 0.2255, "step": 75120 }, { "epoch": 0.5332595159638933, "grad_norm": 0.10986328125, "learning_rate": 0.001975655458859636, "loss": 0.2195, "step": 75130 }, { "epoch": 0.5333304942037395, "grad_norm": 0.1455078125, "learning_rate": 0.0019756489201313526, "loss": 0.234, "step": 75140 }, { "epoch": 0.5334014724435856, "grad_norm": 0.09228515625, "learning_rate": 0.001975642380537108, "loss": 0.2425, "step": 75150 }, { "epoch": 0.5334724506834317, "grad_norm": 0.1611328125, "learning_rate": 0.0019756358400769072, "loss": 0.2542, "step": 75160 }, { "epoch": 0.5335434289232779, "grad_norm": 0.1650390625, "learning_rate": 0.0019756292987507585, "loss": 0.2404, "step": 75170 }, { "epoch": 0.5336144071631239, "grad_norm": 0.111328125, "learning_rate": 0.001975622756558666, "loss": 0.2484, "step": 75180 }, { "epoch": 0.5336853854029701, "grad_norm": 0.12890625, "learning_rate": 0.0019756162135006385, "loss": 0.2226, "step": 75190 }, { "epoch": 0.5337563636428162, "grad_norm": 0.09765625, "learning_rate": 0.001975609669576681, "loss": 0.2102, "step": 75200 }, { "epoch": 0.5338273418826623, "grad_norm": 0.087890625, "learning_rate": 0.0019756031247868005, "loss": 0.236, "step": 75210 }, { "epoch": 0.5338983201225085, "grad_norm": 0.09033203125, "learning_rate": 0.0019755965791310036, "loss": 0.2193, "step": 75220 }, { "epoch": 0.5339692983623545, "grad_norm": 0.080078125, "learning_rate": 0.001975590032609296, "loss": 0.2421, "step": 75230 }, { "epoch": 0.5340402766022007, "grad_norm": 0.10107421875, "learning_rate": 0.0019755834852216853, "loss": 0.2314, "step": 75240 }, { "epoch": 0.5341112548420468, "grad_norm": 0.1357421875, "learning_rate": 0.0019755769369681772, "loss": 0.2435, "step": 75250 }, { "epoch": 0.5341822330818929, "grad_norm": 0.06982421875, "learning_rate": 0.001975570387848778, "loss": 0.2202, "step": 75260 }, { "epoch": 0.5342532113217391, "grad_norm": 0.10302734375, "learning_rate": 0.0019755638378634952, "loss": 0.2369, "step": 75270 }, { "epoch": 0.5343241895615851, "grad_norm": 0.10302734375, "learning_rate": 0.0019755572870123342, "loss": 0.229, "step": 75280 }, { "epoch": 0.5343951678014313, "grad_norm": 0.2490234375, "learning_rate": 0.0019755507352953025, "loss": 0.213, "step": 75290 }, { "epoch": 0.5344661460412774, "grad_norm": 0.1513671875, "learning_rate": 0.001975544182712405, "loss": 0.2467, "step": 75300 }, { "epoch": 0.5345371242811235, "grad_norm": 0.1376953125, "learning_rate": 0.00197553762926365, "loss": 0.2308, "step": 75310 }, { "epoch": 0.5346081025209697, "grad_norm": 0.109375, "learning_rate": 0.001975531074949043, "loss": 0.2378, "step": 75320 }, { "epoch": 0.5346790807608157, "grad_norm": 0.09375, "learning_rate": 0.0019755245197685904, "loss": 0.2356, "step": 75330 }, { "epoch": 0.5347500590006619, "grad_norm": 0.1494140625, "learning_rate": 0.0019755179637222994, "loss": 0.234, "step": 75340 }, { "epoch": 0.534821037240508, "grad_norm": 0.083984375, "learning_rate": 0.0019755114068101755, "loss": 0.2312, "step": 75350 }, { "epoch": 0.5348920154803541, "grad_norm": 0.0947265625, "learning_rate": 0.001975504849032226, "loss": 0.2376, "step": 75360 }, { "epoch": 0.5349629937202002, "grad_norm": 0.1025390625, "learning_rate": 0.001975498290388457, "loss": 0.231, "step": 75370 }, { "epoch": 0.5350339719600463, "grad_norm": 0.08447265625, "learning_rate": 0.001975491730878875, "loss": 0.2339, "step": 75380 }, { "epoch": 0.5351049501998925, "grad_norm": 0.1494140625, "learning_rate": 0.0019754851705034867, "loss": 0.2359, "step": 75390 }, { "epoch": 0.5351759284397386, "grad_norm": 0.15625, "learning_rate": 0.0019754786092622983, "loss": 0.2361, "step": 75400 }, { "epoch": 0.5352469066795847, "grad_norm": 0.126953125, "learning_rate": 0.0019754720471553164, "loss": 0.2327, "step": 75410 }, { "epoch": 0.5353178849194308, "grad_norm": 0.0693359375, "learning_rate": 0.0019754654841825474, "loss": 0.229, "step": 75420 }, { "epoch": 0.5353888631592769, "grad_norm": 0.19921875, "learning_rate": 0.0019754589203439983, "loss": 0.2366, "step": 75430 }, { "epoch": 0.5354598413991231, "grad_norm": 0.087890625, "learning_rate": 0.0019754523556396748, "loss": 0.2206, "step": 75440 }, { "epoch": 0.5355308196389692, "grad_norm": 0.08251953125, "learning_rate": 0.0019754457900695837, "loss": 0.2338, "step": 75450 }, { "epoch": 0.5356017978788153, "grad_norm": 0.208984375, "learning_rate": 0.001975439223633732, "loss": 0.2173, "step": 75460 }, { "epoch": 0.5356727761186614, "grad_norm": 0.11376953125, "learning_rate": 0.001975432656332126, "loss": 0.2366, "step": 75470 }, { "epoch": 0.5357437543585075, "grad_norm": 0.140625, "learning_rate": 0.0019754260881647712, "loss": 0.2294, "step": 75480 }, { "epoch": 0.5358147325983537, "grad_norm": 0.08251953125, "learning_rate": 0.001975419519131675, "loss": 0.2304, "step": 75490 }, { "epoch": 0.5358857108381998, "grad_norm": 0.1025390625, "learning_rate": 0.001975412949232844, "loss": 0.2527, "step": 75500 }, { "epoch": 0.535956689078046, "grad_norm": 0.07958984375, "learning_rate": 0.001975406378468285, "loss": 0.2131, "step": 75510 }, { "epoch": 0.536027667317892, "grad_norm": 0.11474609375, "learning_rate": 0.001975399806838003, "loss": 0.2374, "step": 75520 }, { "epoch": 0.5360986455577381, "grad_norm": 0.10986328125, "learning_rate": 0.0019753932343420056, "loss": 0.2375, "step": 75530 }, { "epoch": 0.5361696237975843, "grad_norm": 0.1455078125, "learning_rate": 0.0019753866609802996, "loss": 0.2565, "step": 75540 }, { "epoch": 0.5362406020374304, "grad_norm": 0.08349609375, "learning_rate": 0.001975380086752891, "loss": 0.2257, "step": 75550 }, { "epoch": 0.5363115802772765, "grad_norm": 0.11669921875, "learning_rate": 0.001975373511659786, "loss": 0.2469, "step": 75560 }, { "epoch": 0.5363825585171226, "grad_norm": 0.0830078125, "learning_rate": 0.0019753669357009916, "loss": 0.2526, "step": 75570 }, { "epoch": 0.5364535367569687, "grad_norm": 0.142578125, "learning_rate": 0.0019753603588765147, "loss": 0.2393, "step": 75580 }, { "epoch": 0.5365245149968149, "grad_norm": 0.11962890625, "learning_rate": 0.001975353781186361, "loss": 0.2272, "step": 75590 }, { "epoch": 0.536595493236661, "grad_norm": 0.0771484375, "learning_rate": 0.0019753472026305366, "loss": 0.2338, "step": 75600 }, { "epoch": 0.5366664714765071, "grad_norm": 0.09375, "learning_rate": 0.0019753406232090494, "loss": 0.2191, "step": 75610 }, { "epoch": 0.5367374497163532, "grad_norm": 0.14453125, "learning_rate": 0.0019753340429219048, "loss": 0.2427, "step": 75620 }, { "epoch": 0.5368084279561993, "grad_norm": 0.15625, "learning_rate": 0.0019753274617691097, "loss": 0.2422, "step": 75630 }, { "epoch": 0.5368794061960455, "grad_norm": 0.1015625, "learning_rate": 0.0019753208797506708, "loss": 0.2447, "step": 75640 }, { "epoch": 0.5369503844358916, "grad_norm": 0.11767578125, "learning_rate": 0.0019753142968665944, "loss": 0.2264, "step": 75650 }, { "epoch": 0.5370213626757376, "grad_norm": 0.08251953125, "learning_rate": 0.001975307713116887, "loss": 0.2303, "step": 75660 }, { "epoch": 0.5370923409155838, "grad_norm": 0.10986328125, "learning_rate": 0.0019753011285015554, "loss": 0.2233, "step": 75670 }, { "epoch": 0.5371633191554299, "grad_norm": 0.0859375, "learning_rate": 0.0019752945430206053, "loss": 0.2136, "step": 75680 }, { "epoch": 0.5372342973952761, "grad_norm": 0.1376953125, "learning_rate": 0.001975287956674044, "loss": 0.2315, "step": 75690 }, { "epoch": 0.5373052756351222, "grad_norm": 0.130859375, "learning_rate": 0.001975281369461878, "loss": 0.2261, "step": 75700 }, { "epoch": 0.5373762538749682, "grad_norm": 0.1064453125, "learning_rate": 0.001975274781384113, "loss": 0.2296, "step": 75710 }, { "epoch": 0.5374472321148144, "grad_norm": 0.1064453125, "learning_rate": 0.0019752681924407565, "loss": 0.2252, "step": 75720 }, { "epoch": 0.5375182103546605, "grad_norm": 0.080078125, "learning_rate": 0.0019752616026318145, "loss": 0.2283, "step": 75730 }, { "epoch": 0.5375891885945067, "grad_norm": 0.062255859375, "learning_rate": 0.0019752550119572936, "loss": 0.2401, "step": 75740 }, { "epoch": 0.5376601668343528, "grad_norm": 0.11279296875, "learning_rate": 0.001975248420417201, "loss": 0.2231, "step": 75750 }, { "epoch": 0.5377311450741988, "grad_norm": 0.08056640625, "learning_rate": 0.0019752418280115417, "loss": 0.237, "step": 75760 }, { "epoch": 0.537802123314045, "grad_norm": 0.07275390625, "learning_rate": 0.0019752352347403236, "loss": 0.2132, "step": 75770 }, { "epoch": 0.5378731015538911, "grad_norm": 0.1064453125, "learning_rate": 0.0019752286406035523, "loss": 0.2384, "step": 75780 }, { "epoch": 0.5379440797937373, "grad_norm": 0.11376953125, "learning_rate": 0.001975222045601235, "loss": 0.2317, "step": 75790 }, { "epoch": 0.5380150580335834, "grad_norm": 0.099609375, "learning_rate": 0.001975215449733378, "loss": 0.2501, "step": 75800 }, { "epoch": 0.5380860362734294, "grad_norm": 0.12353515625, "learning_rate": 0.0019752088529999875, "loss": 0.2278, "step": 75810 }, { "epoch": 0.5381570145132756, "grad_norm": 0.10400390625, "learning_rate": 0.0019752022554010706, "loss": 0.2334, "step": 75820 }, { "epoch": 0.5382279927531217, "grad_norm": 0.0986328125, "learning_rate": 0.0019751956569366335, "loss": 0.2238, "step": 75830 }, { "epoch": 0.5382989709929679, "grad_norm": 0.107421875, "learning_rate": 0.0019751890576066825, "loss": 0.2246, "step": 75840 }, { "epoch": 0.538369949232814, "grad_norm": 0.1708984375, "learning_rate": 0.0019751824574112246, "loss": 0.2455, "step": 75850 }, { "epoch": 0.53844092747266, "grad_norm": 0.1181640625, "learning_rate": 0.001975175856350266, "loss": 0.2266, "step": 75860 }, { "epoch": 0.5385119057125062, "grad_norm": 0.0859375, "learning_rate": 0.0019751692544238133, "loss": 0.2339, "step": 75870 }, { "epoch": 0.5385828839523523, "grad_norm": 0.154296875, "learning_rate": 0.0019751626516318734, "loss": 0.2352, "step": 75880 }, { "epoch": 0.5386538621921985, "grad_norm": 0.1494140625, "learning_rate": 0.0019751560479744517, "loss": 0.2396, "step": 75890 }, { "epoch": 0.5387248404320445, "grad_norm": 0.048828125, "learning_rate": 0.001975149443451556, "loss": 0.2298, "step": 75900 }, { "epoch": 0.5387958186718906, "grad_norm": 0.1376953125, "learning_rate": 0.0019751428380631927, "loss": 0.2462, "step": 75910 }, { "epoch": 0.5388667969117368, "grad_norm": 0.1572265625, "learning_rate": 0.0019751362318093674, "loss": 0.2247, "step": 75920 }, { "epoch": 0.5389377751515829, "grad_norm": 0.11865234375, "learning_rate": 0.0019751296246900874, "loss": 0.2403, "step": 75930 }, { "epoch": 0.5390087533914291, "grad_norm": 0.12451171875, "learning_rate": 0.0019751230167053594, "loss": 0.2336, "step": 75940 }, { "epoch": 0.5390797316312751, "grad_norm": 0.09326171875, "learning_rate": 0.001975116407855189, "loss": 0.2285, "step": 75950 }, { "epoch": 0.5391507098711212, "grad_norm": 0.11767578125, "learning_rate": 0.0019751097981395836, "loss": 0.2491, "step": 75960 }, { "epoch": 0.5392216881109674, "grad_norm": 0.107421875, "learning_rate": 0.0019751031875585496, "loss": 0.2179, "step": 75970 }, { "epoch": 0.5392926663508135, "grad_norm": 0.1181640625, "learning_rate": 0.0019750965761120933, "loss": 0.2334, "step": 75980 }, { "epoch": 0.5393636445906597, "grad_norm": 0.13671875, "learning_rate": 0.0019750899638002212, "loss": 0.239, "step": 75990 }, { "epoch": 0.5394346228305057, "grad_norm": 0.123046875, "learning_rate": 0.0019750833506229403, "loss": 0.2259, "step": 76000 }, { "epoch": 0.5394346228305057, "eval_covost2-zh-en_loss": 3.744062900543213, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.0104, "eval_covost2-zh-en_samples_per_second": 3.046, "eval_covost2-zh-en_steps_per_second": 0.19, "step": 76000 }, { "epoch": 0.5394346228305057, "eval_covost2-en-zh_loss": 3.1459765434265137, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.6548, "eval_covost2-en-zh_samples_per_second": 2.955, "eval_covost2-en-zh_steps_per_second": 0.185, "step": 76000 }, { "epoch": 0.5395056010703518, "grad_norm": 0.09716796875, "learning_rate": 0.0019750767365802565, "loss": 0.236, "step": 76010 }, { "epoch": 0.539576579310198, "grad_norm": 0.10302734375, "learning_rate": 0.0019750701216721765, "loss": 0.2396, "step": 76020 }, { "epoch": 0.5396475575500441, "grad_norm": 0.11572265625, "learning_rate": 0.0019750635058987075, "loss": 0.2329, "step": 76030 }, { "epoch": 0.5397185357898903, "grad_norm": 0.09228515625, "learning_rate": 0.001975056889259855, "loss": 0.2276, "step": 76040 }, { "epoch": 0.5397895140297363, "grad_norm": 0.050537109375, "learning_rate": 0.0019750502717556266, "loss": 0.2181, "step": 76050 }, { "epoch": 0.5398604922695824, "grad_norm": 0.1142578125, "learning_rate": 0.0019750436533860277, "loss": 0.2368, "step": 76060 }, { "epoch": 0.5399314705094286, "grad_norm": 0.068359375, "learning_rate": 0.001975037034151066, "loss": 0.2317, "step": 76070 }, { "epoch": 0.5400024487492747, "grad_norm": 0.07373046875, "learning_rate": 0.0019750304140507473, "loss": 0.24, "step": 76080 }, { "epoch": 0.5400734269891209, "grad_norm": 0.10498046875, "learning_rate": 0.0019750237930850783, "loss": 0.2302, "step": 76090 }, { "epoch": 0.5401444052289669, "grad_norm": 0.09423828125, "learning_rate": 0.001975017171254066, "loss": 0.2202, "step": 76100 }, { "epoch": 0.540215383468813, "grad_norm": 0.07177734375, "learning_rate": 0.0019750105485577163, "loss": 0.2218, "step": 76110 }, { "epoch": 0.5402863617086592, "grad_norm": 0.09716796875, "learning_rate": 0.001975003924996036, "loss": 0.2252, "step": 76120 }, { "epoch": 0.5403573399485053, "grad_norm": 0.07373046875, "learning_rate": 0.0019749973005690315, "loss": 0.2267, "step": 76130 }, { "epoch": 0.5404283181883514, "grad_norm": 0.09716796875, "learning_rate": 0.0019749906752767094, "loss": 0.2364, "step": 76140 }, { "epoch": 0.5404992964281975, "grad_norm": 0.1748046875, "learning_rate": 0.001974984049119077, "loss": 0.2212, "step": 76150 }, { "epoch": 0.5405702746680436, "grad_norm": 0.10498046875, "learning_rate": 0.0019749774220961394, "loss": 0.224, "step": 76160 }, { "epoch": 0.5406412529078898, "grad_norm": 0.1845703125, "learning_rate": 0.0019749707942079047, "loss": 0.228, "step": 76170 }, { "epoch": 0.5407122311477359, "grad_norm": 0.07666015625, "learning_rate": 0.0019749641654543786, "loss": 0.2392, "step": 76180 }, { "epoch": 0.540783209387582, "grad_norm": 0.0966796875, "learning_rate": 0.001974957535835567, "loss": 0.2348, "step": 76190 }, { "epoch": 0.5408541876274281, "grad_norm": 0.21875, "learning_rate": 0.0019749509053514783, "loss": 0.2321, "step": 76200 }, { "epoch": 0.5409251658672742, "grad_norm": 0.10888671875, "learning_rate": 0.0019749442740021176, "loss": 0.2228, "step": 76210 }, { "epoch": 0.5409961441071204, "grad_norm": 0.1953125, "learning_rate": 0.0019749376417874916, "loss": 0.236, "step": 76220 }, { "epoch": 0.5410671223469665, "grad_norm": 0.0966796875, "learning_rate": 0.001974931008707607, "loss": 0.2302, "step": 76230 }, { "epoch": 0.5411381005868126, "grad_norm": 0.1494140625, "learning_rate": 0.001974924374762471, "loss": 0.2334, "step": 76240 }, { "epoch": 0.5412090788266587, "grad_norm": 0.2060546875, "learning_rate": 0.0019749177399520893, "loss": 0.2283, "step": 76250 }, { "epoch": 0.5412800570665048, "grad_norm": 0.1611328125, "learning_rate": 0.0019749111042764687, "loss": 0.2452, "step": 76260 }, { "epoch": 0.541351035306351, "grad_norm": 0.1142578125, "learning_rate": 0.0019749044677356163, "loss": 0.2364, "step": 76270 }, { "epoch": 0.5414220135461971, "grad_norm": 0.103515625, "learning_rate": 0.001974897830329538, "loss": 0.244, "step": 76280 }, { "epoch": 0.5414929917860432, "grad_norm": 0.0927734375, "learning_rate": 0.0019748911920582403, "loss": 0.2375, "step": 76290 }, { "epoch": 0.5415639700258893, "grad_norm": 0.095703125, "learning_rate": 0.00197488455292173, "loss": 0.2465, "step": 76300 }, { "epoch": 0.5416349482657354, "grad_norm": 0.11181640625, "learning_rate": 0.0019748779129200146, "loss": 0.2334, "step": 76310 }, { "epoch": 0.5417059265055816, "grad_norm": 0.1103515625, "learning_rate": 0.0019748712720530992, "loss": 0.2147, "step": 76320 }, { "epoch": 0.5417769047454277, "grad_norm": 0.10498046875, "learning_rate": 0.001974864630320991, "loss": 0.2414, "step": 76330 }, { "epoch": 0.5418478829852738, "grad_norm": 0.09423828125, "learning_rate": 0.0019748579877236962, "loss": 0.2449, "step": 76340 }, { "epoch": 0.5419188612251199, "grad_norm": 0.14453125, "learning_rate": 0.0019748513442612217, "loss": 0.2335, "step": 76350 }, { "epoch": 0.541989839464966, "grad_norm": 0.0673828125, "learning_rate": 0.001974844699933574, "loss": 0.2469, "step": 76360 }, { "epoch": 0.5420608177048122, "grad_norm": 0.08740234375, "learning_rate": 0.00197483805474076, "loss": 0.222, "step": 76370 }, { "epoch": 0.5421317959446582, "grad_norm": 0.1796875, "learning_rate": 0.001974831408682786, "loss": 0.2377, "step": 76380 }, { "epoch": 0.5422027741845044, "grad_norm": 0.162109375, "learning_rate": 0.001974824761759659, "loss": 0.2309, "step": 76390 }, { "epoch": 0.5422737524243505, "grad_norm": 0.06884765625, "learning_rate": 0.001974818113971384, "loss": 0.2352, "step": 76400 }, { "epoch": 0.5423447306641966, "grad_norm": 0.130859375, "learning_rate": 0.0019748114653179694, "loss": 0.2341, "step": 76410 }, { "epoch": 0.5424157089040428, "grad_norm": 0.0673828125, "learning_rate": 0.001974804815799421, "loss": 0.229, "step": 76420 }, { "epoch": 0.5424866871438888, "grad_norm": 0.08544921875, "learning_rate": 0.001974798165415746, "loss": 0.2453, "step": 76430 }, { "epoch": 0.542557665383735, "grad_norm": 0.1337890625, "learning_rate": 0.0019747915141669495, "loss": 0.2326, "step": 76440 }, { "epoch": 0.5426286436235811, "grad_norm": 0.07666015625, "learning_rate": 0.00197478486205304, "loss": 0.2224, "step": 76450 }, { "epoch": 0.5426996218634272, "grad_norm": 0.11279296875, "learning_rate": 0.0019747782090740224, "loss": 0.2354, "step": 76460 }, { "epoch": 0.5427706001032734, "grad_norm": 0.1416015625, "learning_rate": 0.0019747715552299043, "loss": 0.2351, "step": 76470 }, { "epoch": 0.5428415783431194, "grad_norm": 0.1396484375, "learning_rate": 0.001974764900520692, "loss": 0.2162, "step": 76480 }, { "epoch": 0.5429125565829656, "grad_norm": 0.0947265625, "learning_rate": 0.0019747582449463914, "loss": 0.2163, "step": 76490 }, { "epoch": 0.5429835348228117, "grad_norm": 0.1328125, "learning_rate": 0.0019747515885070103, "loss": 0.2438, "step": 76500 }, { "epoch": 0.5430545130626578, "grad_norm": 0.0712890625, "learning_rate": 0.0019747449312025544, "loss": 0.2279, "step": 76510 }, { "epoch": 0.543125491302504, "grad_norm": 0.251953125, "learning_rate": 0.001974738273033031, "loss": 0.2429, "step": 76520 }, { "epoch": 0.54319646954235, "grad_norm": 0.1083984375, "learning_rate": 0.0019747316139984462, "loss": 0.2337, "step": 76530 }, { "epoch": 0.5432674477821962, "grad_norm": 0.10205078125, "learning_rate": 0.0019747249540988062, "loss": 0.2285, "step": 76540 }, { "epoch": 0.5433384260220423, "grad_norm": 0.0869140625, "learning_rate": 0.0019747182933341186, "loss": 0.2277, "step": 76550 }, { "epoch": 0.5434094042618884, "grad_norm": 0.095703125, "learning_rate": 0.001974711631704389, "loss": 0.2333, "step": 76560 }, { "epoch": 0.5434803825017346, "grad_norm": 0.09228515625, "learning_rate": 0.0019747049692096248, "loss": 0.2558, "step": 76570 }, { "epoch": 0.5435513607415806, "grad_norm": 0.0927734375, "learning_rate": 0.0019746983058498316, "loss": 0.2378, "step": 76580 }, { "epoch": 0.5436223389814268, "grad_norm": 0.1015625, "learning_rate": 0.001974691641625017, "loss": 0.2432, "step": 76590 }, { "epoch": 0.5436933172212729, "grad_norm": 0.125, "learning_rate": 0.001974684976535187, "loss": 0.246, "step": 76600 }, { "epoch": 0.543764295461119, "grad_norm": 0.126953125, "learning_rate": 0.001974678310580349, "loss": 0.2238, "step": 76610 }, { "epoch": 0.5438352737009651, "grad_norm": 0.10302734375, "learning_rate": 0.0019746716437605088, "loss": 0.2383, "step": 76620 }, { "epoch": 0.5439062519408112, "grad_norm": 0.1337890625, "learning_rate": 0.0019746649760756727, "loss": 0.2192, "step": 76630 }, { "epoch": 0.5439772301806574, "grad_norm": 0.09521484375, "learning_rate": 0.001974658307525848, "loss": 0.2481, "step": 76640 }, { "epoch": 0.5440482084205035, "grad_norm": 0.087890625, "learning_rate": 0.0019746516381110406, "loss": 0.2273, "step": 76650 }, { "epoch": 0.5441191866603496, "grad_norm": 0.1005859375, "learning_rate": 0.001974644967831258, "loss": 0.2206, "step": 76660 }, { "epoch": 0.5441901649001957, "grad_norm": 0.2109375, "learning_rate": 0.0019746382966865064, "loss": 0.2199, "step": 76670 }, { "epoch": 0.5442611431400418, "grad_norm": 0.11083984375, "learning_rate": 0.001974631624676792, "loss": 0.2303, "step": 76680 }, { "epoch": 0.544332121379888, "grad_norm": 0.056640625, "learning_rate": 0.0019746249518021223, "loss": 0.2415, "step": 76690 }, { "epoch": 0.5444030996197341, "grad_norm": 0.1103515625, "learning_rate": 0.0019746182780625027, "loss": 0.2255, "step": 76700 }, { "epoch": 0.5444740778595802, "grad_norm": 0.11181640625, "learning_rate": 0.001974611603457941, "loss": 0.2552, "step": 76710 }, { "epoch": 0.5445450560994263, "grad_norm": 0.130859375, "learning_rate": 0.0019746049279884426, "loss": 0.2504, "step": 76720 }, { "epoch": 0.5446160343392724, "grad_norm": 0.16015625, "learning_rate": 0.0019745982516540155, "loss": 0.2363, "step": 76730 }, { "epoch": 0.5446870125791186, "grad_norm": 0.08251953125, "learning_rate": 0.001974591574454665, "loss": 0.2303, "step": 76740 }, { "epoch": 0.5447579908189647, "grad_norm": 0.234375, "learning_rate": 0.0019745848963903987, "loss": 0.2393, "step": 76750 }, { "epoch": 0.5448289690588108, "grad_norm": 0.095703125, "learning_rate": 0.0019745782174612225, "loss": 0.2189, "step": 76760 }, { "epoch": 0.5448999472986569, "grad_norm": 0.0947265625, "learning_rate": 0.001974571537667143, "loss": 0.2347, "step": 76770 }, { "epoch": 0.544970925538503, "grad_norm": 0.0947265625, "learning_rate": 0.0019745648570081674, "loss": 0.2389, "step": 76780 }, { "epoch": 0.5450419037783492, "grad_norm": 0.11572265625, "learning_rate": 0.001974558175484302, "loss": 0.2386, "step": 76790 }, { "epoch": 0.5451128820181953, "grad_norm": 0.1123046875, "learning_rate": 0.0019745514930955533, "loss": 0.2326, "step": 76800 }, { "epoch": 0.5451838602580414, "grad_norm": 0.1259765625, "learning_rate": 0.001974544809841928, "loss": 0.216, "step": 76810 }, { "epoch": 0.5452548384978875, "grad_norm": 0.0927734375, "learning_rate": 0.001974538125723432, "loss": 0.243, "step": 76820 }, { "epoch": 0.5453258167377336, "grad_norm": 0.171875, "learning_rate": 0.001974531440740073, "loss": 0.2128, "step": 76830 }, { "epoch": 0.5453967949775798, "grad_norm": 0.2265625, "learning_rate": 0.0019745247548918583, "loss": 0.255, "step": 76840 }, { "epoch": 0.5454677732174259, "grad_norm": 0.083984375, "learning_rate": 0.0019745180681787922, "loss": 0.2227, "step": 76850 }, { "epoch": 0.545538751457272, "grad_norm": 0.0908203125, "learning_rate": 0.001974511380600883, "loss": 0.2162, "step": 76860 }, { "epoch": 0.5456097296971181, "grad_norm": 0.1142578125, "learning_rate": 0.0019745046921581366, "loss": 0.2397, "step": 76870 }, { "epoch": 0.5456807079369642, "grad_norm": 0.1689453125, "learning_rate": 0.00197449800285056, "loss": 0.2373, "step": 76880 }, { "epoch": 0.5457516861768104, "grad_norm": 0.154296875, "learning_rate": 0.00197449131267816, "loss": 0.2399, "step": 76890 }, { "epoch": 0.5458226644166565, "grad_norm": 0.0869140625, "learning_rate": 0.0019744846216409426, "loss": 0.2243, "step": 76900 }, { "epoch": 0.5458936426565025, "grad_norm": 0.1484375, "learning_rate": 0.0019744779297389144, "loss": 0.241, "step": 76910 }, { "epoch": 0.5459646208963487, "grad_norm": 0.0966796875, "learning_rate": 0.001974471236972083, "loss": 0.2278, "step": 76920 }, { "epoch": 0.5460355991361948, "grad_norm": 0.1572265625, "learning_rate": 0.001974464543340454, "loss": 0.2255, "step": 76930 }, { "epoch": 0.546106577376041, "grad_norm": 0.09521484375, "learning_rate": 0.0019744578488440343, "loss": 0.2365, "step": 76940 }, { "epoch": 0.5461775556158871, "grad_norm": 0.109375, "learning_rate": 0.0019744511534828305, "loss": 0.2343, "step": 76950 }, { "epoch": 0.5462485338557331, "grad_norm": 0.076171875, "learning_rate": 0.0019744444572568497, "loss": 0.2422, "step": 76960 }, { "epoch": 0.5463195120955793, "grad_norm": 0.05810546875, "learning_rate": 0.001974437760166098, "loss": 0.2341, "step": 76970 }, { "epoch": 0.5463904903354254, "grad_norm": 0.1513671875, "learning_rate": 0.0019744310622105818, "loss": 0.2296, "step": 76980 }, { "epoch": 0.5464614685752716, "grad_norm": 0.1455078125, "learning_rate": 0.0019744243633903085, "loss": 0.2456, "step": 76990 }, { "epoch": 0.5465324468151177, "grad_norm": 0.126953125, "learning_rate": 0.0019744176637052843, "loss": 0.2415, "step": 77000 }, { "epoch": 0.5466034250549637, "grad_norm": 0.126953125, "learning_rate": 0.0019744109631555156, "loss": 0.2365, "step": 77010 }, { "epoch": 0.5466744032948099, "grad_norm": 0.119140625, "learning_rate": 0.0019744042617410094, "loss": 0.226, "step": 77020 }, { "epoch": 0.546745381534656, "grad_norm": 0.09716796875, "learning_rate": 0.001974397559461772, "loss": 0.2365, "step": 77030 }, { "epoch": 0.5468163597745022, "grad_norm": 0.1259765625, "learning_rate": 0.001974390856317811, "loss": 0.2439, "step": 77040 }, { "epoch": 0.5468873380143483, "grad_norm": 0.0732421875, "learning_rate": 0.0019743841523091315, "loss": 0.2267, "step": 77050 }, { "epoch": 0.5469583162541943, "grad_norm": 0.154296875, "learning_rate": 0.001974377447435741, "loss": 0.2362, "step": 77060 }, { "epoch": 0.5470292944940405, "grad_norm": 0.09375, "learning_rate": 0.0019743707416976456, "loss": 0.2217, "step": 77070 }, { "epoch": 0.5471002727338866, "grad_norm": 0.08935546875, "learning_rate": 0.001974364035094853, "loss": 0.2347, "step": 77080 }, { "epoch": 0.5471712509737328, "grad_norm": 0.158203125, "learning_rate": 0.0019743573276273687, "loss": 0.2349, "step": 77090 }, { "epoch": 0.5472422292135789, "grad_norm": 0.15234375, "learning_rate": 0.0019743506192952, "loss": 0.2141, "step": 77100 }, { "epoch": 0.5473132074534249, "grad_norm": 0.1181640625, "learning_rate": 0.001974343910098354, "loss": 0.2278, "step": 77110 }, { "epoch": 0.5473841856932711, "grad_norm": 0.06640625, "learning_rate": 0.0019743372000368357, "loss": 0.23, "step": 77120 }, { "epoch": 0.5474551639331172, "grad_norm": 0.150390625, "learning_rate": 0.001974330489110653, "loss": 0.2257, "step": 77130 }, { "epoch": 0.5475261421729634, "grad_norm": 0.08544921875, "learning_rate": 0.0019743237773198123, "loss": 0.2277, "step": 77140 }, { "epoch": 0.5475971204128094, "grad_norm": 0.0849609375, "learning_rate": 0.00197431706466432, "loss": 0.2245, "step": 77150 }, { "epoch": 0.5476680986526555, "grad_norm": 0.150390625, "learning_rate": 0.001974310351144183, "loss": 0.2133, "step": 77160 }, { "epoch": 0.5477390768925017, "grad_norm": 0.1259765625, "learning_rate": 0.001974303636759408, "loss": 0.2323, "step": 77170 }, { "epoch": 0.5478100551323478, "grad_norm": 0.07421875, "learning_rate": 0.0019742969215100015, "loss": 0.2179, "step": 77180 }, { "epoch": 0.547881033372194, "grad_norm": 0.095703125, "learning_rate": 0.0019742902053959703, "loss": 0.2288, "step": 77190 }, { "epoch": 0.54795201161204, "grad_norm": 0.107421875, "learning_rate": 0.0019742834884173203, "loss": 0.2285, "step": 77200 }, { "epoch": 0.5480229898518861, "grad_norm": 0.10009765625, "learning_rate": 0.001974276770574059, "loss": 0.2327, "step": 77210 }, { "epoch": 0.5480939680917323, "grad_norm": 0.109375, "learning_rate": 0.001974270051866193, "loss": 0.2113, "step": 77220 }, { "epoch": 0.5481649463315784, "grad_norm": 0.154296875, "learning_rate": 0.0019742633322937285, "loss": 0.2307, "step": 77230 }, { "epoch": 0.5482359245714246, "grad_norm": 0.10009765625, "learning_rate": 0.0019742566118566723, "loss": 0.2385, "step": 77240 }, { "epoch": 0.5483069028112706, "grad_norm": 0.1337890625, "learning_rate": 0.0019742498905550316, "loss": 0.2315, "step": 77250 }, { "epoch": 0.5483778810511167, "grad_norm": 0.201171875, "learning_rate": 0.001974243168388812, "loss": 0.2253, "step": 77260 }, { "epoch": 0.5484488592909629, "grad_norm": 0.06982421875, "learning_rate": 0.001974236445358021, "loss": 0.2401, "step": 77270 }, { "epoch": 0.548519837530809, "grad_norm": 0.06640625, "learning_rate": 0.001974229721462665, "loss": 0.2314, "step": 77280 }, { "epoch": 0.5485908157706552, "grad_norm": 0.072265625, "learning_rate": 0.00197422299670275, "loss": 0.2105, "step": 77290 }, { "epoch": 0.5486617940105012, "grad_norm": 0.10546875, "learning_rate": 0.001974216271078284, "loss": 0.2359, "step": 77300 }, { "epoch": 0.5487327722503473, "grad_norm": 0.1455078125, "learning_rate": 0.0019742095445892728, "loss": 0.2509, "step": 77310 }, { "epoch": 0.5488037504901935, "grad_norm": 0.099609375, "learning_rate": 0.0019742028172357227, "loss": 0.2306, "step": 77320 }, { "epoch": 0.5488747287300396, "grad_norm": 0.0908203125, "learning_rate": 0.001974196089017641, "loss": 0.2382, "step": 77330 }, { "epoch": 0.5489457069698858, "grad_norm": 0.09228515625, "learning_rate": 0.0019741893599350344, "loss": 0.2219, "step": 77340 }, { "epoch": 0.5490166852097318, "grad_norm": 0.130859375, "learning_rate": 0.001974182629987909, "loss": 0.2268, "step": 77350 }, { "epoch": 0.5490876634495779, "grad_norm": 0.083984375, "learning_rate": 0.0019741758991762715, "loss": 0.23, "step": 77360 }, { "epoch": 0.5491586416894241, "grad_norm": 0.103515625, "learning_rate": 0.0019741691675001292, "loss": 0.2309, "step": 77370 }, { "epoch": 0.5492296199292702, "grad_norm": 0.0927734375, "learning_rate": 0.001974162434959489, "loss": 0.2223, "step": 77380 }, { "epoch": 0.5493005981691162, "grad_norm": 0.142578125, "learning_rate": 0.0019741557015543563, "loss": 0.2324, "step": 77390 }, { "epoch": 0.5493715764089624, "grad_norm": 0.0986328125, "learning_rate": 0.001974148967284738, "loss": 0.2464, "step": 77400 }, { "epoch": 0.5494425546488085, "grad_norm": 0.0859375, "learning_rate": 0.0019741422321506415, "loss": 0.2179, "step": 77410 }, { "epoch": 0.5495135328886547, "grad_norm": 0.09716796875, "learning_rate": 0.0019741354961520735, "loss": 0.2393, "step": 77420 }, { "epoch": 0.5495845111285008, "grad_norm": 0.0986328125, "learning_rate": 0.00197412875928904, "loss": 0.2153, "step": 77430 }, { "epoch": 0.5496554893683469, "grad_norm": 0.078125, "learning_rate": 0.0019741220215615477, "loss": 0.2447, "step": 77440 }, { "epoch": 0.549726467608193, "grad_norm": 0.1708984375, "learning_rate": 0.0019741152829696037, "loss": 0.2352, "step": 77450 }, { "epoch": 0.5497974458480391, "grad_norm": 0.062255859375, "learning_rate": 0.0019741085435132145, "loss": 0.2233, "step": 77460 }, { "epoch": 0.5498684240878853, "grad_norm": 0.1533203125, "learning_rate": 0.0019741018031923865, "loss": 0.2341, "step": 77470 }, { "epoch": 0.5499394023277314, "grad_norm": 0.09521484375, "learning_rate": 0.001974095062007127, "loss": 0.2464, "step": 77480 }, { "epoch": 0.5500103805675775, "grad_norm": 0.0888671875, "learning_rate": 0.001974088319957442, "loss": 0.2286, "step": 77490 }, { "epoch": 0.5500813588074236, "grad_norm": 0.078125, "learning_rate": 0.001974081577043339, "loss": 0.2317, "step": 77500 }, { "epoch": 0.5501523370472697, "grad_norm": 0.0869140625, "learning_rate": 0.001974074833264823, "loss": 0.2245, "step": 77510 }, { "epoch": 0.5502233152871159, "grad_norm": 0.08984375, "learning_rate": 0.0019740680886219026, "loss": 0.2301, "step": 77520 }, { "epoch": 0.550294293526962, "grad_norm": 0.1298828125, "learning_rate": 0.0019740613431145837, "loss": 0.2316, "step": 77530 }, { "epoch": 0.550365271766808, "grad_norm": 0.09228515625, "learning_rate": 0.001974054596742873, "loss": 0.2185, "step": 77540 }, { "epoch": 0.5504362500066542, "grad_norm": 0.1201171875, "learning_rate": 0.001974047849506776, "loss": 0.2391, "step": 77550 }, { "epoch": 0.5505072282465003, "grad_norm": 0.1318359375, "learning_rate": 0.0019740411014063015, "loss": 0.233, "step": 77560 }, { "epoch": 0.5505782064863465, "grad_norm": 0.09375, "learning_rate": 0.0019740343524414544, "loss": 0.2309, "step": 77570 }, { "epoch": 0.5506491847261926, "grad_norm": 0.2099609375, "learning_rate": 0.001974027602612243, "loss": 0.224, "step": 77580 }, { "epoch": 0.5507201629660387, "grad_norm": 0.1708984375, "learning_rate": 0.0019740208519186723, "loss": 0.2365, "step": 77590 }, { "epoch": 0.5507911412058848, "grad_norm": 0.1376953125, "learning_rate": 0.00197401410036075, "loss": 0.2356, "step": 77600 }, { "epoch": 0.5508621194457309, "grad_norm": 0.10302734375, "learning_rate": 0.001974007347938483, "loss": 0.2263, "step": 77610 }, { "epoch": 0.5509330976855771, "grad_norm": 0.0947265625, "learning_rate": 0.001974000594651877, "loss": 0.24, "step": 77620 }, { "epoch": 0.5510040759254231, "grad_norm": 0.1123046875, "learning_rate": 0.001973993840500939, "loss": 0.254, "step": 77630 }, { "epoch": 0.5510750541652693, "grad_norm": 0.10498046875, "learning_rate": 0.0019739870854856767, "loss": 0.2286, "step": 77640 }, { "epoch": 0.5511460324051154, "grad_norm": 0.1083984375, "learning_rate": 0.0019739803296060948, "loss": 0.237, "step": 77650 }, { "epoch": 0.5512170106449615, "grad_norm": 0.10302734375, "learning_rate": 0.001973973572862202, "loss": 0.2317, "step": 77660 }, { "epoch": 0.5512879888848077, "grad_norm": 0.07861328125, "learning_rate": 0.001973966815254004, "loss": 0.202, "step": 77670 }, { "epoch": 0.5513589671246537, "grad_norm": 0.08935546875, "learning_rate": 0.0019739600567815073, "loss": 0.2379, "step": 77680 }, { "epoch": 0.5514299453644999, "grad_norm": 0.140625, "learning_rate": 0.001973953297444719, "loss": 0.2458, "step": 77690 }, { "epoch": 0.551500923604346, "grad_norm": 0.11181640625, "learning_rate": 0.0019739465372436457, "loss": 0.2156, "step": 77700 }, { "epoch": 0.5515719018441921, "grad_norm": 0.083984375, "learning_rate": 0.001973939776178294, "loss": 0.2436, "step": 77710 }, { "epoch": 0.5516428800840383, "grad_norm": 0.0908203125, "learning_rate": 0.001973933014248671, "loss": 0.2302, "step": 77720 }, { "epoch": 0.5517138583238843, "grad_norm": 0.06689453125, "learning_rate": 0.0019739262514547823, "loss": 0.2217, "step": 77730 }, { "epoch": 0.5517848365637305, "grad_norm": 0.2314453125, "learning_rate": 0.001973919487796636, "loss": 0.2312, "step": 77740 }, { "epoch": 0.5518558148035766, "grad_norm": 0.08837890625, "learning_rate": 0.001973912723274238, "loss": 0.2408, "step": 77750 }, { "epoch": 0.5519267930434227, "grad_norm": 0.0751953125, "learning_rate": 0.0019739059578875945, "loss": 0.2506, "step": 77760 }, { "epoch": 0.5519977712832689, "grad_norm": 0.1181640625, "learning_rate": 0.0019738991916367133, "loss": 0.2207, "step": 77770 }, { "epoch": 0.5520687495231149, "grad_norm": 0.10498046875, "learning_rate": 0.0019738924245216007, "loss": 0.23, "step": 77780 }, { "epoch": 0.552139727762961, "grad_norm": 0.1318359375, "learning_rate": 0.0019738856565422627, "loss": 0.2222, "step": 77790 }, { "epoch": 0.5522107060028072, "grad_norm": 0.0888671875, "learning_rate": 0.0019738788876987067, "loss": 0.2226, "step": 77800 }, { "epoch": 0.5522816842426533, "grad_norm": 0.0908203125, "learning_rate": 0.0019738721179909397, "loss": 0.2252, "step": 77810 }, { "epoch": 0.5523526624824995, "grad_norm": 0.07763671875, "learning_rate": 0.0019738653474189677, "loss": 0.2321, "step": 77820 }, { "epoch": 0.5524236407223455, "grad_norm": 0.1005859375, "learning_rate": 0.0019738585759827973, "loss": 0.2336, "step": 77830 }, { "epoch": 0.5524946189621917, "grad_norm": 0.0947265625, "learning_rate": 0.001973851803682436, "loss": 0.2468, "step": 77840 }, { "epoch": 0.5525655972020378, "grad_norm": 0.083984375, "learning_rate": 0.00197384503051789, "loss": 0.2148, "step": 77850 }, { "epoch": 0.5526365754418839, "grad_norm": 0.07275390625, "learning_rate": 0.0019738382564891657, "loss": 0.2281, "step": 77860 }, { "epoch": 0.55270755368173, "grad_norm": 0.0986328125, "learning_rate": 0.001973831481596271, "loss": 0.2286, "step": 77870 }, { "epoch": 0.5527785319215761, "grad_norm": 0.09033203125, "learning_rate": 0.0019738247058392108, "loss": 0.2353, "step": 77880 }, { "epoch": 0.5528495101614223, "grad_norm": 0.240234375, "learning_rate": 0.0019738179292179926, "loss": 0.2406, "step": 77890 }, { "epoch": 0.5529204884012684, "grad_norm": 0.42578125, "learning_rate": 0.0019738111517326237, "loss": 0.229, "step": 77900 }, { "epoch": 0.5529914666411145, "grad_norm": 0.1357421875, "learning_rate": 0.0019738043733831105, "loss": 0.22, "step": 77910 }, { "epoch": 0.5530624448809606, "grad_norm": 0.12353515625, "learning_rate": 0.001973797594169459, "loss": 0.236, "step": 77920 }, { "epoch": 0.5531334231208067, "grad_norm": 0.09228515625, "learning_rate": 0.001973790814091677, "loss": 0.2229, "step": 77930 }, { "epoch": 0.5532044013606529, "grad_norm": 0.130859375, "learning_rate": 0.0019737840331497704, "loss": 0.2426, "step": 77940 }, { "epoch": 0.553275379600499, "grad_norm": 0.09814453125, "learning_rate": 0.001973777251343746, "loss": 0.231, "step": 77950 }, { "epoch": 0.5533463578403451, "grad_norm": 0.087890625, "learning_rate": 0.001973770468673611, "loss": 0.2324, "step": 77960 }, { "epoch": 0.5534173360801912, "grad_norm": 0.099609375, "learning_rate": 0.0019737636851393714, "loss": 0.2453, "step": 77970 }, { "epoch": 0.5534883143200373, "grad_norm": 0.0859375, "learning_rate": 0.0019737569007410344, "loss": 0.2223, "step": 77980 }, { "epoch": 0.5535592925598835, "grad_norm": 0.0859375, "learning_rate": 0.001973750115478607, "loss": 0.2314, "step": 77990 }, { "epoch": 0.5536302707997296, "grad_norm": 0.1787109375, "learning_rate": 0.001973743329352095, "loss": 0.2166, "step": 78000 }, { "epoch": 0.5536302707997296, "eval_covost2-zh-en_loss": 3.8238067626953125, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.963, "eval_covost2-zh-en_samples_per_second": 3.053, "eval_covost2-zh-en_steps_per_second": 0.191, "step": 78000 }, { "epoch": 0.5536302707997296, "eval_covost2-en-zh_loss": 3.170856475830078, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.4772, "eval_covost2-en-zh_samples_per_second": 3.286, "eval_covost2-en-zh_steps_per_second": 0.205, "step": 78000 }, { "epoch": 0.5537012490395757, "grad_norm": 0.11865234375, "learning_rate": 0.0019737365423615058, "loss": 0.2347, "step": 78010 }, { "epoch": 0.5537722272794218, "grad_norm": 0.072265625, "learning_rate": 0.001973729754506846, "loss": 0.2117, "step": 78020 }, { "epoch": 0.5538432055192679, "grad_norm": 0.083984375, "learning_rate": 0.001973722965788122, "loss": 0.2196, "step": 78030 }, { "epoch": 0.553914183759114, "grad_norm": 0.06787109375, "learning_rate": 0.001973716176205341, "loss": 0.2175, "step": 78040 }, { "epoch": 0.5539851619989602, "grad_norm": 0.09375, "learning_rate": 0.001973709385758509, "loss": 0.2215, "step": 78050 }, { "epoch": 0.5540561402388063, "grad_norm": 0.12255859375, "learning_rate": 0.0019737025944476334, "loss": 0.2243, "step": 78060 }, { "epoch": 0.5541271184786524, "grad_norm": 0.109375, "learning_rate": 0.001973695802272721, "loss": 0.2248, "step": 78070 }, { "epoch": 0.5541980967184985, "grad_norm": 0.1142578125, "learning_rate": 0.001973689009233778, "loss": 0.2403, "step": 78080 }, { "epoch": 0.5542690749583447, "grad_norm": 0.10400390625, "learning_rate": 0.001973682215330811, "loss": 0.2308, "step": 78090 }, { "epoch": 0.5543400531981908, "grad_norm": 0.11376953125, "learning_rate": 0.0019736754205638275, "loss": 0.2422, "step": 78100 }, { "epoch": 0.5544110314380369, "grad_norm": 0.09375, "learning_rate": 0.001973668624932834, "loss": 0.2257, "step": 78110 }, { "epoch": 0.554482009677883, "grad_norm": 0.09423828125, "learning_rate": 0.0019736618284378364, "loss": 0.2235, "step": 78120 }, { "epoch": 0.5545529879177291, "grad_norm": 0.103515625, "learning_rate": 0.0019736550310788426, "loss": 0.233, "step": 78130 }, { "epoch": 0.5546239661575753, "grad_norm": 0.1435546875, "learning_rate": 0.001973648232855858, "loss": 0.2365, "step": 78140 }, { "epoch": 0.5546949443974214, "grad_norm": 0.1083984375, "learning_rate": 0.0019736414337688905, "loss": 0.213, "step": 78150 }, { "epoch": 0.5547659226372674, "grad_norm": 0.1044921875, "learning_rate": 0.001973634633817946, "loss": 0.2367, "step": 78160 }, { "epoch": 0.5548369008771136, "grad_norm": 0.169921875, "learning_rate": 0.0019736278330030324, "loss": 0.2317, "step": 78170 }, { "epoch": 0.5549078791169597, "grad_norm": 0.08837890625, "learning_rate": 0.0019736210313241547, "loss": 0.2294, "step": 78180 }, { "epoch": 0.5549788573568059, "grad_norm": 0.083984375, "learning_rate": 0.001973614228781321, "loss": 0.2375, "step": 78190 }, { "epoch": 0.555049835596652, "grad_norm": 0.71484375, "learning_rate": 0.0019736074253745377, "loss": 0.2395, "step": 78200 }, { "epoch": 0.555120813836498, "grad_norm": 0.1591796875, "learning_rate": 0.0019736006211038114, "loss": 0.2374, "step": 78210 }, { "epoch": 0.5551917920763442, "grad_norm": 0.08544921875, "learning_rate": 0.0019735938159691486, "loss": 0.233, "step": 78220 }, { "epoch": 0.5552627703161903, "grad_norm": 0.0791015625, "learning_rate": 0.0019735870099705562, "loss": 0.2528, "step": 78230 }, { "epoch": 0.5553337485560365, "grad_norm": 0.07958984375, "learning_rate": 0.001973580203108041, "loss": 0.2502, "step": 78240 }, { "epoch": 0.5554047267958826, "grad_norm": 0.08154296875, "learning_rate": 0.00197357339538161, "loss": 0.2262, "step": 78250 }, { "epoch": 0.5554757050357286, "grad_norm": 0.0732421875, "learning_rate": 0.00197356658679127, "loss": 0.2214, "step": 78260 }, { "epoch": 0.5555466832755748, "grad_norm": 0.0927734375, "learning_rate": 0.0019735597773370265, "loss": 0.2252, "step": 78270 }, { "epoch": 0.5556176615154209, "grad_norm": 0.0908203125, "learning_rate": 0.0019735529670188877, "loss": 0.2208, "step": 78280 }, { "epoch": 0.555688639755267, "grad_norm": 0.12060546875, "learning_rate": 0.0019735461558368595, "loss": 0.2354, "step": 78290 }, { "epoch": 0.5557596179951132, "grad_norm": 0.1005859375, "learning_rate": 0.0019735393437909496, "loss": 0.2243, "step": 78300 }, { "epoch": 0.5558305962349592, "grad_norm": 0.08740234375, "learning_rate": 0.0019735325308811633, "loss": 0.2127, "step": 78310 }, { "epoch": 0.5559015744748054, "grad_norm": 0.1328125, "learning_rate": 0.001973525717107508, "loss": 0.2461, "step": 78320 }, { "epoch": 0.5559725527146515, "grad_norm": 0.09765625, "learning_rate": 0.0019735189024699907, "loss": 0.2282, "step": 78330 }, { "epoch": 0.5560435309544977, "grad_norm": 0.07080078125, "learning_rate": 0.0019735120869686183, "loss": 0.2227, "step": 78340 }, { "epoch": 0.5561145091943438, "grad_norm": 0.099609375, "learning_rate": 0.001973505270603397, "loss": 0.2333, "step": 78350 }, { "epoch": 0.5561854874341898, "grad_norm": 0.1171875, "learning_rate": 0.0019734984533743335, "loss": 0.2192, "step": 78360 }, { "epoch": 0.556256465674036, "grad_norm": 0.1025390625, "learning_rate": 0.001973491635281435, "loss": 0.2222, "step": 78370 }, { "epoch": 0.5563274439138821, "grad_norm": 0.10791015625, "learning_rate": 0.001973484816324708, "loss": 0.2436, "step": 78380 }, { "epoch": 0.5563984221537283, "grad_norm": 0.09814453125, "learning_rate": 0.001973477996504159, "loss": 0.2429, "step": 78390 }, { "epoch": 0.5564694003935743, "grad_norm": 0.1220703125, "learning_rate": 0.0019734711758197954, "loss": 0.2297, "step": 78400 }, { "epoch": 0.5565403786334204, "grad_norm": 0.10302734375, "learning_rate": 0.001973464354271623, "loss": 0.2357, "step": 78410 }, { "epoch": 0.5566113568732666, "grad_norm": 0.10693359375, "learning_rate": 0.00197345753185965, "loss": 0.2593, "step": 78420 }, { "epoch": 0.5566823351131127, "grad_norm": 0.09765625, "learning_rate": 0.0019734507085838815, "loss": 0.2388, "step": 78430 }, { "epoch": 0.5567533133529589, "grad_norm": 0.08154296875, "learning_rate": 0.001973443884444325, "loss": 0.2422, "step": 78440 }, { "epoch": 0.5568242915928049, "grad_norm": 0.09033203125, "learning_rate": 0.0019734370594409875, "loss": 0.2254, "step": 78450 }, { "epoch": 0.556895269832651, "grad_norm": 0.1279296875, "learning_rate": 0.0019734302335738755, "loss": 0.2328, "step": 78460 }, { "epoch": 0.5569662480724972, "grad_norm": 0.095703125, "learning_rate": 0.001973423406842996, "loss": 0.2466, "step": 78470 }, { "epoch": 0.5570372263123433, "grad_norm": 0.0966796875, "learning_rate": 0.001973416579248355, "loss": 0.213, "step": 78480 }, { "epoch": 0.5571082045521895, "grad_norm": 0.1484375, "learning_rate": 0.00197340975078996, "loss": 0.2338, "step": 78490 }, { "epoch": 0.5571791827920355, "grad_norm": 0.056640625, "learning_rate": 0.0019734029214678174, "loss": 0.2442, "step": 78500 }, { "epoch": 0.5572501610318816, "grad_norm": 0.14453125, "learning_rate": 0.001973396091281934, "loss": 0.2216, "step": 78510 }, { "epoch": 0.5573211392717278, "grad_norm": 0.11669921875, "learning_rate": 0.0019733892602323165, "loss": 0.2275, "step": 78520 }, { "epoch": 0.5573921175115739, "grad_norm": 0.111328125, "learning_rate": 0.001973382428318972, "loss": 0.2214, "step": 78530 }, { "epoch": 0.5574630957514201, "grad_norm": 0.12109375, "learning_rate": 0.001973375595541907, "loss": 0.2377, "step": 78540 }, { "epoch": 0.5575340739912661, "grad_norm": 0.11865234375, "learning_rate": 0.001973368761901128, "loss": 0.2336, "step": 78550 }, { "epoch": 0.5576050522311122, "grad_norm": 0.09130859375, "learning_rate": 0.0019733619273966424, "loss": 0.2361, "step": 78560 }, { "epoch": 0.5576760304709584, "grad_norm": 0.08837890625, "learning_rate": 0.0019733550920284565, "loss": 0.2311, "step": 78570 }, { "epoch": 0.5577470087108045, "grad_norm": 0.12451171875, "learning_rate": 0.0019733482557965773, "loss": 0.2234, "step": 78580 }, { "epoch": 0.5578179869506507, "grad_norm": 0.10009765625, "learning_rate": 0.001973341418701011, "loss": 0.2404, "step": 78590 }, { "epoch": 0.5578889651904967, "grad_norm": 0.11669921875, "learning_rate": 0.001973334580741765, "loss": 0.2479, "step": 78600 }, { "epoch": 0.5579599434303428, "grad_norm": 0.08984375, "learning_rate": 0.0019733277419188462, "loss": 0.2304, "step": 78610 }, { "epoch": 0.558030921670189, "grad_norm": 0.1103515625, "learning_rate": 0.0019733209022322605, "loss": 0.2445, "step": 78620 }, { "epoch": 0.5581018999100351, "grad_norm": 0.10986328125, "learning_rate": 0.001973314061682015, "loss": 0.229, "step": 78630 }, { "epoch": 0.5581728781498811, "grad_norm": 0.08642578125, "learning_rate": 0.0019733072202681168, "loss": 0.2278, "step": 78640 }, { "epoch": 0.5582438563897273, "grad_norm": 0.2158203125, "learning_rate": 0.0019733003779905728, "loss": 0.2421, "step": 78650 }, { "epoch": 0.5583148346295734, "grad_norm": 0.11376953125, "learning_rate": 0.001973293534849389, "loss": 0.2296, "step": 78660 }, { "epoch": 0.5583858128694196, "grad_norm": 0.1201171875, "learning_rate": 0.001973286690844573, "loss": 0.2404, "step": 78670 }, { "epoch": 0.5584567911092657, "grad_norm": 0.130859375, "learning_rate": 0.0019732798459761313, "loss": 0.2427, "step": 78680 }, { "epoch": 0.5585277693491117, "grad_norm": 0.06884765625, "learning_rate": 0.00197327300024407, "loss": 0.2145, "step": 78690 }, { "epoch": 0.5585987475889579, "grad_norm": 0.1298828125, "learning_rate": 0.0019732661536483965, "loss": 0.2226, "step": 78700 }, { "epoch": 0.558669725828804, "grad_norm": 0.0859375, "learning_rate": 0.001973259306189118, "loss": 0.2278, "step": 78710 }, { "epoch": 0.5587407040686502, "grad_norm": 0.095703125, "learning_rate": 0.0019732524578662406, "loss": 0.2174, "step": 78720 }, { "epoch": 0.5588116823084963, "grad_norm": 0.0888671875, "learning_rate": 0.0019732456086797713, "loss": 0.2364, "step": 78730 }, { "epoch": 0.5588826605483423, "grad_norm": 0.1044921875, "learning_rate": 0.0019732387586297168, "loss": 0.23, "step": 78740 }, { "epoch": 0.5589536387881885, "grad_norm": 0.09521484375, "learning_rate": 0.0019732319077160835, "loss": 0.2327, "step": 78750 }, { "epoch": 0.5590246170280346, "grad_norm": 0.08349609375, "learning_rate": 0.0019732250559388793, "loss": 0.2103, "step": 78760 }, { "epoch": 0.5590955952678808, "grad_norm": 0.08447265625, "learning_rate": 0.00197321820329811, "loss": 0.215, "step": 78770 }, { "epoch": 0.5591665735077269, "grad_norm": 0.08349609375, "learning_rate": 0.001973211349793782, "loss": 0.2502, "step": 78780 }, { "epoch": 0.559237551747573, "grad_norm": 0.08447265625, "learning_rate": 0.0019732044954259035, "loss": 0.2258, "step": 78790 }, { "epoch": 0.5593085299874191, "grad_norm": 0.10888671875, "learning_rate": 0.00197319764019448, "loss": 0.2209, "step": 78800 }, { "epoch": 0.5593795082272652, "grad_norm": 0.1171875, "learning_rate": 0.0019731907840995185, "loss": 0.2469, "step": 78810 }, { "epoch": 0.5594504864671114, "grad_norm": 0.06494140625, "learning_rate": 0.001973183927141027, "loss": 0.2328, "step": 78820 }, { "epoch": 0.5595214647069575, "grad_norm": 0.10986328125, "learning_rate": 0.0019731770693190106, "loss": 0.2443, "step": 78830 }, { "epoch": 0.5595924429468035, "grad_norm": 0.125, "learning_rate": 0.001973170210633477, "loss": 0.2318, "step": 78840 }, { "epoch": 0.5596634211866497, "grad_norm": 0.078125, "learning_rate": 0.001973163351084433, "loss": 0.2198, "step": 78850 }, { "epoch": 0.5597343994264958, "grad_norm": 0.08447265625, "learning_rate": 0.001973156490671885, "loss": 0.2383, "step": 78860 }, { "epoch": 0.559805377666342, "grad_norm": 0.07470703125, "learning_rate": 0.0019731496293958397, "loss": 0.2321, "step": 78870 }, { "epoch": 0.559876355906188, "grad_norm": 0.11474609375, "learning_rate": 0.0019731427672563043, "loss": 0.2334, "step": 78880 }, { "epoch": 0.5599473341460341, "grad_norm": 0.10009765625, "learning_rate": 0.001973135904253285, "loss": 0.2289, "step": 78890 }, { "epoch": 0.5600183123858803, "grad_norm": 0.130859375, "learning_rate": 0.00197312904038679, "loss": 0.2216, "step": 78900 }, { "epoch": 0.5600892906257264, "grad_norm": 0.10107421875, "learning_rate": 0.0019731221756568245, "loss": 0.2369, "step": 78910 }, { "epoch": 0.5601602688655726, "grad_norm": 0.1220703125, "learning_rate": 0.0019731153100633956, "loss": 0.2434, "step": 78920 }, { "epoch": 0.5602312471054186, "grad_norm": 0.09619140625, "learning_rate": 0.0019731084436065112, "loss": 0.2536, "step": 78930 }, { "epoch": 0.5603022253452647, "grad_norm": 0.0859375, "learning_rate": 0.001973101576286177, "loss": 0.2229, "step": 78940 }, { "epoch": 0.5603732035851109, "grad_norm": 0.1376953125, "learning_rate": 0.0019730947081023995, "loss": 0.2379, "step": 78950 }, { "epoch": 0.560444181824957, "grad_norm": 0.07958984375, "learning_rate": 0.0019730878390551865, "loss": 0.2292, "step": 78960 }, { "epoch": 0.5605151600648032, "grad_norm": 0.10400390625, "learning_rate": 0.0019730809691445444, "loss": 0.2521, "step": 78970 }, { "epoch": 0.5605861383046492, "grad_norm": 0.10107421875, "learning_rate": 0.0019730740983704797, "loss": 0.2483, "step": 78980 }, { "epoch": 0.5606571165444953, "grad_norm": 0.0986328125, "learning_rate": 0.001973067226733, "loss": 0.2102, "step": 78990 }, { "epoch": 0.5607280947843415, "grad_norm": 0.11328125, "learning_rate": 0.0019730603542321107, "loss": 0.2493, "step": 79000 }, { "epoch": 0.5607990730241876, "grad_norm": 0.09033203125, "learning_rate": 0.0019730534808678197, "loss": 0.2336, "step": 79010 }, { "epoch": 0.5608700512640338, "grad_norm": 0.13671875, "learning_rate": 0.001973046606640134, "loss": 0.2432, "step": 79020 }, { "epoch": 0.5609410295038798, "grad_norm": 0.09912109375, "learning_rate": 0.0019730397315490594, "loss": 0.2288, "step": 79030 }, { "epoch": 0.561012007743726, "grad_norm": 0.091796875, "learning_rate": 0.0019730328555946034, "loss": 0.2225, "step": 79040 }, { "epoch": 0.5610829859835721, "grad_norm": 0.109375, "learning_rate": 0.0019730259787767725, "loss": 0.2181, "step": 79050 }, { "epoch": 0.5611539642234182, "grad_norm": 0.08447265625, "learning_rate": 0.0019730191010955737, "loss": 0.2215, "step": 79060 }, { "epoch": 0.5612249424632644, "grad_norm": 0.12060546875, "learning_rate": 0.0019730122225510138, "loss": 0.2315, "step": 79070 }, { "epoch": 0.5612959207031104, "grad_norm": 0.10546875, "learning_rate": 0.0019730053431430998, "loss": 0.2263, "step": 79080 }, { "epoch": 0.5613668989429565, "grad_norm": 0.1220703125, "learning_rate": 0.0019729984628718378, "loss": 0.2307, "step": 79090 }, { "epoch": 0.5614378771828027, "grad_norm": 0.189453125, "learning_rate": 0.001972991581737235, "loss": 0.2258, "step": 79100 }, { "epoch": 0.5615088554226488, "grad_norm": 0.08544921875, "learning_rate": 0.0019729846997392983, "loss": 0.2326, "step": 79110 }, { "epoch": 0.561579833662495, "grad_norm": 0.12451171875, "learning_rate": 0.0019729778168780347, "loss": 0.2383, "step": 79120 }, { "epoch": 0.561650811902341, "grad_norm": 0.15234375, "learning_rate": 0.0019729709331534504, "loss": 0.2192, "step": 79130 }, { "epoch": 0.5617217901421871, "grad_norm": 0.11572265625, "learning_rate": 0.001972964048565553, "loss": 0.2263, "step": 79140 }, { "epoch": 0.5617927683820333, "grad_norm": 0.1552734375, "learning_rate": 0.0019729571631143484, "loss": 0.2385, "step": 79150 }, { "epoch": 0.5618637466218794, "grad_norm": 0.10693359375, "learning_rate": 0.001972950276799844, "loss": 0.2247, "step": 79160 }, { "epoch": 0.5619347248617255, "grad_norm": 0.0791015625, "learning_rate": 0.001972943389622047, "loss": 0.2425, "step": 79170 }, { "epoch": 0.5620057031015716, "grad_norm": 0.0966796875, "learning_rate": 0.001972936501580963, "loss": 0.2216, "step": 79180 }, { "epoch": 0.5620766813414177, "grad_norm": 0.080078125, "learning_rate": 0.0019729296126766, "loss": 0.2201, "step": 79190 }, { "epoch": 0.5621476595812639, "grad_norm": 0.11279296875, "learning_rate": 0.001972922722908964, "loss": 0.229, "step": 79200 }, { "epoch": 0.56221863782111, "grad_norm": 0.130859375, "learning_rate": 0.0019729158322780624, "loss": 0.2398, "step": 79210 }, { "epoch": 0.5622896160609561, "grad_norm": 0.1044921875, "learning_rate": 0.0019729089407839015, "loss": 0.233, "step": 79220 }, { "epoch": 0.5623605943008022, "grad_norm": 0.11669921875, "learning_rate": 0.0019729020484264884, "loss": 0.2516, "step": 79230 }, { "epoch": 0.5624315725406484, "grad_norm": 0.1044921875, "learning_rate": 0.00197289515520583, "loss": 0.2392, "step": 79240 }, { "epoch": 0.5625025507804945, "grad_norm": 0.09423828125, "learning_rate": 0.001972888261121933, "loss": 0.2283, "step": 79250 }, { "epoch": 0.5625735290203406, "grad_norm": 0.296875, "learning_rate": 0.0019728813661748046, "loss": 0.2237, "step": 79260 }, { "epoch": 0.5626445072601867, "grad_norm": 0.07421875, "learning_rate": 0.0019728744703644504, "loss": 0.2296, "step": 79270 }, { "epoch": 0.5627154855000328, "grad_norm": 0.06982421875, "learning_rate": 0.0019728675736908783, "loss": 0.2304, "step": 79280 }, { "epoch": 0.562786463739879, "grad_norm": 0.16796875, "learning_rate": 0.0019728606761540953, "loss": 0.2351, "step": 79290 }, { "epoch": 0.5628574419797251, "grad_norm": 0.1083984375, "learning_rate": 0.0019728537777541078, "loss": 0.2112, "step": 79300 }, { "epoch": 0.5629284202195712, "grad_norm": 0.0810546875, "learning_rate": 0.001972846878490922, "loss": 0.2441, "step": 79310 }, { "epoch": 0.5629993984594173, "grad_norm": 0.0966796875, "learning_rate": 0.0019728399783645457, "loss": 0.229, "step": 79320 }, { "epoch": 0.5630703766992634, "grad_norm": 0.140625, "learning_rate": 0.0019728330773749856, "loss": 0.2342, "step": 79330 }, { "epoch": 0.5631413549391096, "grad_norm": 0.11328125, "learning_rate": 0.001972826175522248, "loss": 0.2474, "step": 79340 }, { "epoch": 0.5632123331789557, "grad_norm": 0.08935546875, "learning_rate": 0.00197281927280634, "loss": 0.2243, "step": 79350 }, { "epoch": 0.5632833114188018, "grad_norm": 0.1435546875, "learning_rate": 0.0019728123692272686, "loss": 0.2236, "step": 79360 }, { "epoch": 0.5633542896586479, "grad_norm": 0.09130859375, "learning_rate": 0.00197280546478504, "loss": 0.2327, "step": 79370 }, { "epoch": 0.563425267898494, "grad_norm": 0.0830078125, "learning_rate": 0.001972798559479662, "loss": 0.2277, "step": 79380 }, { "epoch": 0.5634962461383402, "grad_norm": 0.1044921875, "learning_rate": 0.0019727916533111406, "loss": 0.2349, "step": 79390 }, { "epoch": 0.5635672243781863, "grad_norm": 0.10888671875, "learning_rate": 0.0019727847462794835, "loss": 0.2316, "step": 79400 }, { "epoch": 0.5636382026180323, "grad_norm": 0.09375, "learning_rate": 0.0019727778383846965, "loss": 0.2452, "step": 79410 }, { "epoch": 0.5637091808578785, "grad_norm": 0.08447265625, "learning_rate": 0.001972770929626787, "loss": 0.2458, "step": 79420 }, { "epoch": 0.5637801590977246, "grad_norm": 0.05517578125, "learning_rate": 0.0019727640200057624, "loss": 0.2257, "step": 79430 }, { "epoch": 0.5638511373375708, "grad_norm": 0.123046875, "learning_rate": 0.0019727571095216278, "loss": 0.2277, "step": 79440 }, { "epoch": 0.5639221155774169, "grad_norm": 0.138671875, "learning_rate": 0.001972750198174392, "loss": 0.2425, "step": 79450 }, { "epoch": 0.5639930938172629, "grad_norm": 0.0830078125, "learning_rate": 0.0019727432859640605, "loss": 0.2422, "step": 79460 }, { "epoch": 0.5640640720571091, "grad_norm": 0.0810546875, "learning_rate": 0.0019727363728906403, "loss": 0.239, "step": 79470 }, { "epoch": 0.5641350502969552, "grad_norm": 0.140625, "learning_rate": 0.0019727294589541393, "loss": 0.2375, "step": 79480 }, { "epoch": 0.5642060285368014, "grad_norm": 0.07373046875, "learning_rate": 0.001972722544154563, "loss": 0.2309, "step": 79490 }, { "epoch": 0.5642770067766475, "grad_norm": 0.07763671875, "learning_rate": 0.001972715628491919, "loss": 0.2094, "step": 79500 }, { "epoch": 0.5643479850164935, "grad_norm": 0.08544921875, "learning_rate": 0.001972708711966214, "loss": 0.2413, "step": 79510 }, { "epoch": 0.5644189632563397, "grad_norm": 0.1279296875, "learning_rate": 0.0019727017945774545, "loss": 0.2193, "step": 79520 }, { "epoch": 0.5644899414961858, "grad_norm": 0.0966796875, "learning_rate": 0.001972694876325648, "loss": 0.2248, "step": 79530 }, { "epoch": 0.564560919736032, "grad_norm": 0.0859375, "learning_rate": 0.0019726879572108, "loss": 0.2359, "step": 79540 }, { "epoch": 0.5646318979758781, "grad_norm": 0.1025390625, "learning_rate": 0.0019726810372329194, "loss": 0.2364, "step": 79550 }, { "epoch": 0.5647028762157241, "grad_norm": 0.06103515625, "learning_rate": 0.0019726741163920117, "loss": 0.224, "step": 79560 }, { "epoch": 0.5647738544555703, "grad_norm": 0.205078125, "learning_rate": 0.0019726671946880842, "loss": 0.2192, "step": 79570 }, { "epoch": 0.5648448326954164, "grad_norm": 0.71875, "learning_rate": 0.001972660272121143, "loss": 0.2407, "step": 79580 }, { "epoch": 0.5649158109352626, "grad_norm": 0.23046875, "learning_rate": 0.001972653348691196, "loss": 0.2444, "step": 79590 }, { "epoch": 0.5649867891751087, "grad_norm": 0.12353515625, "learning_rate": 0.001972646424398249, "loss": 0.2152, "step": 79600 }, { "epoch": 0.5650577674149547, "grad_norm": 0.23046875, "learning_rate": 0.00197263949924231, "loss": 0.2326, "step": 79610 }, { "epoch": 0.5651287456548009, "grad_norm": 0.12353515625, "learning_rate": 0.0019726325732233843, "loss": 0.2368, "step": 79620 }, { "epoch": 0.565199723894647, "grad_norm": 0.0908203125, "learning_rate": 0.0019726256463414803, "loss": 0.2353, "step": 79630 }, { "epoch": 0.5652707021344932, "grad_norm": 0.06982421875, "learning_rate": 0.0019726187185966043, "loss": 0.2212, "step": 79640 }, { "epoch": 0.5653416803743392, "grad_norm": 0.1123046875, "learning_rate": 0.001972611789988763, "loss": 0.2206, "step": 79650 }, { "epoch": 0.5654126586141853, "grad_norm": 0.07861328125, "learning_rate": 0.0019726048605179635, "loss": 0.2191, "step": 79660 }, { "epoch": 0.5654836368540315, "grad_norm": 0.0849609375, "learning_rate": 0.001972597930184212, "loss": 0.2335, "step": 79670 }, { "epoch": 0.5655546150938776, "grad_norm": 0.2021484375, "learning_rate": 0.001972590998987516, "loss": 0.2237, "step": 79680 }, { "epoch": 0.5656255933337238, "grad_norm": 0.11572265625, "learning_rate": 0.0019725840669278824, "loss": 0.2454, "step": 79690 }, { "epoch": 0.5656965715735698, "grad_norm": 0.08349609375, "learning_rate": 0.001972577134005318, "loss": 0.2473, "step": 79700 }, { "epoch": 0.5657675498134159, "grad_norm": 0.10302734375, "learning_rate": 0.001972570200219829, "loss": 0.2293, "step": 79710 }, { "epoch": 0.5658385280532621, "grad_norm": 0.06640625, "learning_rate": 0.001972563265571423, "loss": 0.2254, "step": 79720 }, { "epoch": 0.5659095062931082, "grad_norm": 0.10400390625, "learning_rate": 0.0019725563300601067, "loss": 0.2461, "step": 79730 }, { "epoch": 0.5659804845329544, "grad_norm": 0.068359375, "learning_rate": 0.0019725493936858865, "loss": 0.2244, "step": 79740 }, { "epoch": 0.5660514627728004, "grad_norm": 0.087890625, "learning_rate": 0.00197254245644877, "loss": 0.231, "step": 79750 }, { "epoch": 0.5661224410126465, "grad_norm": 0.1181640625, "learning_rate": 0.0019725355183487637, "loss": 0.2435, "step": 79760 }, { "epoch": 0.5661934192524927, "grad_norm": 0.12890625, "learning_rate": 0.0019725285793858746, "loss": 0.2366, "step": 79770 }, { "epoch": 0.5662643974923388, "grad_norm": 0.1318359375, "learning_rate": 0.001972521639560109, "loss": 0.2216, "step": 79780 }, { "epoch": 0.566335375732185, "grad_norm": 0.10400390625, "learning_rate": 0.0019725146988714744, "loss": 0.2207, "step": 79790 }, { "epoch": 0.566406353972031, "grad_norm": 0.123046875, "learning_rate": 0.0019725077573199776, "loss": 0.2216, "step": 79800 }, { "epoch": 0.5664773322118771, "grad_norm": 0.0888671875, "learning_rate": 0.001972500814905625, "loss": 0.2351, "step": 79810 }, { "epoch": 0.5665483104517233, "grad_norm": 0.0966796875, "learning_rate": 0.001972493871628424, "loss": 0.2467, "step": 79820 }, { "epoch": 0.5666192886915694, "grad_norm": 0.07080078125, "learning_rate": 0.001972486927488381, "loss": 0.2374, "step": 79830 }, { "epoch": 0.5666902669314156, "grad_norm": 0.0859375, "learning_rate": 0.001972479982485503, "loss": 0.2327, "step": 79840 }, { "epoch": 0.5667612451712616, "grad_norm": 0.07275390625, "learning_rate": 0.0019724730366197977, "loss": 0.2294, "step": 79850 }, { "epoch": 0.5668322234111077, "grad_norm": 0.091796875, "learning_rate": 0.0019724660898912706, "loss": 0.2217, "step": 79860 }, { "epoch": 0.5669032016509539, "grad_norm": 0.2255859375, "learning_rate": 0.0019724591422999294, "loss": 0.2362, "step": 79870 }, { "epoch": 0.5669741798908, "grad_norm": 0.1806640625, "learning_rate": 0.001972452193845781, "loss": 0.2279, "step": 79880 }, { "epoch": 0.567045158130646, "grad_norm": 0.103515625, "learning_rate": 0.001972445244528832, "loss": 0.2517, "step": 79890 }, { "epoch": 0.5671161363704922, "grad_norm": 0.1181640625, "learning_rate": 0.001972438294349089, "loss": 0.2139, "step": 79900 }, { "epoch": 0.5671871146103383, "grad_norm": 0.1318359375, "learning_rate": 0.0019724313433065596, "loss": 0.2446, "step": 79910 }, { "epoch": 0.5672580928501845, "grad_norm": 0.10205078125, "learning_rate": 0.00197242439140125, "loss": 0.2271, "step": 79920 }, { "epoch": 0.5673290710900306, "grad_norm": 0.1337890625, "learning_rate": 0.0019724174386331673, "loss": 0.2524, "step": 79930 }, { "epoch": 0.5674000493298766, "grad_norm": 0.09326171875, "learning_rate": 0.0019724104850023187, "loss": 0.2336, "step": 79940 }, { "epoch": 0.5674710275697228, "grad_norm": 0.1103515625, "learning_rate": 0.0019724035305087105, "loss": 0.2555, "step": 79950 }, { "epoch": 0.5675420058095689, "grad_norm": 0.10107421875, "learning_rate": 0.0019723965751523506, "loss": 0.226, "step": 79960 }, { "epoch": 0.5676129840494151, "grad_norm": 0.130859375, "learning_rate": 0.0019723896189332442, "loss": 0.2269, "step": 79970 }, { "epoch": 0.5676839622892612, "grad_norm": 0.1396484375, "learning_rate": 0.0019723826618513996, "loss": 0.216, "step": 79980 }, { "epoch": 0.5677549405291072, "grad_norm": 0.0986328125, "learning_rate": 0.001972375703906823, "loss": 0.2301, "step": 79990 }, { "epoch": 0.5678259187689534, "grad_norm": 0.0888671875, "learning_rate": 0.001972368745099522, "loss": 0.2393, "step": 80000 }, { "epoch": 0.5678259187689534, "eval_covost2-zh-en_loss": 3.889228343963623, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.99, "eval_covost2-zh-en_samples_per_second": 2.91, "eval_covost2-zh-en_steps_per_second": 0.182, "step": 80000 }, { "epoch": 0.5678259187689534, "eval_covost2-en-zh_loss": 3.140929698944092, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.4937, "eval_covost2-en-zh_samples_per_second": 2.978, "eval_covost2-en-zh_steps_per_second": 0.186, "step": 80000 }, { "epoch": 0.5678968970087995, "grad_norm": 0.0712890625, "learning_rate": 0.0019723617854295024, "loss": 0.2226, "step": 80010 }, { "epoch": 0.5679678752486457, "grad_norm": 0.146484375, "learning_rate": 0.0019723548248967724, "loss": 0.2398, "step": 80020 }, { "epoch": 0.5680388534884918, "grad_norm": 0.1884765625, "learning_rate": 0.001972347863501338, "loss": 0.2324, "step": 80030 }, { "epoch": 0.5681098317283378, "grad_norm": 0.07958984375, "learning_rate": 0.0019723409012432054, "loss": 0.2404, "step": 80040 }, { "epoch": 0.568180809968184, "grad_norm": 0.08984375, "learning_rate": 0.001972333938122383, "loss": 0.2308, "step": 80050 }, { "epoch": 0.5682517882080301, "grad_norm": 0.109375, "learning_rate": 0.001972326974138877, "loss": 0.2235, "step": 80060 }, { "epoch": 0.5683227664478763, "grad_norm": 0.09814453125, "learning_rate": 0.001972320009292694, "loss": 0.2262, "step": 80070 }, { "epoch": 0.5683937446877224, "grad_norm": 0.11376953125, "learning_rate": 0.0019723130435838412, "loss": 0.2452, "step": 80080 }, { "epoch": 0.5684647229275684, "grad_norm": 0.09326171875, "learning_rate": 0.001972306077012326, "loss": 0.2382, "step": 80090 }, { "epoch": 0.5685357011674146, "grad_norm": 0.09228515625, "learning_rate": 0.001972299109578154, "loss": 0.2235, "step": 80100 }, { "epoch": 0.5686066794072607, "grad_norm": 0.10498046875, "learning_rate": 0.0019722921412813335, "loss": 0.2291, "step": 80110 }, { "epoch": 0.5686776576471069, "grad_norm": 0.0869140625, "learning_rate": 0.0019722851721218704, "loss": 0.2315, "step": 80120 }, { "epoch": 0.5687486358869529, "grad_norm": 0.0830078125, "learning_rate": 0.001972278202099772, "loss": 0.2414, "step": 80130 }, { "epoch": 0.568819614126799, "grad_norm": 0.08447265625, "learning_rate": 0.001972271231215045, "loss": 0.2209, "step": 80140 }, { "epoch": 0.5688905923666452, "grad_norm": 0.1181640625, "learning_rate": 0.0019722642594676965, "loss": 0.2408, "step": 80150 }, { "epoch": 0.5689615706064913, "grad_norm": 0.083984375, "learning_rate": 0.0019722572868577336, "loss": 0.2297, "step": 80160 }, { "epoch": 0.5690325488463375, "grad_norm": 0.1767578125, "learning_rate": 0.001972250313385163, "loss": 0.2397, "step": 80170 }, { "epoch": 0.5691035270861835, "grad_norm": 0.0986328125, "learning_rate": 0.001972243339049991, "loss": 0.2164, "step": 80180 }, { "epoch": 0.5691745053260296, "grad_norm": 0.099609375, "learning_rate": 0.0019722363638522253, "loss": 0.225, "step": 80190 }, { "epoch": 0.5692454835658758, "grad_norm": 0.1337890625, "learning_rate": 0.0019722293877918722, "loss": 0.2369, "step": 80200 }, { "epoch": 0.5693164618057219, "grad_norm": 0.08203125, "learning_rate": 0.001972222410868939, "loss": 0.2417, "step": 80210 }, { "epoch": 0.5693874400455681, "grad_norm": 0.12109375, "learning_rate": 0.0019722154330834327, "loss": 0.2399, "step": 80220 }, { "epoch": 0.5694584182854141, "grad_norm": 0.08056640625, "learning_rate": 0.00197220845443536, "loss": 0.2299, "step": 80230 }, { "epoch": 0.5695293965252602, "grad_norm": 0.07958984375, "learning_rate": 0.001972201474924728, "loss": 0.2268, "step": 80240 }, { "epoch": 0.5696003747651064, "grad_norm": 0.142578125, "learning_rate": 0.001972194494551543, "loss": 0.2466, "step": 80250 }, { "epoch": 0.5696713530049525, "grad_norm": 0.1376953125, "learning_rate": 0.0019721875133158124, "loss": 0.2362, "step": 80260 }, { "epoch": 0.5697423312447987, "grad_norm": 0.10693359375, "learning_rate": 0.001972180531217543, "loss": 0.2378, "step": 80270 }, { "epoch": 0.5698133094846447, "grad_norm": 0.13671875, "learning_rate": 0.0019721735482567415, "loss": 0.2292, "step": 80280 }, { "epoch": 0.5698842877244908, "grad_norm": 0.09912109375, "learning_rate": 0.0019721665644334154, "loss": 0.216, "step": 80290 }, { "epoch": 0.569955265964337, "grad_norm": 0.09228515625, "learning_rate": 0.0019721595797475714, "loss": 0.2313, "step": 80300 }, { "epoch": 0.5700262442041831, "grad_norm": 0.09326171875, "learning_rate": 0.001972152594199216, "loss": 0.2413, "step": 80310 }, { "epoch": 0.5700972224440293, "grad_norm": 0.11328125, "learning_rate": 0.0019721456077883563, "loss": 0.2474, "step": 80320 }, { "epoch": 0.5701682006838753, "grad_norm": 0.08203125, "learning_rate": 0.001972138620514999, "loss": 0.2247, "step": 80330 }, { "epoch": 0.5702391789237214, "grad_norm": 0.07763671875, "learning_rate": 0.0019721316323791514, "loss": 0.2244, "step": 80340 }, { "epoch": 0.5703101571635676, "grad_norm": 0.1767578125, "learning_rate": 0.001972124643380821, "loss": 0.2351, "step": 80350 }, { "epoch": 0.5703811354034137, "grad_norm": 0.09130859375, "learning_rate": 0.001972117653520013, "loss": 0.2305, "step": 80360 }, { "epoch": 0.5704521136432599, "grad_norm": 0.1552734375, "learning_rate": 0.0019721106627967355, "loss": 0.2507, "step": 80370 }, { "epoch": 0.5705230918831059, "grad_norm": 0.119140625, "learning_rate": 0.0019721036712109954, "loss": 0.2413, "step": 80380 }, { "epoch": 0.570594070122952, "grad_norm": 0.1162109375, "learning_rate": 0.0019720966787627994, "loss": 0.2382, "step": 80390 }, { "epoch": 0.5706650483627982, "grad_norm": 0.1396484375, "learning_rate": 0.0019720896854521545, "loss": 0.2354, "step": 80400 }, { "epoch": 0.5707360266026443, "grad_norm": 0.1650390625, "learning_rate": 0.001972082691279068, "loss": 0.2298, "step": 80410 }, { "epoch": 0.5708070048424904, "grad_norm": 0.0869140625, "learning_rate": 0.0019720756962435457, "loss": 0.2295, "step": 80420 }, { "epoch": 0.5708779830823365, "grad_norm": 0.12353515625, "learning_rate": 0.0019720687003455953, "loss": 0.2299, "step": 80430 }, { "epoch": 0.5709489613221826, "grad_norm": 0.09619140625, "learning_rate": 0.0019720617035852237, "loss": 0.2301, "step": 80440 }, { "epoch": 0.5710199395620288, "grad_norm": 0.1142578125, "learning_rate": 0.001972054705962438, "loss": 0.2241, "step": 80450 }, { "epoch": 0.5710909178018749, "grad_norm": 0.09619140625, "learning_rate": 0.001972047707477244, "loss": 0.2249, "step": 80460 }, { "epoch": 0.571161896041721, "grad_norm": 0.11083984375, "learning_rate": 0.0019720407081296504, "loss": 0.2286, "step": 80470 }, { "epoch": 0.5712328742815671, "grad_norm": 0.0908203125, "learning_rate": 0.0019720337079196628, "loss": 0.2457, "step": 80480 }, { "epoch": 0.5713038525214132, "grad_norm": 0.302734375, "learning_rate": 0.0019720267068472886, "loss": 0.2294, "step": 80490 }, { "epoch": 0.5713748307612594, "grad_norm": 0.146484375, "learning_rate": 0.0019720197049125344, "loss": 0.2373, "step": 80500 }, { "epoch": 0.5714458090011055, "grad_norm": 0.09619140625, "learning_rate": 0.001972012702115408, "loss": 0.2257, "step": 80510 }, { "epoch": 0.5715167872409516, "grad_norm": 0.0810546875, "learning_rate": 0.0019720056984559153, "loss": 0.2317, "step": 80520 }, { "epoch": 0.5715877654807977, "grad_norm": 0.11279296875, "learning_rate": 0.0019719986939340634, "loss": 0.2368, "step": 80530 }, { "epoch": 0.5716587437206438, "grad_norm": 0.087890625, "learning_rate": 0.0019719916885498593, "loss": 0.2252, "step": 80540 }, { "epoch": 0.57172972196049, "grad_norm": 0.134765625, "learning_rate": 0.0019719846823033106, "loss": 0.2393, "step": 80550 }, { "epoch": 0.5718007002003361, "grad_norm": 0.103515625, "learning_rate": 0.0019719776751944236, "loss": 0.2296, "step": 80560 }, { "epoch": 0.5718716784401822, "grad_norm": 0.11865234375, "learning_rate": 0.0019719706672232055, "loss": 0.2558, "step": 80570 }, { "epoch": 0.5719426566800283, "grad_norm": 0.07080078125, "learning_rate": 0.0019719636583896624, "loss": 0.2337, "step": 80580 }, { "epoch": 0.5720136349198744, "grad_norm": 0.10498046875, "learning_rate": 0.0019719566486938026, "loss": 0.2236, "step": 80590 }, { "epoch": 0.5720846131597206, "grad_norm": 0.12060546875, "learning_rate": 0.0019719496381356318, "loss": 0.2358, "step": 80600 }, { "epoch": 0.5721555913995667, "grad_norm": 0.0986328125, "learning_rate": 0.001971942626715158, "loss": 0.2412, "step": 80610 }, { "epoch": 0.5722265696394128, "grad_norm": 0.140625, "learning_rate": 0.001971935614432387, "loss": 0.2334, "step": 80620 }, { "epoch": 0.5722975478792589, "grad_norm": 0.07568359375, "learning_rate": 0.0019719286012873267, "loss": 0.228, "step": 80630 }, { "epoch": 0.572368526119105, "grad_norm": 0.11962890625, "learning_rate": 0.0019719215872799835, "loss": 0.2244, "step": 80640 }, { "epoch": 0.5724395043589512, "grad_norm": 0.11572265625, "learning_rate": 0.0019719145724103643, "loss": 0.2496, "step": 80650 }, { "epoch": 0.5725104825987972, "grad_norm": 0.1337890625, "learning_rate": 0.0019719075566784765, "loss": 0.2397, "step": 80660 }, { "epoch": 0.5725814608386434, "grad_norm": 0.1259765625, "learning_rate": 0.001971900540084327, "loss": 0.2383, "step": 80670 }, { "epoch": 0.5726524390784895, "grad_norm": 0.130859375, "learning_rate": 0.001971893522627922, "loss": 0.2465, "step": 80680 }, { "epoch": 0.5727234173183356, "grad_norm": 0.07958984375, "learning_rate": 0.001971886504309269, "loss": 0.2285, "step": 80690 }, { "epoch": 0.5727943955581818, "grad_norm": 0.0908203125, "learning_rate": 0.0019718794851283756, "loss": 0.2384, "step": 80700 }, { "epoch": 0.5728653737980278, "grad_norm": 0.1015625, "learning_rate": 0.001971872465085247, "loss": 0.2251, "step": 80710 }, { "epoch": 0.572936352037874, "grad_norm": 0.12353515625, "learning_rate": 0.001971865444179892, "loss": 0.2282, "step": 80720 }, { "epoch": 0.5730073302777201, "grad_norm": 0.130859375, "learning_rate": 0.0019718584224123164, "loss": 0.252, "step": 80730 }, { "epoch": 0.5730783085175662, "grad_norm": 0.10595703125, "learning_rate": 0.0019718513997825276, "loss": 0.2446, "step": 80740 }, { "epoch": 0.5731492867574124, "grad_norm": 0.1318359375, "learning_rate": 0.001971844376290532, "loss": 0.2194, "step": 80750 }, { "epoch": 0.5732202649972584, "grad_norm": 0.10302734375, "learning_rate": 0.0019718373519363374, "loss": 0.2458, "step": 80760 }, { "epoch": 0.5732912432371046, "grad_norm": 0.1044921875, "learning_rate": 0.0019718303267199505, "loss": 0.226, "step": 80770 }, { "epoch": 0.5733622214769507, "grad_norm": 0.169921875, "learning_rate": 0.0019718233006413774, "loss": 0.2291, "step": 80780 }, { "epoch": 0.5734331997167968, "grad_norm": 0.1396484375, "learning_rate": 0.0019718162737006263, "loss": 0.2457, "step": 80790 }, { "epoch": 0.573504177956643, "grad_norm": 0.125, "learning_rate": 0.001971809245897703, "loss": 0.2282, "step": 80800 }, { "epoch": 0.573575156196489, "grad_norm": 0.1396484375, "learning_rate": 0.0019718022172326155, "loss": 0.2458, "step": 80810 }, { "epoch": 0.5736461344363352, "grad_norm": 0.103515625, "learning_rate": 0.0019717951877053696, "loss": 0.2257, "step": 80820 }, { "epoch": 0.5737171126761813, "grad_norm": 0.1337890625, "learning_rate": 0.0019717881573159735, "loss": 0.2376, "step": 80830 }, { "epoch": 0.5737880909160274, "grad_norm": 0.197265625, "learning_rate": 0.0019717811260644333, "loss": 0.2538, "step": 80840 }, { "epoch": 0.5738590691558736, "grad_norm": 0.07080078125, "learning_rate": 0.0019717740939507564, "loss": 0.2394, "step": 80850 }, { "epoch": 0.5739300473957196, "grad_norm": 0.11767578125, "learning_rate": 0.001971767060974949, "loss": 0.2347, "step": 80860 }, { "epoch": 0.5740010256355658, "grad_norm": 0.08154296875, "learning_rate": 0.001971760027137019, "loss": 0.2314, "step": 80870 }, { "epoch": 0.5740720038754119, "grad_norm": 0.11279296875, "learning_rate": 0.001971752992436973, "loss": 0.2212, "step": 80880 }, { "epoch": 0.574142982115258, "grad_norm": 0.09326171875, "learning_rate": 0.001971745956874818, "loss": 0.2121, "step": 80890 }, { "epoch": 0.5742139603551041, "grad_norm": 0.1123046875, "learning_rate": 0.001971738920450561, "loss": 0.2411, "step": 80900 }, { "epoch": 0.5742849385949502, "grad_norm": 0.0888671875, "learning_rate": 0.0019717318831642084, "loss": 0.2403, "step": 80910 }, { "epoch": 0.5743559168347964, "grad_norm": 0.1015625, "learning_rate": 0.001971724845015768, "loss": 0.2518, "step": 80920 }, { "epoch": 0.5744268950746425, "grad_norm": 0.11181640625, "learning_rate": 0.0019717178060052465, "loss": 0.2243, "step": 80930 }, { "epoch": 0.5744978733144886, "grad_norm": 0.103515625, "learning_rate": 0.00197171076613265, "loss": 0.252, "step": 80940 }, { "epoch": 0.5745688515543347, "grad_norm": 0.146484375, "learning_rate": 0.001971703725397987, "loss": 0.2236, "step": 80950 }, { "epoch": 0.5746398297941808, "grad_norm": 0.08544921875, "learning_rate": 0.0019716966838012632, "loss": 0.2365, "step": 80960 }, { "epoch": 0.574710808034027, "grad_norm": 0.1376953125, "learning_rate": 0.001971689641342486, "loss": 0.2245, "step": 80970 }, { "epoch": 0.5747817862738731, "grad_norm": 0.1640625, "learning_rate": 0.0019716825980216623, "loss": 0.2311, "step": 80980 }, { "epoch": 0.5748527645137192, "grad_norm": 0.1103515625, "learning_rate": 0.0019716755538387995, "loss": 0.2206, "step": 80990 }, { "epoch": 0.5749237427535653, "grad_norm": 0.07861328125, "learning_rate": 0.001971668508793904, "loss": 0.2253, "step": 81000 }, { "epoch": 0.5749947209934114, "grad_norm": 0.1455078125, "learning_rate": 0.001971661462886983, "loss": 0.2204, "step": 81010 }, { "epoch": 0.5750656992332576, "grad_norm": 0.11328125, "learning_rate": 0.0019716544161180434, "loss": 0.2271, "step": 81020 }, { "epoch": 0.5751366774731037, "grad_norm": 0.09423828125, "learning_rate": 0.001971647368487092, "loss": 0.2383, "step": 81030 }, { "epoch": 0.5752076557129499, "grad_norm": 0.09130859375, "learning_rate": 0.0019716403199941364, "loss": 0.2383, "step": 81040 }, { "epoch": 0.5752786339527959, "grad_norm": 0.1123046875, "learning_rate": 0.001971633270639183, "loss": 0.235, "step": 81050 }, { "epoch": 0.575349612192642, "grad_norm": 0.1259765625, "learning_rate": 0.001971626220422239, "loss": 0.2387, "step": 81060 }, { "epoch": 0.5754205904324882, "grad_norm": 0.08642578125, "learning_rate": 0.001971619169343311, "loss": 0.2417, "step": 81070 }, { "epoch": 0.5754915686723343, "grad_norm": 0.10693359375, "learning_rate": 0.0019716121174024066, "loss": 0.2327, "step": 81080 }, { "epoch": 0.5755625469121805, "grad_norm": 0.0703125, "learning_rate": 0.0019716050645995323, "loss": 0.2201, "step": 81090 }, { "epoch": 0.5756335251520265, "grad_norm": 0.1005859375, "learning_rate": 0.001971598010934695, "loss": 0.2356, "step": 81100 }, { "epoch": 0.5757045033918726, "grad_norm": 0.13671875, "learning_rate": 0.001971590956407902, "loss": 0.2253, "step": 81110 }, { "epoch": 0.5757754816317188, "grad_norm": 0.07958984375, "learning_rate": 0.0019715839010191606, "loss": 0.2352, "step": 81120 }, { "epoch": 0.5758464598715649, "grad_norm": 0.0791015625, "learning_rate": 0.001971576844768477, "loss": 0.2467, "step": 81130 }, { "epoch": 0.5759174381114109, "grad_norm": 0.1123046875, "learning_rate": 0.0019715697876558583, "loss": 0.2274, "step": 81140 }, { "epoch": 0.5759884163512571, "grad_norm": 0.0849609375, "learning_rate": 0.001971562729681312, "loss": 0.2383, "step": 81150 }, { "epoch": 0.5760593945911032, "grad_norm": 0.130859375, "learning_rate": 0.001971555670844845, "loss": 0.2316, "step": 81160 }, { "epoch": 0.5761303728309494, "grad_norm": 0.09423828125, "learning_rate": 0.0019715486111464638, "loss": 0.2421, "step": 81170 }, { "epoch": 0.5762013510707955, "grad_norm": 0.1015625, "learning_rate": 0.001971541550586176, "loss": 0.232, "step": 81180 }, { "epoch": 0.5762723293106415, "grad_norm": 0.07568359375, "learning_rate": 0.0019715344891639876, "loss": 0.2209, "step": 81190 }, { "epoch": 0.5763433075504877, "grad_norm": 0.07470703125, "learning_rate": 0.0019715274268799063, "loss": 0.2324, "step": 81200 }, { "epoch": 0.5764142857903338, "grad_norm": 0.11962890625, "learning_rate": 0.0019715203637339393, "loss": 0.2293, "step": 81210 }, { "epoch": 0.57648526403018, "grad_norm": 0.10205078125, "learning_rate": 0.001971513299726093, "loss": 0.2312, "step": 81220 }, { "epoch": 0.5765562422700261, "grad_norm": 0.0869140625, "learning_rate": 0.0019715062348563753, "loss": 0.2309, "step": 81230 }, { "epoch": 0.5766272205098721, "grad_norm": 0.12060546875, "learning_rate": 0.0019714991691247917, "loss": 0.2383, "step": 81240 }, { "epoch": 0.5766981987497183, "grad_norm": 0.11669921875, "learning_rate": 0.0019714921025313506, "loss": 0.2143, "step": 81250 }, { "epoch": 0.5767691769895644, "grad_norm": 0.123046875, "learning_rate": 0.001971485035076058, "loss": 0.245, "step": 81260 }, { "epoch": 0.5768401552294106, "grad_norm": 0.125, "learning_rate": 0.001971477966758922, "loss": 0.2317, "step": 81270 }, { "epoch": 0.5769111334692567, "grad_norm": 0.1435546875, "learning_rate": 0.0019714708975799486, "loss": 0.2325, "step": 81280 }, { "epoch": 0.5769821117091027, "grad_norm": 0.10107421875, "learning_rate": 0.001971463827539145, "loss": 0.2275, "step": 81290 }, { "epoch": 0.5770530899489489, "grad_norm": 0.11962890625, "learning_rate": 0.001971456756636518, "loss": 0.2602, "step": 81300 }, { "epoch": 0.577124068188795, "grad_norm": 0.2158203125, "learning_rate": 0.0019714496848720754, "loss": 0.2422, "step": 81310 }, { "epoch": 0.5771950464286412, "grad_norm": 0.11328125, "learning_rate": 0.0019714426122458234, "loss": 0.2394, "step": 81320 }, { "epoch": 0.5772660246684873, "grad_norm": 0.08056640625, "learning_rate": 0.0019714355387577694, "loss": 0.2363, "step": 81330 }, { "epoch": 0.5773370029083333, "grad_norm": 0.1884765625, "learning_rate": 0.0019714284644079204, "loss": 0.227, "step": 81340 }, { "epoch": 0.5774079811481795, "grad_norm": 0.1455078125, "learning_rate": 0.001971421389196283, "loss": 0.2281, "step": 81350 }, { "epoch": 0.5774789593880256, "grad_norm": 0.12255859375, "learning_rate": 0.0019714143131228648, "loss": 0.2329, "step": 81360 }, { "epoch": 0.5775499376278718, "grad_norm": 0.1103515625, "learning_rate": 0.001971407236187672, "loss": 0.2158, "step": 81370 }, { "epoch": 0.5776209158677178, "grad_norm": 0.1162109375, "learning_rate": 0.0019714001583907123, "loss": 0.23, "step": 81380 }, { "epoch": 0.5776918941075639, "grad_norm": 0.140625, "learning_rate": 0.0019713930797319924, "loss": 0.2348, "step": 81390 }, { "epoch": 0.5777628723474101, "grad_norm": 0.12353515625, "learning_rate": 0.001971386000211519, "loss": 0.2323, "step": 81400 }, { "epoch": 0.5778338505872562, "grad_norm": 0.1396484375, "learning_rate": 0.0019713789198293, "loss": 0.2281, "step": 81410 }, { "epoch": 0.5779048288271024, "grad_norm": 0.08349609375, "learning_rate": 0.0019713718385853418, "loss": 0.2205, "step": 81420 }, { "epoch": 0.5779758070669484, "grad_norm": 0.080078125, "learning_rate": 0.0019713647564796512, "loss": 0.2321, "step": 81430 }, { "epoch": 0.5780467853067945, "grad_norm": 0.1083984375, "learning_rate": 0.001971357673512236, "loss": 0.2521, "step": 81440 }, { "epoch": 0.5781177635466407, "grad_norm": 0.12890625, "learning_rate": 0.001971350589683102, "loss": 0.2401, "step": 81450 }, { "epoch": 0.5781887417864868, "grad_norm": 0.0986328125, "learning_rate": 0.0019713435049922576, "loss": 0.248, "step": 81460 }, { "epoch": 0.578259720026333, "grad_norm": 0.09326171875, "learning_rate": 0.0019713364194397084, "loss": 0.2425, "step": 81470 }, { "epoch": 0.578330698266179, "grad_norm": 0.08447265625, "learning_rate": 0.0019713293330254626, "loss": 0.2115, "step": 81480 }, { "epoch": 0.5784016765060251, "grad_norm": 0.11962890625, "learning_rate": 0.001971322245749526, "loss": 0.235, "step": 81490 }, { "epoch": 0.5784726547458713, "grad_norm": 0.11083984375, "learning_rate": 0.001971315157611907, "loss": 0.2422, "step": 81500 }, { "epoch": 0.5785436329857174, "grad_norm": 0.091796875, "learning_rate": 0.0019713080686126114, "loss": 0.2554, "step": 81510 }, { "epoch": 0.5786146112255636, "grad_norm": 0.076171875, "learning_rate": 0.001971300978751647, "loss": 0.2177, "step": 81520 }, { "epoch": 0.5786855894654096, "grad_norm": 0.1796875, "learning_rate": 0.001971293888029021, "loss": 0.2245, "step": 81530 }, { "epoch": 0.5787565677052557, "grad_norm": 0.08642578125, "learning_rate": 0.001971286796444739, "loss": 0.2463, "step": 81540 }, { "epoch": 0.5788275459451019, "grad_norm": 0.1142578125, "learning_rate": 0.0019712797039988096, "loss": 0.2283, "step": 81550 }, { "epoch": 0.578898524184948, "grad_norm": 0.10498046875, "learning_rate": 0.0019712726106912394, "loss": 0.2475, "step": 81560 }, { "epoch": 0.5789695024247942, "grad_norm": 0.146484375, "learning_rate": 0.0019712655165220346, "loss": 0.2356, "step": 81570 }, { "epoch": 0.5790404806646402, "grad_norm": 0.111328125, "learning_rate": 0.001971258421491203, "loss": 0.2429, "step": 81580 }, { "epoch": 0.5791114589044863, "grad_norm": 0.07177734375, "learning_rate": 0.0019712513255987513, "loss": 0.2496, "step": 81590 }, { "epoch": 0.5791824371443325, "grad_norm": 0.099609375, "learning_rate": 0.001971244228844687, "loss": 0.2456, "step": 81600 }, { "epoch": 0.5792534153841786, "grad_norm": 0.11865234375, "learning_rate": 0.0019712371312290166, "loss": 0.2363, "step": 81610 }, { "epoch": 0.5793243936240248, "grad_norm": 0.08837890625, "learning_rate": 0.0019712300327517475, "loss": 0.2383, "step": 81620 }, { "epoch": 0.5793953718638708, "grad_norm": 0.12353515625, "learning_rate": 0.001971222933412886, "loss": 0.2506, "step": 81630 }, { "epoch": 0.5794663501037169, "grad_norm": 0.1103515625, "learning_rate": 0.0019712158332124397, "loss": 0.2478, "step": 81640 }, { "epoch": 0.5795373283435631, "grad_norm": 0.1318359375, "learning_rate": 0.001971208732150416, "loss": 0.2469, "step": 81650 }, { "epoch": 0.5796083065834092, "grad_norm": 0.15625, "learning_rate": 0.0019712016302268214, "loss": 0.2422, "step": 81660 }, { "epoch": 0.5796792848232553, "grad_norm": 0.09326171875, "learning_rate": 0.0019711945274416627, "loss": 0.2517, "step": 81670 }, { "epoch": 0.5797502630631014, "grad_norm": 0.1826171875, "learning_rate": 0.001971187423794947, "loss": 0.2314, "step": 81680 }, { "epoch": 0.5798212413029475, "grad_norm": 0.08447265625, "learning_rate": 0.001971180319286682, "loss": 0.2469, "step": 81690 }, { "epoch": 0.5798922195427937, "grad_norm": 0.12890625, "learning_rate": 0.0019711732139168743, "loss": 0.2084, "step": 81700 }, { "epoch": 0.5799631977826398, "grad_norm": 0.07373046875, "learning_rate": 0.0019711661076855305, "loss": 0.2273, "step": 81710 }, { "epoch": 0.5800341760224859, "grad_norm": 0.09521484375, "learning_rate": 0.0019711590005926584, "loss": 0.2322, "step": 81720 }, { "epoch": 0.580105154262332, "grad_norm": 0.2216796875, "learning_rate": 0.0019711518926382645, "loss": 0.2411, "step": 81730 }, { "epoch": 0.5801761325021781, "grad_norm": 0.11279296875, "learning_rate": 0.001971144783822356, "loss": 0.242, "step": 81740 }, { "epoch": 0.5802471107420243, "grad_norm": 0.166015625, "learning_rate": 0.0019711376741449395, "loss": 0.2392, "step": 81750 }, { "epoch": 0.5803180889818704, "grad_norm": 0.1416015625, "learning_rate": 0.001971130563606023, "loss": 0.2351, "step": 81760 }, { "epoch": 0.5803890672217165, "grad_norm": 0.08984375, "learning_rate": 0.0019711234522056128, "loss": 0.252, "step": 81770 }, { "epoch": 0.5804600454615626, "grad_norm": 0.111328125, "learning_rate": 0.001971116339943716, "loss": 0.2381, "step": 81780 }, { "epoch": 0.5805310237014087, "grad_norm": 0.1318359375, "learning_rate": 0.00197110922682034, "loss": 0.2265, "step": 81790 }, { "epoch": 0.5806020019412549, "grad_norm": 0.1640625, "learning_rate": 0.001971102112835491, "loss": 0.2345, "step": 81800 }, { "epoch": 0.580672980181101, "grad_norm": 0.08935546875, "learning_rate": 0.001971094997989177, "loss": 0.2084, "step": 81810 }, { "epoch": 0.5807439584209471, "grad_norm": 0.1318359375, "learning_rate": 0.001971087882281405, "loss": 0.2424, "step": 81820 }, { "epoch": 0.5808149366607932, "grad_norm": 0.0771484375, "learning_rate": 0.0019710807657121808, "loss": 0.2237, "step": 81830 }, { "epoch": 0.5808859149006393, "grad_norm": 0.091796875, "learning_rate": 0.001971073648281513, "loss": 0.2439, "step": 81840 }, { "epoch": 0.5809568931404855, "grad_norm": 0.1357421875, "learning_rate": 0.001971066529989408, "loss": 0.2308, "step": 81850 }, { "epoch": 0.5810278713803316, "grad_norm": 0.09521484375, "learning_rate": 0.0019710594108358725, "loss": 0.2192, "step": 81860 }, { "epoch": 0.5810988496201777, "grad_norm": 0.115234375, "learning_rate": 0.001971052290820914, "loss": 0.2515, "step": 81870 }, { "epoch": 0.5811698278600238, "grad_norm": 0.130859375, "learning_rate": 0.001971045169944539, "loss": 0.2383, "step": 81880 }, { "epoch": 0.5812408060998699, "grad_norm": 0.1494140625, "learning_rate": 0.001971038048206755, "loss": 0.2339, "step": 81890 }, { "epoch": 0.5813117843397161, "grad_norm": 0.091796875, "learning_rate": 0.0019710309256075697, "loss": 0.2352, "step": 81900 }, { "epoch": 0.5813827625795621, "grad_norm": 0.2021484375, "learning_rate": 0.0019710238021469883, "loss": 0.2704, "step": 81910 }, { "epoch": 0.5814537408194083, "grad_norm": 0.09326171875, "learning_rate": 0.00197101667782502, "loss": 0.2363, "step": 81920 }, { "epoch": 0.5815247190592544, "grad_norm": 0.1396484375, "learning_rate": 0.00197100955264167, "loss": 0.2256, "step": 81930 }, { "epoch": 0.5815956972991005, "grad_norm": 0.10205078125, "learning_rate": 0.0019710024265969465, "loss": 0.2303, "step": 81940 }, { "epoch": 0.5816666755389467, "grad_norm": 0.09326171875, "learning_rate": 0.001970995299690856, "loss": 0.2388, "step": 81950 }, { "epoch": 0.5817376537787927, "grad_norm": 0.1181640625, "learning_rate": 0.001970988171923406, "loss": 0.2371, "step": 81960 }, { "epoch": 0.5818086320186389, "grad_norm": 0.1015625, "learning_rate": 0.0019709810432946033, "loss": 0.2269, "step": 81970 }, { "epoch": 0.581879610258485, "grad_norm": 0.130859375, "learning_rate": 0.001970973913804455, "loss": 0.2205, "step": 81980 }, { "epoch": 0.5819505884983311, "grad_norm": 0.126953125, "learning_rate": 0.001970966783452968, "loss": 0.2389, "step": 81990 }, { "epoch": 0.5820215667381773, "grad_norm": 0.11572265625, "learning_rate": 0.001970959652240149, "loss": 0.2574, "step": 82000 }, { "epoch": 0.5820215667381773, "eval_covost2-zh-en_loss": 3.716252565383911, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.7344, "eval_covost2-zh-en_samples_per_second": 3.087, "eval_covost2-zh-en_steps_per_second": 0.193, "step": 82000 }, { "epoch": 0.5820215667381773, "eval_covost2-en-zh_loss": 3.1372034549713135, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.2699, "eval_covost2-en-zh_samples_per_second": 3.321, "eval_covost2-en-zh_steps_per_second": 0.208, "step": 82000 }, { "epoch": 0.5820925449780233, "grad_norm": 0.142578125, "learning_rate": 0.001970952520166006, "loss": 0.2407, "step": 82010 }, { "epoch": 0.5821635232178695, "grad_norm": 0.0830078125, "learning_rate": 0.0019709453872305454, "loss": 0.2503, "step": 82020 }, { "epoch": 0.5822345014577156, "grad_norm": 0.1044921875, "learning_rate": 0.001970938253433774, "loss": 0.237, "step": 82030 }, { "epoch": 0.5823054796975617, "grad_norm": 0.1552734375, "learning_rate": 0.0019709311187756998, "loss": 0.229, "step": 82040 }, { "epoch": 0.5823764579374079, "grad_norm": 0.1025390625, "learning_rate": 0.001970923983256329, "loss": 0.2352, "step": 82050 }, { "epoch": 0.5824474361772539, "grad_norm": 0.0927734375, "learning_rate": 0.001970916846875669, "loss": 0.247, "step": 82060 }, { "epoch": 0.5825184144171001, "grad_norm": 0.12353515625, "learning_rate": 0.001970909709633727, "loss": 0.2398, "step": 82070 }, { "epoch": 0.5825893926569462, "grad_norm": 0.10205078125, "learning_rate": 0.0019709025715305096, "loss": 0.2394, "step": 82080 }, { "epoch": 0.5826603708967923, "grad_norm": 0.07470703125, "learning_rate": 0.0019708954325660245, "loss": 0.2416, "step": 82090 }, { "epoch": 0.5827313491366385, "grad_norm": 0.142578125, "learning_rate": 0.0019708882927402783, "loss": 0.2577, "step": 82100 }, { "epoch": 0.5828023273764845, "grad_norm": 0.09423828125, "learning_rate": 0.0019708811520532782, "loss": 0.2368, "step": 82110 }, { "epoch": 0.5828733056163307, "grad_norm": 0.0927734375, "learning_rate": 0.001970874010505031, "loss": 0.2477, "step": 82120 }, { "epoch": 0.5829442838561768, "grad_norm": 0.1748046875, "learning_rate": 0.001970866868095544, "loss": 0.2467, "step": 82130 }, { "epoch": 0.583015262096023, "grad_norm": 0.09912109375, "learning_rate": 0.0019708597248248244, "loss": 0.2249, "step": 82140 }, { "epoch": 0.583086240335869, "grad_norm": 0.0751953125, "learning_rate": 0.001970852580692879, "loss": 0.2289, "step": 82150 }, { "epoch": 0.5831572185757151, "grad_norm": 0.12890625, "learning_rate": 0.001970845435699715, "loss": 0.2205, "step": 82160 }, { "epoch": 0.5832281968155613, "grad_norm": 0.2060546875, "learning_rate": 0.00197083828984534, "loss": 0.2302, "step": 82170 }, { "epoch": 0.5832991750554074, "grad_norm": 0.11572265625, "learning_rate": 0.0019708311431297595, "loss": 0.2317, "step": 82180 }, { "epoch": 0.5833701532952535, "grad_norm": 0.2060546875, "learning_rate": 0.001970823995552982, "loss": 0.2312, "step": 82190 }, { "epoch": 0.5834411315350996, "grad_norm": 0.166015625, "learning_rate": 0.0019708168471150147, "loss": 0.2352, "step": 82200 }, { "epoch": 0.5835121097749457, "grad_norm": 0.09765625, "learning_rate": 0.0019708096978158633, "loss": 0.2422, "step": 82210 }, { "epoch": 0.5835830880147919, "grad_norm": 0.2265625, "learning_rate": 0.001970802547655536, "loss": 0.2303, "step": 82220 }, { "epoch": 0.583654066254638, "grad_norm": 0.162109375, "learning_rate": 0.00197079539663404, "loss": 0.232, "step": 82230 }, { "epoch": 0.5837250444944841, "grad_norm": 0.09375, "learning_rate": 0.0019707882447513813, "loss": 0.2241, "step": 82240 }, { "epoch": 0.5837960227343302, "grad_norm": 0.0908203125, "learning_rate": 0.001970781092007568, "loss": 0.2421, "step": 82250 }, { "epoch": 0.5838670009741763, "grad_norm": 0.10400390625, "learning_rate": 0.0019707739384026064, "loss": 0.2411, "step": 82260 }, { "epoch": 0.5839379792140225, "grad_norm": 0.109375, "learning_rate": 0.001970766783936504, "loss": 0.2399, "step": 82270 }, { "epoch": 0.5840089574538686, "grad_norm": 0.11328125, "learning_rate": 0.001970759628609268, "loss": 0.2393, "step": 82280 }, { "epoch": 0.5840799356937147, "grad_norm": 0.10302734375, "learning_rate": 0.001970752472420905, "loss": 0.2791, "step": 82290 }, { "epoch": 0.5841509139335608, "grad_norm": 0.10400390625, "learning_rate": 0.001970745315371423, "loss": 0.2387, "step": 82300 }, { "epoch": 0.5842218921734069, "grad_norm": 0.10693359375, "learning_rate": 0.001970738157460828, "loss": 0.2264, "step": 82310 }, { "epoch": 0.5842928704132531, "grad_norm": 0.1884765625, "learning_rate": 0.0019707309986891275, "loss": 0.2566, "step": 82320 }, { "epoch": 0.5843638486530992, "grad_norm": 0.1044921875, "learning_rate": 0.001970723839056329, "loss": 0.2361, "step": 82330 }, { "epoch": 0.5844348268929453, "grad_norm": 0.1123046875, "learning_rate": 0.001970716678562439, "loss": 0.2359, "step": 82340 }, { "epoch": 0.5845058051327914, "grad_norm": 0.1162109375, "learning_rate": 0.0019707095172074645, "loss": 0.2319, "step": 82350 }, { "epoch": 0.5845767833726375, "grad_norm": 0.078125, "learning_rate": 0.001970702354991413, "loss": 0.2424, "step": 82360 }, { "epoch": 0.5846477616124837, "grad_norm": 0.107421875, "learning_rate": 0.0019706951919142915, "loss": 0.2348, "step": 82370 }, { "epoch": 0.5847187398523298, "grad_norm": 0.10009765625, "learning_rate": 0.001970688027976107, "loss": 0.2214, "step": 82380 }, { "epoch": 0.5847897180921758, "grad_norm": 0.1513671875, "learning_rate": 0.0019706808631768666, "loss": 0.2327, "step": 82390 }, { "epoch": 0.584860696332022, "grad_norm": 0.11083984375, "learning_rate": 0.0019706736975165775, "loss": 0.24, "step": 82400 }, { "epoch": 0.5849316745718681, "grad_norm": 0.09423828125, "learning_rate": 0.0019706665309952467, "loss": 0.2546, "step": 82410 }, { "epoch": 0.5850026528117143, "grad_norm": 0.12109375, "learning_rate": 0.001970659363612881, "loss": 0.254, "step": 82420 }, { "epoch": 0.5850736310515604, "grad_norm": 0.11669921875, "learning_rate": 0.001970652195369488, "loss": 0.2512, "step": 82430 }, { "epoch": 0.5851446092914064, "grad_norm": 0.08837890625, "learning_rate": 0.001970645026265075, "loss": 0.2572, "step": 82440 }, { "epoch": 0.5852155875312526, "grad_norm": 0.09716796875, "learning_rate": 0.001970637856299648, "loss": 0.2263, "step": 82450 }, { "epoch": 0.5852865657710987, "grad_norm": 0.10498046875, "learning_rate": 0.0019706306854732148, "loss": 0.2429, "step": 82460 }, { "epoch": 0.5853575440109449, "grad_norm": 0.11181640625, "learning_rate": 0.0019706235137857825, "loss": 0.2498, "step": 82470 }, { "epoch": 0.585428522250791, "grad_norm": 0.08251953125, "learning_rate": 0.001970616341237358, "loss": 0.231, "step": 82480 }, { "epoch": 0.585499500490637, "grad_norm": 0.1357421875, "learning_rate": 0.0019706091678279485, "loss": 0.244, "step": 82490 }, { "epoch": 0.5855704787304832, "grad_norm": 0.2275390625, "learning_rate": 0.001970601993557561, "loss": 0.2207, "step": 82500 }, { "epoch": 0.5856414569703293, "grad_norm": 0.12255859375, "learning_rate": 0.001970594818426203, "loss": 0.2302, "step": 82510 }, { "epoch": 0.5857124352101755, "grad_norm": 0.0908203125, "learning_rate": 0.0019705876424338815, "loss": 0.2255, "step": 82520 }, { "epoch": 0.5857834134500216, "grad_norm": 0.12890625, "learning_rate": 0.0019705804655806027, "loss": 0.2328, "step": 82530 }, { "epoch": 0.5858543916898676, "grad_norm": 0.07421875, "learning_rate": 0.001970573287866375, "loss": 0.2375, "step": 82540 }, { "epoch": 0.5859253699297138, "grad_norm": 0.1484375, "learning_rate": 0.0019705661092912045, "loss": 0.242, "step": 82550 }, { "epoch": 0.5859963481695599, "grad_norm": 0.0859375, "learning_rate": 0.0019705589298550986, "loss": 0.2338, "step": 82560 }, { "epoch": 0.5860673264094061, "grad_norm": 0.12890625, "learning_rate": 0.001970551749558065, "loss": 0.2237, "step": 82570 }, { "epoch": 0.5861383046492522, "grad_norm": 0.134765625, "learning_rate": 0.00197054456840011, "loss": 0.2344, "step": 82580 }, { "epoch": 0.5862092828890982, "grad_norm": 0.1025390625, "learning_rate": 0.0019705373863812404, "loss": 0.2264, "step": 82590 }, { "epoch": 0.5862802611289444, "grad_norm": 0.0810546875, "learning_rate": 0.001970530203501465, "loss": 0.2351, "step": 82600 }, { "epoch": 0.5863512393687905, "grad_norm": 0.08935546875, "learning_rate": 0.001970523019760789, "loss": 0.2441, "step": 82610 }, { "epoch": 0.5864222176086367, "grad_norm": 0.1767578125, "learning_rate": 0.0019705158351592204, "loss": 0.2293, "step": 82620 }, { "epoch": 0.5864931958484828, "grad_norm": 0.09375, "learning_rate": 0.001970508649696766, "loss": 0.2334, "step": 82630 }, { "epoch": 0.5865641740883288, "grad_norm": 0.09130859375, "learning_rate": 0.0019705014633734337, "loss": 0.2117, "step": 82640 }, { "epoch": 0.586635152328175, "grad_norm": 0.08935546875, "learning_rate": 0.00197049427618923, "loss": 0.2251, "step": 82650 }, { "epoch": 0.5867061305680211, "grad_norm": 0.083984375, "learning_rate": 0.0019704870881441616, "loss": 0.2313, "step": 82660 }, { "epoch": 0.5867771088078673, "grad_norm": 0.1298828125, "learning_rate": 0.0019704798992382366, "loss": 0.2196, "step": 82670 }, { "epoch": 0.5868480870477133, "grad_norm": 0.11181640625, "learning_rate": 0.0019704727094714608, "loss": 0.2354, "step": 82680 }, { "epoch": 0.5869190652875594, "grad_norm": 0.158203125, "learning_rate": 0.0019704655188438423, "loss": 0.2418, "step": 82690 }, { "epoch": 0.5869900435274056, "grad_norm": 0.08544921875, "learning_rate": 0.001970458327355388, "loss": 0.2361, "step": 82700 }, { "epoch": 0.5870610217672517, "grad_norm": 0.111328125, "learning_rate": 0.001970451135006105, "loss": 0.2558, "step": 82710 }, { "epoch": 0.5871320000070979, "grad_norm": 0.0908203125, "learning_rate": 0.0019704439417960007, "loss": 0.2193, "step": 82720 }, { "epoch": 0.5872029782469439, "grad_norm": 0.10498046875, "learning_rate": 0.0019704367477250814, "loss": 0.2339, "step": 82730 }, { "epoch": 0.58727395648679, "grad_norm": 0.11962890625, "learning_rate": 0.001970429552793355, "loss": 0.2466, "step": 82740 }, { "epoch": 0.5873449347266362, "grad_norm": 0.12060546875, "learning_rate": 0.0019704223570008277, "loss": 0.2218, "step": 82750 }, { "epoch": 0.5874159129664823, "grad_norm": 0.072265625, "learning_rate": 0.001970415160347508, "loss": 0.231, "step": 82760 }, { "epoch": 0.5874868912063285, "grad_norm": 0.083984375, "learning_rate": 0.0019704079628334015, "loss": 0.2237, "step": 82770 }, { "epoch": 0.5875578694461745, "grad_norm": 0.1005859375, "learning_rate": 0.001970400764458517, "loss": 0.2344, "step": 82780 }, { "epoch": 0.5876288476860206, "grad_norm": 0.298828125, "learning_rate": 0.0019703935652228602, "loss": 0.2291, "step": 82790 }, { "epoch": 0.5876998259258668, "grad_norm": 0.1376953125, "learning_rate": 0.001970386365126439, "loss": 0.2244, "step": 82800 }, { "epoch": 0.5877708041657129, "grad_norm": 0.08740234375, "learning_rate": 0.00197037916416926, "loss": 0.228, "step": 82810 }, { "epoch": 0.5878417824055591, "grad_norm": 0.0966796875, "learning_rate": 0.0019703719623513304, "loss": 0.233, "step": 82820 }, { "epoch": 0.5879127606454051, "grad_norm": 0.0888671875, "learning_rate": 0.0019703647596726577, "loss": 0.2457, "step": 82830 }, { "epoch": 0.5879837388852512, "grad_norm": 0.09814453125, "learning_rate": 0.0019703575561332487, "loss": 0.2393, "step": 82840 }, { "epoch": 0.5880547171250974, "grad_norm": 0.10986328125, "learning_rate": 0.0019703503517331106, "loss": 0.2108, "step": 82850 }, { "epoch": 0.5881256953649435, "grad_norm": 0.11669921875, "learning_rate": 0.001970343146472251, "loss": 0.234, "step": 82860 }, { "epoch": 0.5881966736047897, "grad_norm": 0.1474609375, "learning_rate": 0.001970335940350676, "loss": 0.227, "step": 82870 }, { "epoch": 0.5882676518446357, "grad_norm": 0.15625, "learning_rate": 0.0019703287333683933, "loss": 0.2257, "step": 82880 }, { "epoch": 0.5883386300844818, "grad_norm": 0.130859375, "learning_rate": 0.0019703215255254107, "loss": 0.2316, "step": 82890 }, { "epoch": 0.588409608324328, "grad_norm": 0.10546875, "learning_rate": 0.001970314316821734, "loss": 0.2344, "step": 82900 }, { "epoch": 0.5884805865641741, "grad_norm": 0.11328125, "learning_rate": 0.001970307107257371, "loss": 0.2352, "step": 82910 }, { "epoch": 0.5885515648040202, "grad_norm": 0.09765625, "learning_rate": 0.0019702998968323293, "loss": 0.2364, "step": 82920 }, { "epoch": 0.5886225430438663, "grad_norm": 0.07373046875, "learning_rate": 0.0019702926855466154, "loss": 0.2316, "step": 82930 }, { "epoch": 0.5886935212837124, "grad_norm": 0.130859375, "learning_rate": 0.001970285473400236, "loss": 0.2293, "step": 82940 }, { "epoch": 0.5887644995235586, "grad_norm": 0.166015625, "learning_rate": 0.0019702782603931994, "loss": 0.242, "step": 82950 }, { "epoch": 0.5888354777634047, "grad_norm": 0.166015625, "learning_rate": 0.001970271046525512, "loss": 0.237, "step": 82960 }, { "epoch": 0.5889064560032508, "grad_norm": 0.09716796875, "learning_rate": 0.001970263831797181, "loss": 0.2398, "step": 82970 }, { "epoch": 0.5889774342430969, "grad_norm": 0.1162109375, "learning_rate": 0.001970256616208214, "loss": 0.2204, "step": 82980 }, { "epoch": 0.589048412482943, "grad_norm": 0.11865234375, "learning_rate": 0.001970249399758617, "loss": 0.2474, "step": 82990 }, { "epoch": 0.5891193907227892, "grad_norm": 0.08984375, "learning_rate": 0.0019702421824483985, "loss": 0.2434, "step": 83000 }, { "epoch": 0.5891903689626353, "grad_norm": 0.0986328125, "learning_rate": 0.001970234964277565, "loss": 0.2332, "step": 83010 }, { "epoch": 0.5892613472024814, "grad_norm": 0.1455078125, "learning_rate": 0.0019702277452461232, "loss": 0.2385, "step": 83020 }, { "epoch": 0.5893323254423275, "grad_norm": 0.10546875, "learning_rate": 0.001970220525354081, "loss": 0.2359, "step": 83030 }, { "epoch": 0.5894033036821736, "grad_norm": 0.09228515625, "learning_rate": 0.0019702133046014454, "loss": 0.22, "step": 83040 }, { "epoch": 0.5894742819220198, "grad_norm": 0.08544921875, "learning_rate": 0.001970206082988223, "loss": 0.2297, "step": 83050 }, { "epoch": 0.5895452601618659, "grad_norm": 0.10302734375, "learning_rate": 0.0019701988605144217, "loss": 0.2269, "step": 83060 }, { "epoch": 0.589616238401712, "grad_norm": 0.10302734375, "learning_rate": 0.001970191637180048, "loss": 0.2387, "step": 83070 }, { "epoch": 0.5896872166415581, "grad_norm": 0.08935546875, "learning_rate": 0.0019701844129851095, "loss": 0.2359, "step": 83080 }, { "epoch": 0.5897581948814042, "grad_norm": 0.1220703125, "learning_rate": 0.001970177187929613, "loss": 0.234, "step": 83090 }, { "epoch": 0.5898291731212504, "grad_norm": 0.08935546875, "learning_rate": 0.001970169962013566, "loss": 0.2452, "step": 83100 }, { "epoch": 0.5899001513610965, "grad_norm": 0.09375, "learning_rate": 0.001970162735236975, "loss": 0.2403, "step": 83110 }, { "epoch": 0.5899711296009426, "grad_norm": 0.2197265625, "learning_rate": 0.0019701555075998483, "loss": 0.2363, "step": 83120 }, { "epoch": 0.5900421078407887, "grad_norm": 0.11572265625, "learning_rate": 0.001970148279102192, "loss": 0.2448, "step": 83130 }, { "epoch": 0.5901130860806348, "grad_norm": 0.08251953125, "learning_rate": 0.0019701410497440135, "loss": 0.2204, "step": 83140 }, { "epoch": 0.590184064320481, "grad_norm": 0.115234375, "learning_rate": 0.00197013381952532, "loss": 0.2199, "step": 83150 }, { "epoch": 0.590255042560327, "grad_norm": 0.115234375, "learning_rate": 0.0019701265884461187, "loss": 0.241, "step": 83160 }, { "epoch": 0.5903260208001732, "grad_norm": 0.08203125, "learning_rate": 0.0019701193565064166, "loss": 0.2152, "step": 83170 }, { "epoch": 0.5903969990400193, "grad_norm": 0.10595703125, "learning_rate": 0.0019701121237062215, "loss": 0.2617, "step": 83180 }, { "epoch": 0.5904679772798654, "grad_norm": 0.091796875, "learning_rate": 0.0019701048900455397, "loss": 0.2352, "step": 83190 }, { "epoch": 0.5905389555197116, "grad_norm": 0.09033203125, "learning_rate": 0.0019700976555243788, "loss": 0.2341, "step": 83200 }, { "epoch": 0.5906099337595576, "grad_norm": 0.11328125, "learning_rate": 0.0019700904201427455, "loss": 0.2368, "step": 83210 }, { "epoch": 0.5906809119994038, "grad_norm": 0.126953125, "learning_rate": 0.001970083183900648, "loss": 0.2482, "step": 83220 }, { "epoch": 0.5907518902392499, "grad_norm": 0.08056640625, "learning_rate": 0.001970075946798092, "loss": 0.2402, "step": 83230 }, { "epoch": 0.590822868479096, "grad_norm": 0.078125, "learning_rate": 0.001970068708835086, "loss": 0.2208, "step": 83240 }, { "epoch": 0.5908938467189422, "grad_norm": 0.09228515625, "learning_rate": 0.0019700614700116363, "loss": 0.263, "step": 83250 }, { "epoch": 0.5909648249587882, "grad_norm": 0.130859375, "learning_rate": 0.0019700542303277505, "loss": 0.2249, "step": 83260 }, { "epoch": 0.5910358031986344, "grad_norm": 0.09326171875, "learning_rate": 0.0019700469897834354, "loss": 0.2424, "step": 83270 }, { "epoch": 0.5911067814384805, "grad_norm": 0.09521484375, "learning_rate": 0.0019700397483786987, "loss": 0.2342, "step": 83280 }, { "epoch": 0.5911777596783266, "grad_norm": 0.07861328125, "learning_rate": 0.0019700325061135465, "loss": 0.2303, "step": 83290 }, { "epoch": 0.5912487379181728, "grad_norm": 0.12451171875, "learning_rate": 0.0019700252629879875, "loss": 0.2541, "step": 83300 }, { "epoch": 0.5913197161580188, "grad_norm": 0.10546875, "learning_rate": 0.0019700180190020274, "loss": 0.2461, "step": 83310 }, { "epoch": 0.591390694397865, "grad_norm": 0.08349609375, "learning_rate": 0.0019700107741556744, "loss": 0.2494, "step": 83320 }, { "epoch": 0.5914616726377111, "grad_norm": 0.11083984375, "learning_rate": 0.0019700035284489353, "loss": 0.2236, "step": 83330 }, { "epoch": 0.5915326508775572, "grad_norm": 0.0966796875, "learning_rate": 0.001969996281881817, "loss": 0.231, "step": 83340 }, { "epoch": 0.5916036291174034, "grad_norm": 0.1181640625, "learning_rate": 0.001969989034454327, "loss": 0.2585, "step": 83350 }, { "epoch": 0.5916746073572494, "grad_norm": 0.08740234375, "learning_rate": 0.001969981786166472, "loss": 0.2475, "step": 83360 }, { "epoch": 0.5917455855970956, "grad_norm": 0.08203125, "learning_rate": 0.0019699745370182604, "loss": 0.2441, "step": 83370 }, { "epoch": 0.5918165638369417, "grad_norm": 0.203125, "learning_rate": 0.0019699672870096977, "loss": 0.2373, "step": 83380 }, { "epoch": 0.5918875420767878, "grad_norm": 0.08203125, "learning_rate": 0.0019699600361407924, "loss": 0.2306, "step": 83390 }, { "epoch": 0.5919585203166339, "grad_norm": 0.1220703125, "learning_rate": 0.001969952784411551, "loss": 0.2304, "step": 83400 }, { "epoch": 0.59202949855648, "grad_norm": 0.064453125, "learning_rate": 0.0019699455318219804, "loss": 0.2111, "step": 83410 }, { "epoch": 0.5921004767963262, "grad_norm": 0.109375, "learning_rate": 0.001969938278372089, "loss": 0.2261, "step": 83420 }, { "epoch": 0.5921714550361723, "grad_norm": 0.1201171875, "learning_rate": 0.0019699310240618825, "loss": 0.2139, "step": 83430 }, { "epoch": 0.5922424332760184, "grad_norm": 0.1494140625, "learning_rate": 0.001969923768891369, "loss": 0.2483, "step": 83440 }, { "epoch": 0.5923134115158645, "grad_norm": 0.10595703125, "learning_rate": 0.0019699165128605556, "loss": 0.2283, "step": 83450 }, { "epoch": 0.5923843897557106, "grad_norm": 0.09130859375, "learning_rate": 0.0019699092559694485, "loss": 0.2353, "step": 83460 }, { "epoch": 0.5924553679955568, "grad_norm": 0.09716796875, "learning_rate": 0.0019699019982180564, "loss": 0.217, "step": 83470 }, { "epoch": 0.5925263462354029, "grad_norm": 0.1201171875, "learning_rate": 0.001969894739606386, "loss": 0.2272, "step": 83480 }, { "epoch": 0.592597324475249, "grad_norm": 0.1845703125, "learning_rate": 0.0019698874801344435, "loss": 0.2307, "step": 83490 }, { "epoch": 0.5926683027150951, "grad_norm": 0.09521484375, "learning_rate": 0.0019698802198022374, "loss": 0.2271, "step": 83500 }, { "epoch": 0.5927392809549412, "grad_norm": 0.09912109375, "learning_rate": 0.0019698729586097737, "loss": 0.2174, "step": 83510 }, { "epoch": 0.5928102591947874, "grad_norm": 0.1015625, "learning_rate": 0.0019698656965570606, "loss": 0.2325, "step": 83520 }, { "epoch": 0.5928812374346335, "grad_norm": 0.068359375, "learning_rate": 0.0019698584336441047, "loss": 0.2322, "step": 83530 }, { "epoch": 0.5929522156744796, "grad_norm": 0.1162109375, "learning_rate": 0.0019698511698709132, "loss": 0.2431, "step": 83540 }, { "epoch": 0.5930231939143257, "grad_norm": 0.126953125, "learning_rate": 0.0019698439052374937, "loss": 0.2351, "step": 83550 }, { "epoch": 0.5930941721541718, "grad_norm": 0.2041015625, "learning_rate": 0.001969836639743853, "loss": 0.2541, "step": 83560 }, { "epoch": 0.593165150394018, "grad_norm": 0.1435546875, "learning_rate": 0.001969829373389998, "loss": 0.2265, "step": 83570 }, { "epoch": 0.5932361286338641, "grad_norm": 0.1220703125, "learning_rate": 0.0019698221061759365, "loss": 0.2453, "step": 83580 }, { "epoch": 0.5933071068737102, "grad_norm": 0.1376953125, "learning_rate": 0.001969814838101676, "loss": 0.2283, "step": 83590 }, { "epoch": 0.5933780851135563, "grad_norm": 0.099609375, "learning_rate": 0.0019698075691672225, "loss": 0.234, "step": 83600 }, { "epoch": 0.5934490633534024, "grad_norm": 0.10986328125, "learning_rate": 0.001969800299372584, "loss": 0.2318, "step": 83610 }, { "epoch": 0.5935200415932486, "grad_norm": 0.111328125, "learning_rate": 0.0019697930287177674, "loss": 0.2129, "step": 83620 }, { "epoch": 0.5935910198330947, "grad_norm": 0.11865234375, "learning_rate": 0.00196978575720278, "loss": 0.2356, "step": 83630 }, { "epoch": 0.5936619980729407, "grad_norm": 0.10107421875, "learning_rate": 0.001969778484827629, "loss": 0.2317, "step": 83640 }, { "epoch": 0.5937329763127869, "grad_norm": 0.1591796875, "learning_rate": 0.0019697712115923217, "loss": 0.2284, "step": 83650 }, { "epoch": 0.593803954552633, "grad_norm": 0.10888671875, "learning_rate": 0.0019697639374968655, "loss": 0.2444, "step": 83660 }, { "epoch": 0.5938749327924792, "grad_norm": 0.1220703125, "learning_rate": 0.0019697566625412667, "loss": 0.235, "step": 83670 }, { "epoch": 0.5939459110323253, "grad_norm": 0.10400390625, "learning_rate": 0.0019697493867255338, "loss": 0.2458, "step": 83680 }, { "epoch": 0.5940168892721713, "grad_norm": 0.078125, "learning_rate": 0.0019697421100496725, "loss": 0.2159, "step": 83690 }, { "epoch": 0.5940878675120175, "grad_norm": 0.1005859375, "learning_rate": 0.001969734832513691, "loss": 0.2317, "step": 83700 }, { "epoch": 0.5941588457518636, "grad_norm": 0.1279296875, "learning_rate": 0.0019697275541175965, "loss": 0.2284, "step": 83710 }, { "epoch": 0.5942298239917098, "grad_norm": 0.09228515625, "learning_rate": 0.0019697202748613957, "loss": 0.221, "step": 83720 }, { "epoch": 0.5943008022315559, "grad_norm": 0.0869140625, "learning_rate": 0.0019697129947450964, "loss": 0.2456, "step": 83730 }, { "epoch": 0.5943717804714019, "grad_norm": 0.0673828125, "learning_rate": 0.0019697057137687047, "loss": 0.2414, "step": 83740 }, { "epoch": 0.5944427587112481, "grad_norm": 0.095703125, "learning_rate": 0.0019696984319322293, "loss": 0.2281, "step": 83750 }, { "epoch": 0.5945137369510942, "grad_norm": 0.1396484375, "learning_rate": 0.0019696911492356767, "loss": 0.2453, "step": 83760 }, { "epoch": 0.5945847151909404, "grad_norm": 0.095703125, "learning_rate": 0.0019696838656790535, "loss": 0.2219, "step": 83770 }, { "epoch": 0.5946556934307865, "grad_norm": 0.1640625, "learning_rate": 0.0019696765812623677, "loss": 0.2285, "step": 83780 }, { "epoch": 0.5947266716706325, "grad_norm": 0.08056640625, "learning_rate": 0.0019696692959856265, "loss": 0.2322, "step": 83790 }, { "epoch": 0.5947976499104787, "grad_norm": 0.0830078125, "learning_rate": 0.0019696620098488367, "loss": 0.2381, "step": 83800 }, { "epoch": 0.5948686281503248, "grad_norm": 0.17578125, "learning_rate": 0.0019696547228520056, "loss": 0.2297, "step": 83810 }, { "epoch": 0.594939606390171, "grad_norm": 0.11083984375, "learning_rate": 0.0019696474349951408, "loss": 0.2244, "step": 83820 }, { "epoch": 0.5950105846300171, "grad_norm": 0.1298828125, "learning_rate": 0.0019696401462782486, "loss": 0.238, "step": 83830 }, { "epoch": 0.5950815628698631, "grad_norm": 0.10693359375, "learning_rate": 0.0019696328567013373, "loss": 0.2123, "step": 83840 }, { "epoch": 0.5951525411097093, "grad_norm": 0.1474609375, "learning_rate": 0.0019696255662644134, "loss": 0.2326, "step": 83850 }, { "epoch": 0.5952235193495554, "grad_norm": 0.0771484375, "learning_rate": 0.0019696182749674843, "loss": 0.243, "step": 83860 }, { "epoch": 0.5952944975894016, "grad_norm": 0.1298828125, "learning_rate": 0.0019696109828105574, "loss": 0.231, "step": 83870 }, { "epoch": 0.5953654758292477, "grad_norm": 0.140625, "learning_rate": 0.0019696036897936396, "loss": 0.2287, "step": 83880 }, { "epoch": 0.5954364540690937, "grad_norm": 0.2060546875, "learning_rate": 0.001969596395916738, "loss": 0.2241, "step": 83890 }, { "epoch": 0.5955074323089399, "grad_norm": 0.09326171875, "learning_rate": 0.0019695891011798604, "loss": 0.235, "step": 83900 }, { "epoch": 0.595578410548786, "grad_norm": 0.103515625, "learning_rate": 0.0019695818055830137, "loss": 0.2353, "step": 83910 }, { "epoch": 0.5956493887886322, "grad_norm": 0.1552734375, "learning_rate": 0.001969574509126205, "loss": 0.2196, "step": 83920 }, { "epoch": 0.5957203670284782, "grad_norm": 0.197265625, "learning_rate": 0.0019695672118094417, "loss": 0.2337, "step": 83930 }, { "epoch": 0.5957913452683243, "grad_norm": 0.111328125, "learning_rate": 0.0019695599136327307, "loss": 0.2443, "step": 83940 }, { "epoch": 0.5958623235081705, "grad_norm": 0.1572265625, "learning_rate": 0.0019695526145960796, "loss": 0.2234, "step": 83950 }, { "epoch": 0.5959333017480166, "grad_norm": 0.11181640625, "learning_rate": 0.0019695453146994957, "loss": 0.2389, "step": 83960 }, { "epoch": 0.5960042799878628, "grad_norm": 0.1748046875, "learning_rate": 0.001969538013942986, "loss": 0.2533, "step": 83970 }, { "epoch": 0.5960752582277088, "grad_norm": 0.103515625, "learning_rate": 0.0019695307123265573, "loss": 0.2321, "step": 83980 }, { "epoch": 0.5961462364675549, "grad_norm": 0.1103515625, "learning_rate": 0.001969523409850217, "loss": 0.2356, "step": 83990 }, { "epoch": 0.5962172147074011, "grad_norm": 0.1611328125, "learning_rate": 0.001969516106513973, "loss": 0.237, "step": 84000 }, { "epoch": 0.5962172147074011, "eval_covost2-zh-en_loss": 3.7767739295959473, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.5544, "eval_covost2-zh-en_samples_per_second": 3.114, "eval_covost2-zh-en_steps_per_second": 0.195, "step": 84000 }, { "epoch": 0.5962172147074011, "eval_covost2-en-zh_loss": 3.168778896331787, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.5284, "eval_covost2-en-zh_samples_per_second": 3.118, "eval_covost2-en-zh_steps_per_second": 0.195, "step": 84000 }, { "epoch": 0.5962881929472472, "grad_norm": 0.12158203125, "learning_rate": 0.0019695088023178324, "loss": 0.231, "step": 84010 }, { "epoch": 0.5963591711870934, "grad_norm": 0.1572265625, "learning_rate": 0.0019695014972618014, "loss": 0.2326, "step": 84020 }, { "epoch": 0.5964301494269394, "grad_norm": 0.08203125, "learning_rate": 0.0019694941913458884, "loss": 0.22, "step": 84030 }, { "epoch": 0.5965011276667855, "grad_norm": 0.0849609375, "learning_rate": 0.0019694868845701, "loss": 0.2423, "step": 84040 }, { "epoch": 0.5965721059066317, "grad_norm": 0.107421875, "learning_rate": 0.0019694795769344435, "loss": 0.2172, "step": 84050 }, { "epoch": 0.5966430841464778, "grad_norm": 0.095703125, "learning_rate": 0.001969472268438926, "loss": 0.2394, "step": 84060 }, { "epoch": 0.596714062386324, "grad_norm": 0.08984375, "learning_rate": 0.001969464959083555, "loss": 0.2384, "step": 84070 }, { "epoch": 0.59678504062617, "grad_norm": 0.11279296875, "learning_rate": 0.001969457648868338, "loss": 0.2325, "step": 84080 }, { "epoch": 0.5968560188660161, "grad_norm": 0.0908203125, "learning_rate": 0.0019694503377932814, "loss": 0.2357, "step": 84090 }, { "epoch": 0.5969269971058623, "grad_norm": 0.1259765625, "learning_rate": 0.001969443025858393, "loss": 0.2443, "step": 84100 }, { "epoch": 0.5969979753457084, "grad_norm": 0.1044921875, "learning_rate": 0.0019694357130636804, "loss": 0.2349, "step": 84110 }, { "epoch": 0.5970689535855546, "grad_norm": 0.0849609375, "learning_rate": 0.00196942839940915, "loss": 0.2266, "step": 84120 }, { "epoch": 0.5971399318254006, "grad_norm": 0.158203125, "learning_rate": 0.0019694210848948093, "loss": 0.2498, "step": 84130 }, { "epoch": 0.5972109100652467, "grad_norm": 0.1279296875, "learning_rate": 0.001969413769520666, "loss": 0.2377, "step": 84140 }, { "epoch": 0.5972818883050929, "grad_norm": 0.13671875, "learning_rate": 0.0019694064532867267, "loss": 0.2285, "step": 84150 }, { "epoch": 0.597352866544939, "grad_norm": 0.1767578125, "learning_rate": 0.001969399136192999, "loss": 0.2192, "step": 84160 }, { "epoch": 0.597423844784785, "grad_norm": 0.162109375, "learning_rate": 0.00196939181823949, "loss": 0.2377, "step": 84170 }, { "epoch": 0.5974948230246312, "grad_norm": 0.0947265625, "learning_rate": 0.001969384499426207, "loss": 0.2204, "step": 84180 }, { "epoch": 0.5975658012644773, "grad_norm": 0.06494140625, "learning_rate": 0.0019693771797531573, "loss": 0.2207, "step": 84190 }, { "epoch": 0.5976367795043235, "grad_norm": 0.10888671875, "learning_rate": 0.001969369859220348, "loss": 0.2413, "step": 84200 }, { "epoch": 0.5977077577441696, "grad_norm": 0.1259765625, "learning_rate": 0.0019693625378277863, "loss": 0.2419, "step": 84210 }, { "epoch": 0.5977787359840157, "grad_norm": 0.0966796875, "learning_rate": 0.0019693552155754797, "loss": 0.2566, "step": 84220 }, { "epoch": 0.5978497142238618, "grad_norm": 0.111328125, "learning_rate": 0.001969347892463435, "loss": 0.2343, "step": 84230 }, { "epoch": 0.5979206924637079, "grad_norm": 0.09423828125, "learning_rate": 0.00196934056849166, "loss": 0.2257, "step": 84240 }, { "epoch": 0.5979916707035541, "grad_norm": 0.099609375, "learning_rate": 0.0019693332436601613, "loss": 0.2228, "step": 84250 }, { "epoch": 0.5980626489434002, "grad_norm": 0.154296875, "learning_rate": 0.001969325917968947, "loss": 0.2418, "step": 84260 }, { "epoch": 0.5981336271832463, "grad_norm": 0.10009765625, "learning_rate": 0.0019693185914180238, "loss": 0.2255, "step": 84270 }, { "epoch": 0.5982046054230924, "grad_norm": 0.099609375, "learning_rate": 0.0019693112640073985, "loss": 0.2461, "step": 84280 }, { "epoch": 0.5982755836629385, "grad_norm": 0.12158203125, "learning_rate": 0.0019693039357370793, "loss": 0.2425, "step": 84290 }, { "epoch": 0.5983465619027847, "grad_norm": 0.091796875, "learning_rate": 0.001969296606607073, "loss": 0.2253, "step": 84300 }, { "epoch": 0.5984175401426308, "grad_norm": 0.10302734375, "learning_rate": 0.0019692892766173865, "loss": 0.2412, "step": 84310 }, { "epoch": 0.5984885183824769, "grad_norm": 0.462890625, "learning_rate": 0.001969281945768028, "loss": 0.2487, "step": 84320 }, { "epoch": 0.598559496622323, "grad_norm": 0.10595703125, "learning_rate": 0.0019692746140590033, "loss": 0.2377, "step": 84330 }, { "epoch": 0.5986304748621691, "grad_norm": 0.1201171875, "learning_rate": 0.001969267281490321, "loss": 0.23, "step": 84340 }, { "epoch": 0.5987014531020153, "grad_norm": 0.1630859375, "learning_rate": 0.0019692599480619875, "loss": 0.2443, "step": 84350 }, { "epoch": 0.5987724313418614, "grad_norm": 0.1103515625, "learning_rate": 0.0019692526137740107, "loss": 0.2419, "step": 84360 }, { "epoch": 0.5988434095817075, "grad_norm": 0.11865234375, "learning_rate": 0.0019692452786263977, "loss": 0.234, "step": 84370 }, { "epoch": 0.5989143878215536, "grad_norm": 0.138671875, "learning_rate": 0.0019692379426191555, "loss": 0.2326, "step": 84380 }, { "epoch": 0.5989853660613997, "grad_norm": 0.119140625, "learning_rate": 0.001969230605752291, "loss": 0.2385, "step": 84390 }, { "epoch": 0.5990563443012459, "grad_norm": 0.10107421875, "learning_rate": 0.0019692232680258125, "loss": 0.2279, "step": 84400 }, { "epoch": 0.5991273225410919, "grad_norm": 0.10986328125, "learning_rate": 0.0019692159294397264, "loss": 0.2315, "step": 84410 }, { "epoch": 0.599198300780938, "grad_norm": 0.1240234375, "learning_rate": 0.0019692085899940402, "loss": 0.2256, "step": 84420 }, { "epoch": 0.5992692790207842, "grad_norm": 0.1513671875, "learning_rate": 0.0019692012496887613, "loss": 0.2427, "step": 84430 }, { "epoch": 0.5993402572606303, "grad_norm": 0.240234375, "learning_rate": 0.0019691939085238965, "loss": 0.2188, "step": 84440 }, { "epoch": 0.5994112355004765, "grad_norm": 0.0927734375, "learning_rate": 0.0019691865664994536, "loss": 0.2226, "step": 84450 }, { "epoch": 0.5994822137403225, "grad_norm": 0.091796875, "learning_rate": 0.00196917922361544, "loss": 0.2318, "step": 84460 }, { "epoch": 0.5995531919801687, "grad_norm": 0.08056640625, "learning_rate": 0.001969171879871862, "loss": 0.2262, "step": 84470 }, { "epoch": 0.5996241702200148, "grad_norm": 0.185546875, "learning_rate": 0.001969164535268728, "loss": 0.2255, "step": 84480 }, { "epoch": 0.5996951484598609, "grad_norm": 0.1083984375, "learning_rate": 0.0019691571898060446, "loss": 0.2303, "step": 84490 }, { "epoch": 0.5997661266997071, "grad_norm": 0.1591796875, "learning_rate": 0.001969149843483819, "loss": 0.2359, "step": 84500 }, { "epoch": 0.5998371049395531, "grad_norm": 0.08984375, "learning_rate": 0.001969142496302059, "loss": 0.2326, "step": 84510 }, { "epoch": 0.5999080831793993, "grad_norm": 0.0859375, "learning_rate": 0.001969135148260771, "loss": 0.2259, "step": 84520 }, { "epoch": 0.5999790614192454, "grad_norm": 0.10595703125, "learning_rate": 0.001969127799359964, "loss": 0.2213, "step": 84530 }, { "epoch": 0.6000500396590915, "grad_norm": 0.1015625, "learning_rate": 0.001969120449599643, "loss": 0.23, "step": 84540 }, { "epoch": 0.6001210178989377, "grad_norm": 0.1376953125, "learning_rate": 0.0019691130989798167, "loss": 0.2438, "step": 84550 }, { "epoch": 0.6001919961387837, "grad_norm": 0.1328125, "learning_rate": 0.0019691057475004917, "loss": 0.2323, "step": 84560 }, { "epoch": 0.6002629743786299, "grad_norm": 0.1455078125, "learning_rate": 0.0019690983951616762, "loss": 0.2386, "step": 84570 }, { "epoch": 0.600333952618476, "grad_norm": 0.1103515625, "learning_rate": 0.0019690910419633764, "loss": 0.241, "step": 84580 }, { "epoch": 0.6004049308583221, "grad_norm": 0.08203125, "learning_rate": 0.0019690836879056, "loss": 0.2192, "step": 84590 }, { "epoch": 0.6004759090981683, "grad_norm": 0.1103515625, "learning_rate": 0.0019690763329883545, "loss": 0.2364, "step": 84600 }, { "epoch": 0.6005468873380143, "grad_norm": 0.08984375, "learning_rate": 0.0019690689772116467, "loss": 0.229, "step": 84610 }, { "epoch": 0.6006178655778605, "grad_norm": 0.09423828125, "learning_rate": 0.0019690616205754845, "loss": 0.2336, "step": 84620 }, { "epoch": 0.6006888438177066, "grad_norm": 0.1904296875, "learning_rate": 0.0019690542630798748, "loss": 0.2251, "step": 84630 }, { "epoch": 0.6007598220575527, "grad_norm": 0.09033203125, "learning_rate": 0.001969046904724825, "loss": 0.2384, "step": 84640 }, { "epoch": 0.6008308002973988, "grad_norm": 0.11181640625, "learning_rate": 0.001969039545510342, "loss": 0.2532, "step": 84650 }, { "epoch": 0.6009017785372449, "grad_norm": 0.119140625, "learning_rate": 0.0019690321854364335, "loss": 0.2487, "step": 84660 }, { "epoch": 0.600972756777091, "grad_norm": 0.29296875, "learning_rate": 0.001969024824503106, "loss": 0.2712, "step": 84670 }, { "epoch": 0.6010437350169372, "grad_norm": 0.080078125, "learning_rate": 0.0019690174627103683, "loss": 0.2377, "step": 84680 }, { "epoch": 0.6011147132567833, "grad_norm": 0.10693359375, "learning_rate": 0.0019690101000582267, "loss": 0.2389, "step": 84690 }, { "epoch": 0.6011856914966294, "grad_norm": 0.11181640625, "learning_rate": 0.001969002736546688, "loss": 0.2449, "step": 84700 }, { "epoch": 0.6012566697364755, "grad_norm": 0.1123046875, "learning_rate": 0.0019689953721757606, "loss": 0.2281, "step": 84710 }, { "epoch": 0.6013276479763217, "grad_norm": 0.09423828125, "learning_rate": 0.001968988006945451, "loss": 0.2265, "step": 84720 }, { "epoch": 0.6013986262161678, "grad_norm": 0.0888671875, "learning_rate": 0.001968980640855767, "loss": 0.2207, "step": 84730 }, { "epoch": 0.6014696044560139, "grad_norm": 0.134765625, "learning_rate": 0.001968973273906715, "loss": 0.2162, "step": 84740 }, { "epoch": 0.60154058269586, "grad_norm": 0.072265625, "learning_rate": 0.001968965906098303, "loss": 0.2273, "step": 84750 }, { "epoch": 0.6016115609357061, "grad_norm": 0.19921875, "learning_rate": 0.0019689585374305387, "loss": 0.2424, "step": 84760 }, { "epoch": 0.6016825391755523, "grad_norm": 0.08447265625, "learning_rate": 0.001968951167903429, "loss": 0.2266, "step": 84770 }, { "epoch": 0.6017535174153984, "grad_norm": 0.083984375, "learning_rate": 0.0019689437975169803, "loss": 0.2319, "step": 84780 }, { "epoch": 0.6018244956552445, "grad_norm": 0.08740234375, "learning_rate": 0.001968936426271201, "loss": 0.2316, "step": 84790 }, { "epoch": 0.6018954738950906, "grad_norm": 0.11328125, "learning_rate": 0.001968929054166098, "loss": 0.23, "step": 84800 }, { "epoch": 0.6019664521349367, "grad_norm": 0.07763671875, "learning_rate": 0.001968921681201679, "loss": 0.2264, "step": 84810 }, { "epoch": 0.6020374303747829, "grad_norm": 0.08349609375, "learning_rate": 0.0019689143073779503, "loss": 0.2114, "step": 84820 }, { "epoch": 0.602108408614629, "grad_norm": 0.1220703125, "learning_rate": 0.0019689069326949203, "loss": 0.2429, "step": 84830 }, { "epoch": 0.6021793868544751, "grad_norm": 0.1103515625, "learning_rate": 0.0019688995571525954, "loss": 0.2158, "step": 84840 }, { "epoch": 0.6022503650943212, "grad_norm": 0.10400390625, "learning_rate": 0.0019688921807509835, "loss": 0.2214, "step": 84850 }, { "epoch": 0.6023213433341673, "grad_norm": 0.1318359375, "learning_rate": 0.001968884803490092, "loss": 0.212, "step": 84860 }, { "epoch": 0.6023923215740135, "grad_norm": 0.19140625, "learning_rate": 0.001968877425369927, "loss": 0.2536, "step": 84870 }, { "epoch": 0.6024632998138596, "grad_norm": 0.0693359375, "learning_rate": 0.001968870046390498, "loss": 0.2244, "step": 84880 }, { "epoch": 0.6025342780537056, "grad_norm": 0.12353515625, "learning_rate": 0.0019688626665518103, "loss": 0.2476, "step": 84890 }, { "epoch": 0.6026052562935518, "grad_norm": 0.1171875, "learning_rate": 0.001968855285853871, "loss": 0.2295, "step": 84900 }, { "epoch": 0.6026762345333979, "grad_norm": 0.1689453125, "learning_rate": 0.0019688479042966897, "loss": 0.2265, "step": 84910 }, { "epoch": 0.6027472127732441, "grad_norm": 0.09130859375, "learning_rate": 0.0019688405218802715, "loss": 0.2508, "step": 84920 }, { "epoch": 0.6028181910130902, "grad_norm": 0.07861328125, "learning_rate": 0.001968833138604625, "loss": 0.2175, "step": 84930 }, { "epoch": 0.6028891692529362, "grad_norm": 0.345703125, "learning_rate": 0.001968825754469756, "loss": 0.2469, "step": 84940 }, { "epoch": 0.6029601474927824, "grad_norm": 0.12158203125, "learning_rate": 0.001968818369475674, "loss": 0.2474, "step": 84950 }, { "epoch": 0.6030311257326285, "grad_norm": 0.1669921875, "learning_rate": 0.001968810983622384, "loss": 0.2543, "step": 84960 }, { "epoch": 0.6031021039724747, "grad_norm": 0.08740234375, "learning_rate": 0.001968803596909895, "loss": 0.2254, "step": 84970 }, { "epoch": 0.6031730822123208, "grad_norm": 0.171875, "learning_rate": 0.0019687962093382135, "loss": 0.2372, "step": 84980 }, { "epoch": 0.6032440604521668, "grad_norm": 0.13671875, "learning_rate": 0.001968788820907347, "loss": 0.2488, "step": 84990 }, { "epoch": 0.603315038692013, "grad_norm": 0.09130859375, "learning_rate": 0.001968781431617303, "loss": 0.2163, "step": 85000 }, { "epoch": 0.6033860169318591, "grad_norm": 0.10888671875, "learning_rate": 0.0019687740414680885, "loss": 0.2398, "step": 85010 }, { "epoch": 0.6034569951717053, "grad_norm": 0.09375, "learning_rate": 0.001968766650459711, "loss": 0.2283, "step": 85020 }, { "epoch": 0.6035279734115514, "grad_norm": 0.10400390625, "learning_rate": 0.0019687592585921777, "loss": 0.2351, "step": 85030 }, { "epoch": 0.6035989516513974, "grad_norm": 0.09521484375, "learning_rate": 0.001968751865865496, "loss": 0.2307, "step": 85040 }, { "epoch": 0.6036699298912436, "grad_norm": 0.11669921875, "learning_rate": 0.001968744472279673, "loss": 0.2308, "step": 85050 }, { "epoch": 0.6037409081310897, "grad_norm": 0.09619140625, "learning_rate": 0.001968737077834716, "loss": 0.2223, "step": 85060 }, { "epoch": 0.6038118863709359, "grad_norm": 0.10546875, "learning_rate": 0.0019687296825306325, "loss": 0.2332, "step": 85070 }, { "epoch": 0.603882864610782, "grad_norm": 0.12353515625, "learning_rate": 0.00196872228636743, "loss": 0.244, "step": 85080 }, { "epoch": 0.603953842850628, "grad_norm": 0.201171875, "learning_rate": 0.001968714889345116, "loss": 0.2274, "step": 85090 }, { "epoch": 0.6040248210904742, "grad_norm": 0.08154296875, "learning_rate": 0.001968707491463697, "loss": 0.2385, "step": 85100 }, { "epoch": 0.6040957993303203, "grad_norm": 0.1708984375, "learning_rate": 0.0019687000927231806, "loss": 0.2316, "step": 85110 }, { "epoch": 0.6041667775701665, "grad_norm": 0.11572265625, "learning_rate": 0.0019686926931235743, "loss": 0.2398, "step": 85120 }, { "epoch": 0.6042377558100126, "grad_norm": 0.12255859375, "learning_rate": 0.0019686852926648853, "loss": 0.2423, "step": 85130 }, { "epoch": 0.6043087340498586, "grad_norm": 0.10498046875, "learning_rate": 0.0019686778913471214, "loss": 0.2174, "step": 85140 }, { "epoch": 0.6043797122897048, "grad_norm": 0.10498046875, "learning_rate": 0.0019686704891702894, "loss": 0.2331, "step": 85150 }, { "epoch": 0.6044506905295509, "grad_norm": 0.0810546875, "learning_rate": 0.0019686630861343968, "loss": 0.2221, "step": 85160 }, { "epoch": 0.6045216687693971, "grad_norm": 0.1025390625, "learning_rate": 0.0019686556822394504, "loss": 0.2251, "step": 85170 }, { "epoch": 0.6045926470092431, "grad_norm": 0.10205078125, "learning_rate": 0.001968648277485458, "loss": 0.2227, "step": 85180 }, { "epoch": 0.6046636252490892, "grad_norm": 0.14453125, "learning_rate": 0.0019686408718724274, "loss": 0.2342, "step": 85190 }, { "epoch": 0.6047346034889354, "grad_norm": 0.0869140625, "learning_rate": 0.001968633465400365, "loss": 0.2237, "step": 85200 }, { "epoch": 0.6048055817287815, "grad_norm": 0.11865234375, "learning_rate": 0.0019686260580692783, "loss": 0.2639, "step": 85210 }, { "epoch": 0.6048765599686277, "grad_norm": 0.1474609375, "learning_rate": 0.0019686186498791758, "loss": 0.2389, "step": 85220 }, { "epoch": 0.6049475382084737, "grad_norm": 0.0966796875, "learning_rate": 0.0019686112408300632, "loss": 0.2383, "step": 85230 }, { "epoch": 0.6050185164483198, "grad_norm": 0.1025390625, "learning_rate": 0.0019686038309219486, "loss": 0.2452, "step": 85240 }, { "epoch": 0.605089494688166, "grad_norm": 0.2197265625, "learning_rate": 0.0019685964201548393, "loss": 0.2442, "step": 85250 }, { "epoch": 0.6051604729280121, "grad_norm": 0.1337890625, "learning_rate": 0.0019685890085287426, "loss": 0.2509, "step": 85260 }, { "epoch": 0.6052314511678583, "grad_norm": 0.15234375, "learning_rate": 0.001968581596043666, "loss": 0.2313, "step": 85270 }, { "epoch": 0.6053024294077043, "grad_norm": 0.1015625, "learning_rate": 0.001968574182699616, "loss": 0.2358, "step": 85280 }, { "epoch": 0.6053734076475504, "grad_norm": 0.1064453125, "learning_rate": 0.001968566768496601, "loss": 0.2419, "step": 85290 }, { "epoch": 0.6054443858873966, "grad_norm": 0.1337890625, "learning_rate": 0.0019685593534346283, "loss": 0.237, "step": 85300 }, { "epoch": 0.6055153641272427, "grad_norm": 0.103515625, "learning_rate": 0.001968551937513704, "loss": 0.2207, "step": 85310 }, { "epoch": 0.6055863423670889, "grad_norm": 0.09326171875, "learning_rate": 0.001968544520733837, "loss": 0.2278, "step": 85320 }, { "epoch": 0.6056573206069349, "grad_norm": 0.1025390625, "learning_rate": 0.0019685371030950336, "loss": 0.2224, "step": 85330 }, { "epoch": 0.605728298846781, "grad_norm": 0.115234375, "learning_rate": 0.0019685296845973017, "loss": 0.2159, "step": 85340 }, { "epoch": 0.6057992770866272, "grad_norm": 0.1904296875, "learning_rate": 0.001968522265240648, "loss": 0.2192, "step": 85350 }, { "epoch": 0.6058702553264733, "grad_norm": 0.1455078125, "learning_rate": 0.00196851484502508, "loss": 0.2324, "step": 85360 }, { "epoch": 0.6059412335663195, "grad_norm": 0.09033203125, "learning_rate": 0.001968507423950606, "loss": 0.227, "step": 85370 }, { "epoch": 0.6060122118061655, "grad_norm": 0.1279296875, "learning_rate": 0.0019685000020172325, "loss": 0.2214, "step": 85380 }, { "epoch": 0.6060831900460116, "grad_norm": 0.09375, "learning_rate": 0.0019684925792249662, "loss": 0.2244, "step": 85390 }, { "epoch": 0.6061541682858578, "grad_norm": 0.150390625, "learning_rate": 0.001968485155573816, "loss": 0.239, "step": 85400 }, { "epoch": 0.6062251465257039, "grad_norm": 0.076171875, "learning_rate": 0.001968477731063788, "loss": 0.23, "step": 85410 }, { "epoch": 0.60629612476555, "grad_norm": 0.1630859375, "learning_rate": 0.0019684703056948895, "loss": 0.2443, "step": 85420 }, { "epoch": 0.6063671030053961, "grad_norm": 0.08056640625, "learning_rate": 0.0019684628794671287, "loss": 0.2546, "step": 85430 }, { "epoch": 0.6064380812452422, "grad_norm": 0.115234375, "learning_rate": 0.001968455452380513, "loss": 0.2431, "step": 85440 }, { "epoch": 0.6065090594850884, "grad_norm": 0.08203125, "learning_rate": 0.001968448024435049, "loss": 0.2345, "step": 85450 }, { "epoch": 0.6065800377249345, "grad_norm": 0.10498046875, "learning_rate": 0.0019684405956307436, "loss": 0.2256, "step": 85460 }, { "epoch": 0.6066510159647805, "grad_norm": 0.138671875, "learning_rate": 0.001968433165967606, "loss": 0.2432, "step": 85470 }, { "epoch": 0.6067219942046267, "grad_norm": 0.08544921875, "learning_rate": 0.0019684257354456417, "loss": 0.2369, "step": 85480 }, { "epoch": 0.6067929724444728, "grad_norm": 0.08740234375, "learning_rate": 0.0019684183040648592, "loss": 0.2457, "step": 85490 }, { "epoch": 0.606863950684319, "grad_norm": 0.1044921875, "learning_rate": 0.0019684108718252655, "loss": 0.24, "step": 85500 }, { "epoch": 0.6069349289241651, "grad_norm": 0.0732421875, "learning_rate": 0.0019684034387268674, "loss": 0.2339, "step": 85510 }, { "epoch": 0.6070059071640111, "grad_norm": 0.08154296875, "learning_rate": 0.001968396004769673, "loss": 0.2366, "step": 85520 }, { "epoch": 0.6070768854038573, "grad_norm": 0.09326171875, "learning_rate": 0.001968388569953689, "loss": 0.2315, "step": 85530 }, { "epoch": 0.6071478636437034, "grad_norm": 0.15234375, "learning_rate": 0.0019683811342789234, "loss": 0.2399, "step": 85540 }, { "epoch": 0.6072188418835496, "grad_norm": 0.078125, "learning_rate": 0.0019683736977453834, "loss": 0.2214, "step": 85550 }, { "epoch": 0.6072898201233957, "grad_norm": 0.2470703125, "learning_rate": 0.001968366260353076, "loss": 0.2272, "step": 85560 }, { "epoch": 0.6073607983632417, "grad_norm": 0.1845703125, "learning_rate": 0.001968358822102009, "loss": 0.2282, "step": 85570 }, { "epoch": 0.6074317766030879, "grad_norm": 0.185546875, "learning_rate": 0.0019683513829921897, "loss": 0.2363, "step": 85580 }, { "epoch": 0.607502754842934, "grad_norm": 0.083984375, "learning_rate": 0.0019683439430236253, "loss": 0.2284, "step": 85590 }, { "epoch": 0.6075737330827802, "grad_norm": 0.08642578125, "learning_rate": 0.0019683365021963225, "loss": 0.2452, "step": 85600 }, { "epoch": 0.6076447113226263, "grad_norm": 0.12451171875, "learning_rate": 0.00196832906051029, "loss": 0.2591, "step": 85610 }, { "epoch": 0.6077156895624723, "grad_norm": 0.12060546875, "learning_rate": 0.0019683216179655345, "loss": 0.2589, "step": 85620 }, { "epoch": 0.6077866678023185, "grad_norm": 0.10986328125, "learning_rate": 0.0019683141745620632, "loss": 0.2164, "step": 85630 }, { "epoch": 0.6078576460421646, "grad_norm": 0.08837890625, "learning_rate": 0.001968306730299883, "loss": 0.2325, "step": 85640 }, { "epoch": 0.6079286242820108, "grad_norm": 0.11279296875, "learning_rate": 0.0019682992851790026, "loss": 0.2182, "step": 85650 }, { "epoch": 0.6079996025218568, "grad_norm": 0.25, "learning_rate": 0.0019682918391994283, "loss": 0.2131, "step": 85660 }, { "epoch": 0.608070580761703, "grad_norm": 0.07958984375, "learning_rate": 0.001968284392361168, "loss": 0.2362, "step": 85670 }, { "epoch": 0.6081415590015491, "grad_norm": 0.08203125, "learning_rate": 0.001968276944664229, "loss": 0.2338, "step": 85680 }, { "epoch": 0.6082125372413952, "grad_norm": 0.09814453125, "learning_rate": 0.001968269496108618, "loss": 0.2207, "step": 85690 }, { "epoch": 0.6082835154812414, "grad_norm": 0.1630859375, "learning_rate": 0.0019682620466943434, "loss": 0.2318, "step": 85700 }, { "epoch": 0.6083544937210874, "grad_norm": 0.06298828125, "learning_rate": 0.001968254596421412, "loss": 0.2392, "step": 85710 }, { "epoch": 0.6084254719609336, "grad_norm": 0.2021484375, "learning_rate": 0.0019682471452898307, "loss": 0.2504, "step": 85720 }, { "epoch": 0.6084964502007797, "grad_norm": 0.40234375, "learning_rate": 0.001968239693299608, "loss": 0.2342, "step": 85730 }, { "epoch": 0.6085674284406258, "grad_norm": 0.11376953125, "learning_rate": 0.0019682322404507503, "loss": 0.2309, "step": 85740 }, { "epoch": 0.608638406680472, "grad_norm": 0.09423828125, "learning_rate": 0.0019682247867432653, "loss": 0.2286, "step": 85750 }, { "epoch": 0.608709384920318, "grad_norm": 0.0986328125, "learning_rate": 0.0019682173321771607, "loss": 0.2268, "step": 85760 }, { "epoch": 0.6087803631601642, "grad_norm": 0.09814453125, "learning_rate": 0.001968209876752443, "loss": 0.2155, "step": 85770 }, { "epoch": 0.6088513414000103, "grad_norm": 0.10888671875, "learning_rate": 0.0019682024204691205, "loss": 0.2236, "step": 85780 }, { "epoch": 0.6089223196398564, "grad_norm": 0.07568359375, "learning_rate": 0.0019681949633272005, "loss": 0.2415, "step": 85790 }, { "epoch": 0.6089932978797026, "grad_norm": 0.1884765625, "learning_rate": 0.00196818750532669, "loss": 0.2354, "step": 85800 }, { "epoch": 0.6090642761195486, "grad_norm": 0.12255859375, "learning_rate": 0.001968180046467596, "loss": 0.2129, "step": 85810 }, { "epoch": 0.6091352543593948, "grad_norm": 0.10693359375, "learning_rate": 0.0019681725867499267, "loss": 0.2339, "step": 85820 }, { "epoch": 0.6092062325992409, "grad_norm": 0.216796875, "learning_rate": 0.001968165126173689, "loss": 0.2439, "step": 85830 }, { "epoch": 0.609277210839087, "grad_norm": 0.08056640625, "learning_rate": 0.001968157664738891, "loss": 0.234, "step": 85840 }, { "epoch": 0.6093481890789332, "grad_norm": 0.0859375, "learning_rate": 0.0019681502024455387, "loss": 0.2416, "step": 85850 }, { "epoch": 0.6094191673187792, "grad_norm": 0.08642578125, "learning_rate": 0.0019681427392936406, "loss": 0.2176, "step": 85860 }, { "epoch": 0.6094901455586254, "grad_norm": 0.09765625, "learning_rate": 0.001968135275283204, "loss": 0.2472, "step": 85870 }, { "epoch": 0.6095611237984715, "grad_norm": 0.07666015625, "learning_rate": 0.0019681278104142354, "loss": 0.2275, "step": 85880 }, { "epoch": 0.6096321020383176, "grad_norm": 0.10107421875, "learning_rate": 0.001968120344686743, "loss": 0.2374, "step": 85890 }, { "epoch": 0.6097030802781637, "grad_norm": 0.109375, "learning_rate": 0.001968112878100734, "loss": 0.2296, "step": 85900 }, { "epoch": 0.6097740585180098, "grad_norm": 0.142578125, "learning_rate": 0.001968105410656216, "loss": 0.2189, "step": 85910 }, { "epoch": 0.609845036757856, "grad_norm": 0.11962890625, "learning_rate": 0.001968097942353196, "loss": 0.2362, "step": 85920 }, { "epoch": 0.6099160149977021, "grad_norm": 0.10302734375, "learning_rate": 0.0019680904731916816, "loss": 0.2197, "step": 85930 }, { "epoch": 0.6099869932375482, "grad_norm": 0.10107421875, "learning_rate": 0.00196808300317168, "loss": 0.2339, "step": 85940 }, { "epoch": 0.6100579714773943, "grad_norm": 0.09228515625, "learning_rate": 0.0019680755322931992, "loss": 0.2221, "step": 85950 }, { "epoch": 0.6101289497172404, "grad_norm": 0.13671875, "learning_rate": 0.0019680680605562457, "loss": 0.2336, "step": 85960 }, { "epoch": 0.6101999279570866, "grad_norm": 0.140625, "learning_rate": 0.001968060587960827, "loss": 0.2158, "step": 85970 }, { "epoch": 0.6102709061969327, "grad_norm": 0.1220703125, "learning_rate": 0.0019680531145069512, "loss": 0.2359, "step": 85980 }, { "epoch": 0.6103418844367788, "grad_norm": 0.060546875, "learning_rate": 0.001968045640194625, "loss": 0.24, "step": 85990 }, { "epoch": 0.6104128626766249, "grad_norm": 0.2099609375, "learning_rate": 0.001968038165023856, "loss": 0.2343, "step": 86000 }, { "epoch": 0.6104128626766249, "eval_covost2-zh-en_loss": 3.816743850708008, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.3879, "eval_covost2-zh-en_samples_per_second": 3.139, "eval_covost2-zh-en_steps_per_second": 0.196, "step": 86000 }, { "epoch": 0.6104128626766249, "eval_covost2-en-zh_loss": 3.1277928352355957, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.3766, "eval_covost2-en-zh_samples_per_second": 3.141, "eval_covost2-en-zh_steps_per_second": 0.196, "step": 86000 }, { "epoch": 0.610483840916471, "grad_norm": 0.10302734375, "learning_rate": 0.001968030688994652, "loss": 0.2434, "step": 86010 }, { "epoch": 0.6105548191563172, "grad_norm": 0.11376953125, "learning_rate": 0.0019680232121070205, "loss": 0.2301, "step": 86020 }, { "epoch": 0.6106257973961633, "grad_norm": 0.1484375, "learning_rate": 0.0019680157343609675, "loss": 0.2373, "step": 86030 }, { "epoch": 0.6106967756360094, "grad_norm": 0.12890625, "learning_rate": 0.0019680082557565017, "loss": 0.238, "step": 86040 }, { "epoch": 0.6107677538758555, "grad_norm": 0.11572265625, "learning_rate": 0.0019680007762936306, "loss": 0.2407, "step": 86050 }, { "epoch": 0.6108387321157016, "grad_norm": 0.08544921875, "learning_rate": 0.0019679932959723602, "loss": 0.2355, "step": 86060 }, { "epoch": 0.6109097103555478, "grad_norm": 0.0849609375, "learning_rate": 0.0019679858147926992, "loss": 0.2259, "step": 86070 }, { "epoch": 0.6109806885953939, "grad_norm": 0.150390625, "learning_rate": 0.001967978332754655, "loss": 0.2195, "step": 86080 }, { "epoch": 0.61105166683524, "grad_norm": 0.08544921875, "learning_rate": 0.001967970849858235, "loss": 0.2312, "step": 86090 }, { "epoch": 0.6111226450750861, "grad_norm": 0.09423828125, "learning_rate": 0.001967963366103445, "loss": 0.23, "step": 86100 }, { "epoch": 0.6111936233149322, "grad_norm": 0.0791015625, "learning_rate": 0.0019679558814902944, "loss": 0.2288, "step": 86110 }, { "epoch": 0.6112646015547784, "grad_norm": 0.09765625, "learning_rate": 0.0019679483960187896, "loss": 0.2258, "step": 86120 }, { "epoch": 0.6113355797946245, "grad_norm": 0.12890625, "learning_rate": 0.0019679409096889384, "loss": 0.2286, "step": 86130 }, { "epoch": 0.6114065580344706, "grad_norm": 0.08935546875, "learning_rate": 0.0019679334225007478, "loss": 0.2305, "step": 86140 }, { "epoch": 0.6114775362743167, "grad_norm": 0.07275390625, "learning_rate": 0.001967925934454226, "loss": 0.2226, "step": 86150 }, { "epoch": 0.6115485145141628, "grad_norm": 0.0986328125, "learning_rate": 0.001967918445549379, "loss": 0.2783, "step": 86160 }, { "epoch": 0.611619492754009, "grad_norm": 0.162109375, "learning_rate": 0.0019679109557862156, "loss": 0.2517, "step": 86170 }, { "epoch": 0.6116904709938551, "grad_norm": 0.09814453125, "learning_rate": 0.0019679034651647427, "loss": 0.2399, "step": 86180 }, { "epoch": 0.6117614492337011, "grad_norm": 0.14453125, "learning_rate": 0.001967895973684967, "loss": 0.2382, "step": 86190 }, { "epoch": 0.6118324274735473, "grad_norm": 0.1318359375, "learning_rate": 0.0019678884813468973, "loss": 0.2406, "step": 86200 }, { "epoch": 0.6119034057133934, "grad_norm": 0.12060546875, "learning_rate": 0.00196788098815054, "loss": 0.2253, "step": 86210 }, { "epoch": 0.6119743839532396, "grad_norm": 0.0849609375, "learning_rate": 0.0019678734940959033, "loss": 0.2385, "step": 86220 }, { "epoch": 0.6120453621930857, "grad_norm": 0.11279296875, "learning_rate": 0.001967865999182994, "loss": 0.2423, "step": 86230 }, { "epoch": 0.6121163404329317, "grad_norm": 0.10595703125, "learning_rate": 0.001967858503411819, "loss": 0.2334, "step": 86240 }, { "epoch": 0.6121873186727779, "grad_norm": 0.10986328125, "learning_rate": 0.0019678510067823863, "loss": 0.2222, "step": 86250 }, { "epoch": 0.612258296912624, "grad_norm": 0.10595703125, "learning_rate": 0.001967843509294704, "loss": 0.2321, "step": 86260 }, { "epoch": 0.6123292751524702, "grad_norm": 0.10009765625, "learning_rate": 0.0019678360109487785, "loss": 0.2223, "step": 86270 }, { "epoch": 0.6124002533923163, "grad_norm": 0.13671875, "learning_rate": 0.0019678285117446176, "loss": 0.2184, "step": 86280 }, { "epoch": 0.6124712316321623, "grad_norm": 0.1318359375, "learning_rate": 0.0019678210116822288, "loss": 0.2121, "step": 86290 }, { "epoch": 0.6125422098720085, "grad_norm": 0.11328125, "learning_rate": 0.0019678135107616193, "loss": 0.2333, "step": 86300 }, { "epoch": 0.6126131881118546, "grad_norm": 0.07080078125, "learning_rate": 0.0019678060089827966, "loss": 0.2279, "step": 86310 }, { "epoch": 0.6126841663517008, "grad_norm": 0.06689453125, "learning_rate": 0.001967798506345768, "loss": 0.2262, "step": 86320 }, { "epoch": 0.6127551445915469, "grad_norm": 0.1552734375, "learning_rate": 0.0019677910028505414, "loss": 0.2425, "step": 86330 }, { "epoch": 0.6128261228313929, "grad_norm": 0.08447265625, "learning_rate": 0.0019677834984971236, "loss": 0.2306, "step": 86340 }, { "epoch": 0.6128971010712391, "grad_norm": 0.1142578125, "learning_rate": 0.0019677759932855226, "loss": 0.227, "step": 86350 }, { "epoch": 0.6129680793110852, "grad_norm": 0.0830078125, "learning_rate": 0.0019677684872157456, "loss": 0.2493, "step": 86360 }, { "epoch": 0.6130390575509314, "grad_norm": 0.091796875, "learning_rate": 0.0019677609802877995, "loss": 0.2387, "step": 86370 }, { "epoch": 0.6131100357907775, "grad_norm": 0.099609375, "learning_rate": 0.0019677534725016928, "loss": 0.2362, "step": 86380 }, { "epoch": 0.6131810140306235, "grad_norm": 0.11474609375, "learning_rate": 0.0019677459638574317, "loss": 0.2233, "step": 86390 }, { "epoch": 0.6132519922704697, "grad_norm": 0.224609375, "learning_rate": 0.0019677384543550246, "loss": 0.2403, "step": 86400 }, { "epoch": 0.6133229705103158, "grad_norm": 0.0859375, "learning_rate": 0.001967730943994478, "loss": 0.2487, "step": 86410 }, { "epoch": 0.613393948750162, "grad_norm": 0.1083984375, "learning_rate": 0.0019677234327758007, "loss": 0.2223, "step": 86420 }, { "epoch": 0.613464926990008, "grad_norm": 0.1494140625, "learning_rate": 0.0019677159206989985, "loss": 0.2277, "step": 86430 }, { "epoch": 0.6135359052298541, "grad_norm": 0.09375, "learning_rate": 0.00196770840776408, "loss": 0.2526, "step": 86440 }, { "epoch": 0.6136068834697003, "grad_norm": 0.15234375, "learning_rate": 0.0019677008939710527, "loss": 0.2459, "step": 86450 }, { "epoch": 0.6136778617095464, "grad_norm": 0.1259765625, "learning_rate": 0.001967693379319923, "loss": 0.2218, "step": 86460 }, { "epoch": 0.6137488399493926, "grad_norm": 0.08447265625, "learning_rate": 0.001967685863810699, "loss": 0.2333, "step": 86470 }, { "epoch": 0.6138198181892386, "grad_norm": 0.1318359375, "learning_rate": 0.0019676783474433883, "loss": 0.227, "step": 86480 }, { "epoch": 0.6138907964290847, "grad_norm": 0.11181640625, "learning_rate": 0.001967670830217998, "loss": 0.2356, "step": 86490 }, { "epoch": 0.6139617746689309, "grad_norm": 0.154296875, "learning_rate": 0.0019676633121345354, "loss": 0.2249, "step": 86500 }, { "epoch": 0.614032752908777, "grad_norm": 0.1220703125, "learning_rate": 0.0019676557931930087, "loss": 0.2213, "step": 86510 }, { "epoch": 0.6141037311486232, "grad_norm": 0.11181640625, "learning_rate": 0.0019676482733934244, "loss": 0.231, "step": 86520 }, { "epoch": 0.6141747093884692, "grad_norm": 0.09228515625, "learning_rate": 0.0019676407527357905, "loss": 0.2462, "step": 86530 }, { "epoch": 0.6142456876283153, "grad_norm": 0.1240234375, "learning_rate": 0.001967633231220114, "loss": 0.2296, "step": 86540 }, { "epoch": 0.6143166658681615, "grad_norm": 0.07568359375, "learning_rate": 0.0019676257088464028, "loss": 0.226, "step": 86550 }, { "epoch": 0.6143876441080076, "grad_norm": 0.09814453125, "learning_rate": 0.0019676181856146642, "loss": 0.2271, "step": 86560 }, { "epoch": 0.6144586223478538, "grad_norm": 0.0771484375, "learning_rate": 0.0019676106615249054, "loss": 0.2274, "step": 86570 }, { "epoch": 0.6145296005876998, "grad_norm": 0.07470703125, "learning_rate": 0.0019676031365771346, "loss": 0.2242, "step": 86580 }, { "epoch": 0.6146005788275459, "grad_norm": 0.09228515625, "learning_rate": 0.001967595610771358, "loss": 0.2341, "step": 86590 }, { "epoch": 0.6146715570673921, "grad_norm": 0.12255859375, "learning_rate": 0.001967588084107584, "loss": 0.2218, "step": 86600 }, { "epoch": 0.6147425353072382, "grad_norm": 0.146484375, "learning_rate": 0.00196758055658582, "loss": 0.2393, "step": 86610 }, { "epoch": 0.6148135135470844, "grad_norm": 0.10107421875, "learning_rate": 0.0019675730282060727, "loss": 0.2495, "step": 86620 }, { "epoch": 0.6148844917869304, "grad_norm": 0.52734375, "learning_rate": 0.0019675654989683503, "loss": 0.2302, "step": 86630 }, { "epoch": 0.6149554700267765, "grad_norm": 0.09033203125, "learning_rate": 0.00196755796887266, "loss": 0.2251, "step": 86640 }, { "epoch": 0.6150264482666227, "grad_norm": 0.2470703125, "learning_rate": 0.0019675504379190094, "loss": 0.2313, "step": 86650 }, { "epoch": 0.6150974265064688, "grad_norm": 0.07177734375, "learning_rate": 0.0019675429061074053, "loss": 0.2319, "step": 86660 }, { "epoch": 0.6151684047463148, "grad_norm": 0.1376953125, "learning_rate": 0.001967535373437856, "loss": 0.2367, "step": 86670 }, { "epoch": 0.615239382986161, "grad_norm": 0.068359375, "learning_rate": 0.001967527839910369, "loss": 0.2202, "step": 86680 }, { "epoch": 0.6153103612260071, "grad_norm": 0.08203125, "learning_rate": 0.0019675203055249503, "loss": 0.2222, "step": 86690 }, { "epoch": 0.6153813394658533, "grad_norm": 0.0908203125, "learning_rate": 0.001967512770281609, "loss": 0.2363, "step": 86700 }, { "epoch": 0.6154523177056994, "grad_norm": 0.123046875, "learning_rate": 0.0019675052341803522, "loss": 0.2361, "step": 86710 }, { "epoch": 0.6155232959455454, "grad_norm": 0.11572265625, "learning_rate": 0.0019674976972211865, "loss": 0.232, "step": 86720 }, { "epoch": 0.6155942741853916, "grad_norm": 0.0771484375, "learning_rate": 0.0019674901594041202, "loss": 0.2369, "step": 86730 }, { "epoch": 0.6156652524252377, "grad_norm": 0.0908203125, "learning_rate": 0.0019674826207291608, "loss": 0.2139, "step": 86740 }, { "epoch": 0.6157362306650839, "grad_norm": 0.126953125, "learning_rate": 0.0019674750811963155, "loss": 0.2403, "step": 86750 }, { "epoch": 0.61580720890493, "grad_norm": 0.06396484375, "learning_rate": 0.001967467540805591, "loss": 0.2363, "step": 86760 }, { "epoch": 0.615878187144776, "grad_norm": 0.1298828125, "learning_rate": 0.001967459999556996, "loss": 0.2158, "step": 86770 }, { "epoch": 0.6159491653846222, "grad_norm": 0.09716796875, "learning_rate": 0.0019674524574505373, "loss": 0.2327, "step": 86780 }, { "epoch": 0.6160201436244683, "grad_norm": 0.12158203125, "learning_rate": 0.0019674449144862225, "loss": 0.2231, "step": 86790 }, { "epoch": 0.6160911218643145, "grad_norm": 0.1416015625, "learning_rate": 0.0019674373706640587, "loss": 0.2122, "step": 86800 }, { "epoch": 0.6161621001041606, "grad_norm": 0.10498046875, "learning_rate": 0.0019674298259840543, "loss": 0.243, "step": 86810 }, { "epoch": 0.6162330783440066, "grad_norm": 0.09326171875, "learning_rate": 0.0019674222804462157, "loss": 0.2377, "step": 86820 }, { "epoch": 0.6163040565838528, "grad_norm": 0.0791015625, "learning_rate": 0.0019674147340505515, "loss": 0.2271, "step": 86830 }, { "epoch": 0.6163750348236989, "grad_norm": 0.10205078125, "learning_rate": 0.001967407186797068, "loss": 0.2313, "step": 86840 }, { "epoch": 0.6164460130635451, "grad_norm": 0.09619140625, "learning_rate": 0.001967399638685773, "loss": 0.2264, "step": 86850 }, { "epoch": 0.6165169913033912, "grad_norm": 0.1015625, "learning_rate": 0.0019673920897166743, "loss": 0.2192, "step": 86860 }, { "epoch": 0.6165879695432372, "grad_norm": 0.1416015625, "learning_rate": 0.001967384539889779, "loss": 0.2371, "step": 86870 }, { "epoch": 0.6166589477830834, "grad_norm": 0.12158203125, "learning_rate": 0.0019673769892050953, "loss": 0.2227, "step": 86880 }, { "epoch": 0.6167299260229295, "grad_norm": 0.07861328125, "learning_rate": 0.0019673694376626297, "loss": 0.224, "step": 86890 }, { "epoch": 0.6168009042627757, "grad_norm": 0.091796875, "learning_rate": 0.0019673618852623902, "loss": 0.2209, "step": 86900 }, { "epoch": 0.6168718825026217, "grad_norm": 0.205078125, "learning_rate": 0.001967354332004384, "loss": 0.2429, "step": 86910 }, { "epoch": 0.6169428607424678, "grad_norm": 0.07421875, "learning_rate": 0.001967346777888619, "loss": 0.229, "step": 86920 }, { "epoch": 0.617013838982314, "grad_norm": 0.267578125, "learning_rate": 0.0019673392229151024, "loss": 0.2476, "step": 86930 }, { "epoch": 0.6170848172221601, "grad_norm": 0.1708984375, "learning_rate": 0.0019673316670838414, "loss": 0.2336, "step": 86940 }, { "epoch": 0.6171557954620063, "grad_norm": 0.1259765625, "learning_rate": 0.0019673241103948438, "loss": 0.2524, "step": 86950 }, { "epoch": 0.6172267737018523, "grad_norm": 0.11083984375, "learning_rate": 0.001967316552848117, "loss": 0.2344, "step": 86960 }, { "epoch": 0.6172977519416984, "grad_norm": 0.1025390625, "learning_rate": 0.0019673089944436685, "loss": 0.2247, "step": 86970 }, { "epoch": 0.6173687301815446, "grad_norm": 0.12255859375, "learning_rate": 0.001967301435181506, "loss": 0.2468, "step": 86980 }, { "epoch": 0.6174397084213907, "grad_norm": 0.15234375, "learning_rate": 0.0019672938750616366, "loss": 0.2333, "step": 86990 }, { "epoch": 0.6175106866612369, "grad_norm": 0.1171875, "learning_rate": 0.001967286314084068, "loss": 0.2295, "step": 87000 }, { "epoch": 0.6175816649010829, "grad_norm": 0.1416015625, "learning_rate": 0.0019672787522488073, "loss": 0.2355, "step": 87010 }, { "epoch": 0.617652643140929, "grad_norm": 0.091796875, "learning_rate": 0.0019672711895558624, "loss": 0.2292, "step": 87020 }, { "epoch": 0.6177236213807752, "grad_norm": 0.115234375, "learning_rate": 0.0019672636260052406, "loss": 0.2377, "step": 87030 }, { "epoch": 0.6177945996206213, "grad_norm": 0.08544921875, "learning_rate": 0.00196725606159695, "loss": 0.221, "step": 87040 }, { "epoch": 0.6178655778604675, "grad_norm": 0.0966796875, "learning_rate": 0.0019672484963309967, "loss": 0.2318, "step": 87050 }, { "epoch": 0.6179365561003135, "grad_norm": 0.12890625, "learning_rate": 0.0019672409302073893, "loss": 0.2312, "step": 87060 }, { "epoch": 0.6180075343401596, "grad_norm": 0.09716796875, "learning_rate": 0.001967233363226135, "loss": 0.2266, "step": 87070 }, { "epoch": 0.6180785125800058, "grad_norm": 0.1103515625, "learning_rate": 0.0019672257953872413, "loss": 0.2393, "step": 87080 }, { "epoch": 0.6181494908198519, "grad_norm": 0.1455078125, "learning_rate": 0.0019672182266907154, "loss": 0.2334, "step": 87090 }, { "epoch": 0.6182204690596981, "grad_norm": 0.08349609375, "learning_rate": 0.0019672106571365653, "loss": 0.2548, "step": 87100 }, { "epoch": 0.6182914472995441, "grad_norm": 0.1533203125, "learning_rate": 0.001967203086724798, "loss": 0.2466, "step": 87110 }, { "epoch": 0.6183624255393902, "grad_norm": 0.126953125, "learning_rate": 0.0019671955154554215, "loss": 0.2234, "step": 87120 }, { "epoch": 0.6184334037792364, "grad_norm": 0.064453125, "learning_rate": 0.0019671879433284427, "loss": 0.2285, "step": 87130 }, { "epoch": 0.6185043820190825, "grad_norm": 0.2578125, "learning_rate": 0.0019671803703438695, "loss": 0.2273, "step": 87140 }, { "epoch": 0.6185753602589286, "grad_norm": 0.10791015625, "learning_rate": 0.0019671727965017093, "loss": 0.249, "step": 87150 }, { "epoch": 0.6186463384987747, "grad_norm": 0.15625, "learning_rate": 0.0019671652218019695, "loss": 0.2333, "step": 87160 }, { "epoch": 0.6187173167386208, "grad_norm": 0.10986328125, "learning_rate": 0.001967157646244658, "loss": 0.2235, "step": 87170 }, { "epoch": 0.618788294978467, "grad_norm": 0.11328125, "learning_rate": 0.0019671500698297817, "loss": 0.2248, "step": 87180 }, { "epoch": 0.6188592732183131, "grad_norm": 0.0849609375, "learning_rate": 0.001967142492557348, "loss": 0.2286, "step": 87190 }, { "epoch": 0.6189302514581592, "grad_norm": 0.08251953125, "learning_rate": 0.0019671349144273647, "loss": 0.2239, "step": 87200 }, { "epoch": 0.6190012296980053, "grad_norm": 0.08544921875, "learning_rate": 0.0019671273354398395, "loss": 0.2195, "step": 87210 }, { "epoch": 0.6190722079378514, "grad_norm": 0.099609375, "learning_rate": 0.0019671197555947797, "loss": 0.2382, "step": 87220 }, { "epoch": 0.6191431861776976, "grad_norm": 0.142578125, "learning_rate": 0.001967112174892193, "loss": 0.2208, "step": 87230 }, { "epoch": 0.6192141644175437, "grad_norm": 0.1396484375, "learning_rate": 0.0019671045933320865, "loss": 0.2293, "step": 87240 }, { "epoch": 0.6192851426573898, "grad_norm": 0.1455078125, "learning_rate": 0.0019670970109144682, "loss": 0.2289, "step": 87250 }, { "epoch": 0.6193561208972359, "grad_norm": 0.205078125, "learning_rate": 0.001967089427639345, "loss": 0.225, "step": 87260 }, { "epoch": 0.619427099137082, "grad_norm": 0.07763671875, "learning_rate": 0.0019670818435067244, "loss": 0.2216, "step": 87270 }, { "epoch": 0.6194980773769282, "grad_norm": 0.095703125, "learning_rate": 0.001967074258516615, "loss": 0.2582, "step": 87280 }, { "epoch": 0.6195690556167743, "grad_norm": 0.1416015625, "learning_rate": 0.001967066672669023, "loss": 0.2163, "step": 87290 }, { "epoch": 0.6196400338566204, "grad_norm": 0.087890625, "learning_rate": 0.001967059085963956, "loss": 0.2324, "step": 87300 }, { "epoch": 0.6197110120964665, "grad_norm": 0.1025390625, "learning_rate": 0.001967051498401423, "loss": 0.2397, "step": 87310 }, { "epoch": 0.6197819903363126, "grad_norm": 0.10302734375, "learning_rate": 0.0019670439099814295, "loss": 0.2257, "step": 87320 }, { "epoch": 0.6198529685761588, "grad_norm": 0.0927734375, "learning_rate": 0.0019670363207039843, "loss": 0.2339, "step": 87330 }, { "epoch": 0.6199239468160049, "grad_norm": 0.1220703125, "learning_rate": 0.0019670287305690944, "loss": 0.2301, "step": 87340 }, { "epoch": 0.619994925055851, "grad_norm": 0.1494140625, "learning_rate": 0.0019670211395767675, "loss": 0.2218, "step": 87350 }, { "epoch": 0.6200659032956971, "grad_norm": 0.11376953125, "learning_rate": 0.001967013547727011, "loss": 0.2402, "step": 87360 }, { "epoch": 0.6201368815355432, "grad_norm": 0.0771484375, "learning_rate": 0.0019670059550198323, "loss": 0.2251, "step": 87370 }, { "epoch": 0.6202078597753894, "grad_norm": 0.1455078125, "learning_rate": 0.001966998361455239, "loss": 0.2399, "step": 87380 }, { "epoch": 0.6202788380152355, "grad_norm": 0.19921875, "learning_rate": 0.0019669907670332395, "loss": 0.2308, "step": 87390 }, { "epoch": 0.6203498162550816, "grad_norm": 0.138671875, "learning_rate": 0.0019669831717538397, "loss": 0.233, "step": 87400 }, { "epoch": 0.6204207944949277, "grad_norm": 0.12255859375, "learning_rate": 0.001966975575617048, "loss": 0.2176, "step": 87410 }, { "epoch": 0.6204917727347738, "grad_norm": 0.30859375, "learning_rate": 0.0019669679786228716, "loss": 0.2543, "step": 87420 }, { "epoch": 0.62056275097462, "grad_norm": 0.1201171875, "learning_rate": 0.0019669603807713184, "loss": 0.2513, "step": 87430 }, { "epoch": 0.620633729214466, "grad_norm": 0.1279296875, "learning_rate": 0.001966952782062396, "loss": 0.23, "step": 87440 }, { "epoch": 0.6207047074543122, "grad_norm": 0.2138671875, "learning_rate": 0.001966945182496111, "loss": 0.2341, "step": 87450 }, { "epoch": 0.6207756856941583, "grad_norm": 0.142578125, "learning_rate": 0.001966937582072472, "loss": 0.2176, "step": 87460 }, { "epoch": 0.6208466639340045, "grad_norm": 0.1259765625, "learning_rate": 0.001966929980791486, "loss": 0.2178, "step": 87470 }, { "epoch": 0.6209176421738506, "grad_norm": 0.0830078125, "learning_rate": 0.0019669223786531603, "loss": 0.2231, "step": 87480 }, { "epoch": 0.6209886204136966, "grad_norm": 0.08056640625, "learning_rate": 0.0019669147756575032, "loss": 0.2375, "step": 87490 }, { "epoch": 0.6210595986535428, "grad_norm": 0.123046875, "learning_rate": 0.001966907171804521, "loss": 0.211, "step": 87500 }, { "epoch": 0.6211305768933889, "grad_norm": 0.083984375, "learning_rate": 0.0019668995670942225, "loss": 0.213, "step": 87510 }, { "epoch": 0.621201555133235, "grad_norm": 0.1904296875, "learning_rate": 0.001966891961526615, "loss": 0.2292, "step": 87520 }, { "epoch": 0.6212725333730812, "grad_norm": 0.11328125, "learning_rate": 0.001966884355101705, "loss": 0.2277, "step": 87530 }, { "epoch": 0.6213435116129272, "grad_norm": 0.1181640625, "learning_rate": 0.001966876747819501, "loss": 0.2216, "step": 87540 }, { "epoch": 0.6214144898527734, "grad_norm": 0.189453125, "learning_rate": 0.0019668691396800106, "loss": 0.228, "step": 87550 }, { "epoch": 0.6214854680926195, "grad_norm": 0.06396484375, "learning_rate": 0.00196686153068324, "loss": 0.2352, "step": 87560 }, { "epoch": 0.6215564463324657, "grad_norm": 0.07275390625, "learning_rate": 0.0019668539208291987, "loss": 0.2382, "step": 87570 }, { "epoch": 0.6216274245723118, "grad_norm": 0.11669921875, "learning_rate": 0.0019668463101178923, "loss": 0.228, "step": 87580 }, { "epoch": 0.6216984028121578, "grad_norm": 0.10009765625, "learning_rate": 0.0019668386985493298, "loss": 0.2383, "step": 87590 }, { "epoch": 0.621769381052004, "grad_norm": 0.0849609375, "learning_rate": 0.001966831086123518, "loss": 0.2263, "step": 87600 }, { "epoch": 0.6218403592918501, "grad_norm": 0.13671875, "learning_rate": 0.0019668234728404646, "loss": 0.236, "step": 87610 }, { "epoch": 0.6219113375316963, "grad_norm": 0.1572265625, "learning_rate": 0.001966815858700177, "loss": 0.2442, "step": 87620 }, { "epoch": 0.6219823157715424, "grad_norm": 0.09765625, "learning_rate": 0.0019668082437026627, "loss": 0.2238, "step": 87630 }, { "epoch": 0.6220532940113884, "grad_norm": 0.12255859375, "learning_rate": 0.0019668006278479298, "loss": 0.2233, "step": 87640 }, { "epoch": 0.6221242722512346, "grad_norm": 0.150390625, "learning_rate": 0.0019667930111359855, "loss": 0.2381, "step": 87650 }, { "epoch": 0.6221952504910807, "grad_norm": 0.1328125, "learning_rate": 0.0019667853935668366, "loss": 0.2304, "step": 87660 }, { "epoch": 0.6222662287309269, "grad_norm": 0.0947265625, "learning_rate": 0.0019667777751404916, "loss": 0.23, "step": 87670 }, { "epoch": 0.6223372069707729, "grad_norm": 0.103515625, "learning_rate": 0.001966770155856958, "loss": 0.2277, "step": 87680 }, { "epoch": 0.622408185210619, "grad_norm": 0.07861328125, "learning_rate": 0.0019667625357162426, "loss": 0.224, "step": 87690 }, { "epoch": 0.6224791634504652, "grad_norm": 0.09619140625, "learning_rate": 0.0019667549147183534, "loss": 0.2265, "step": 87700 }, { "epoch": 0.6225501416903113, "grad_norm": 0.140625, "learning_rate": 0.0019667472928632983, "loss": 0.2415, "step": 87710 }, { "epoch": 0.6226211199301575, "grad_norm": 0.0888671875, "learning_rate": 0.0019667396701510843, "loss": 0.2317, "step": 87720 }, { "epoch": 0.6226920981700035, "grad_norm": 0.1728515625, "learning_rate": 0.001966732046581719, "loss": 0.2369, "step": 87730 }, { "epoch": 0.6227630764098496, "grad_norm": 0.09423828125, "learning_rate": 0.0019667244221552097, "loss": 0.2192, "step": 87740 }, { "epoch": 0.6228340546496958, "grad_norm": 0.09521484375, "learning_rate": 0.0019667167968715643, "loss": 0.2347, "step": 87750 }, { "epoch": 0.6229050328895419, "grad_norm": 0.0927734375, "learning_rate": 0.001966709170730791, "loss": 0.2312, "step": 87760 }, { "epoch": 0.622976011129388, "grad_norm": 0.1328125, "learning_rate": 0.001966701543732896, "loss": 0.2379, "step": 87770 }, { "epoch": 0.6230469893692341, "grad_norm": 0.12890625, "learning_rate": 0.001966693915877888, "loss": 0.242, "step": 87780 }, { "epoch": 0.6231179676090802, "grad_norm": 0.10595703125, "learning_rate": 0.0019666862871657734, "loss": 0.2358, "step": 87790 }, { "epoch": 0.6231889458489264, "grad_norm": 0.09228515625, "learning_rate": 0.0019666786575965613, "loss": 0.2355, "step": 87800 }, { "epoch": 0.6232599240887725, "grad_norm": 0.10302734375, "learning_rate": 0.0019666710271702575, "loss": 0.2407, "step": 87810 }, { "epoch": 0.6233309023286187, "grad_norm": 0.11376953125, "learning_rate": 0.0019666633958868706, "loss": 0.2152, "step": 87820 }, { "epoch": 0.6234018805684647, "grad_norm": 0.11767578125, "learning_rate": 0.001966655763746408, "loss": 0.2473, "step": 87830 }, { "epoch": 0.6234728588083108, "grad_norm": 0.1318359375, "learning_rate": 0.001966648130748877, "loss": 0.2593, "step": 87840 }, { "epoch": 0.623543837048157, "grad_norm": 0.10791015625, "learning_rate": 0.001966640496894286, "loss": 0.2487, "step": 87850 }, { "epoch": 0.6236148152880031, "grad_norm": 0.12158203125, "learning_rate": 0.001966632862182641, "loss": 0.2582, "step": 87860 }, { "epoch": 0.6236857935278493, "grad_norm": 0.14453125, "learning_rate": 0.0019666252266139508, "loss": 0.2545, "step": 87870 }, { "epoch": 0.6237567717676953, "grad_norm": 0.091796875, "learning_rate": 0.0019666175901882224, "loss": 0.2424, "step": 87880 }, { "epoch": 0.6238277500075414, "grad_norm": 0.1376953125, "learning_rate": 0.0019666099529054635, "loss": 0.2192, "step": 87890 }, { "epoch": 0.6238987282473876, "grad_norm": 0.06982421875, "learning_rate": 0.001966602314765682, "loss": 0.2298, "step": 87900 }, { "epoch": 0.6239697064872337, "grad_norm": 0.1826171875, "learning_rate": 0.0019665946757688848, "loss": 0.244, "step": 87910 }, { "epoch": 0.6240406847270797, "grad_norm": 0.11572265625, "learning_rate": 0.00196658703591508, "loss": 0.223, "step": 87920 }, { "epoch": 0.6241116629669259, "grad_norm": 0.0869140625, "learning_rate": 0.0019665793952042747, "loss": 0.2347, "step": 87930 }, { "epoch": 0.624182641206772, "grad_norm": 0.06982421875, "learning_rate": 0.001966571753636477, "loss": 0.2305, "step": 87940 }, { "epoch": 0.6242536194466182, "grad_norm": 0.11474609375, "learning_rate": 0.001966564111211694, "loss": 0.2245, "step": 87950 }, { "epoch": 0.6243245976864643, "grad_norm": 0.0947265625, "learning_rate": 0.0019665564679299335, "loss": 0.2286, "step": 87960 }, { "epoch": 0.6243955759263103, "grad_norm": 0.109375, "learning_rate": 0.0019665488237912026, "loss": 0.2335, "step": 87970 }, { "epoch": 0.6244665541661565, "grad_norm": 0.0849609375, "learning_rate": 0.0019665411787955092, "loss": 0.2362, "step": 87980 }, { "epoch": 0.6245375324060026, "grad_norm": 0.2158203125, "learning_rate": 0.001966533532942861, "loss": 0.215, "step": 87990 }, { "epoch": 0.6246085106458488, "grad_norm": 0.107421875, "learning_rate": 0.001966525886233266, "loss": 0.2284, "step": 88000 }, { "epoch": 0.6246085106458488, "eval_covost2-zh-en_loss": 3.789672374725342, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.521, "eval_covost2-zh-en_samples_per_second": 3.119, "eval_covost2-zh-en_steps_per_second": 0.195, "step": 88000 }, { "epoch": 0.6246085106458488, "eval_covost2-en-zh_loss": 3.163731575012207, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 20.0951, "eval_covost2-en-zh_samples_per_second": 3.185, "eval_covost2-en-zh_steps_per_second": 0.199, "step": 88000 }, { "epoch": 0.6246794888856949, "grad_norm": 0.11572265625, "learning_rate": 0.001966518238666731, "loss": 0.2205, "step": 88010 }, { "epoch": 0.6247504671255409, "grad_norm": 0.09033203125, "learning_rate": 0.0019665105902432634, "loss": 0.2337, "step": 88020 }, { "epoch": 0.6248214453653871, "grad_norm": 0.12255859375, "learning_rate": 0.001966502940962871, "loss": 0.2283, "step": 88030 }, { "epoch": 0.6248924236052332, "grad_norm": 0.07177734375, "learning_rate": 0.001966495290825562, "loss": 0.2307, "step": 88040 }, { "epoch": 0.6249634018450794, "grad_norm": 0.08935546875, "learning_rate": 0.0019664876398313436, "loss": 0.2105, "step": 88050 }, { "epoch": 0.6250343800849255, "grad_norm": 0.09033203125, "learning_rate": 0.0019664799879802225, "loss": 0.2399, "step": 88060 }, { "epoch": 0.6251053583247715, "grad_norm": 0.09130859375, "learning_rate": 0.0019664723352722075, "loss": 0.251, "step": 88070 }, { "epoch": 0.6251763365646177, "grad_norm": 0.1357421875, "learning_rate": 0.0019664646817073054, "loss": 0.2388, "step": 88080 }, { "epoch": 0.6252473148044638, "grad_norm": 0.0908203125, "learning_rate": 0.0019664570272855245, "loss": 0.2342, "step": 88090 }, { "epoch": 0.62531829304431, "grad_norm": 0.09033203125, "learning_rate": 0.0019664493720068714, "loss": 0.2212, "step": 88100 }, { "epoch": 0.6253892712841561, "grad_norm": 0.09912109375, "learning_rate": 0.001966441715871354, "loss": 0.2301, "step": 88110 }, { "epoch": 0.6254602495240021, "grad_norm": 0.1142578125, "learning_rate": 0.0019664340588789807, "loss": 0.2493, "step": 88120 }, { "epoch": 0.6255312277638483, "grad_norm": 0.25390625, "learning_rate": 0.0019664264010297583, "loss": 0.2363, "step": 88130 }, { "epoch": 0.6256022060036944, "grad_norm": 0.0927734375, "learning_rate": 0.0019664187423236944, "loss": 0.2445, "step": 88140 }, { "epoch": 0.6256731842435406, "grad_norm": 0.1435546875, "learning_rate": 0.0019664110827607964, "loss": 0.2598, "step": 88150 }, { "epoch": 0.6257441624833866, "grad_norm": 0.11767578125, "learning_rate": 0.001966403422341072, "loss": 0.2448, "step": 88160 }, { "epoch": 0.6258151407232327, "grad_norm": 0.09228515625, "learning_rate": 0.0019663957610645297, "loss": 0.2413, "step": 88170 }, { "epoch": 0.6258861189630789, "grad_norm": 0.08642578125, "learning_rate": 0.0019663880989311757, "loss": 0.2187, "step": 88180 }, { "epoch": 0.625957097202925, "grad_norm": 0.130859375, "learning_rate": 0.0019663804359410183, "loss": 0.2365, "step": 88190 }, { "epoch": 0.6260280754427712, "grad_norm": 0.10498046875, "learning_rate": 0.001966372772094065, "loss": 0.2401, "step": 88200 }, { "epoch": 0.6260990536826172, "grad_norm": 0.1005859375, "learning_rate": 0.001966365107390323, "loss": 0.2376, "step": 88210 }, { "epoch": 0.6261700319224633, "grad_norm": 0.16015625, "learning_rate": 0.001966357441829801, "loss": 0.2345, "step": 88220 }, { "epoch": 0.6262410101623095, "grad_norm": 0.154296875, "learning_rate": 0.001966349775412505, "loss": 0.2222, "step": 88230 }, { "epoch": 0.6263119884021556, "grad_norm": 0.13671875, "learning_rate": 0.0019663421081384438, "loss": 0.2358, "step": 88240 }, { "epoch": 0.6263829666420018, "grad_norm": 0.07421875, "learning_rate": 0.0019663344400076242, "loss": 0.2414, "step": 88250 }, { "epoch": 0.6264539448818478, "grad_norm": 0.11083984375, "learning_rate": 0.0019663267710200547, "loss": 0.2352, "step": 88260 }, { "epoch": 0.6265249231216939, "grad_norm": 0.103515625, "learning_rate": 0.0019663191011757416, "loss": 0.2284, "step": 88270 }, { "epoch": 0.6265959013615401, "grad_norm": 0.15234375, "learning_rate": 0.0019663114304746938, "loss": 0.2273, "step": 88280 }, { "epoch": 0.6266668796013862, "grad_norm": 0.095703125, "learning_rate": 0.001966303758916918, "loss": 0.2434, "step": 88290 }, { "epoch": 0.6267378578412324, "grad_norm": 0.134765625, "learning_rate": 0.001966296086502422, "loss": 0.2374, "step": 88300 }, { "epoch": 0.6268088360810784, "grad_norm": 0.0986328125, "learning_rate": 0.0019662884132312136, "loss": 0.238, "step": 88310 }, { "epoch": 0.6268798143209245, "grad_norm": 0.076171875, "learning_rate": 0.0019662807391033, "loss": 0.2242, "step": 88320 }, { "epoch": 0.6269507925607707, "grad_norm": 0.064453125, "learning_rate": 0.0019662730641186897, "loss": 0.2326, "step": 88330 }, { "epoch": 0.6270217708006168, "grad_norm": 0.10986328125, "learning_rate": 0.0019662653882773894, "loss": 0.2423, "step": 88340 }, { "epoch": 0.627092749040463, "grad_norm": 0.1484375, "learning_rate": 0.001966257711579407, "loss": 0.2396, "step": 88350 }, { "epoch": 0.627163727280309, "grad_norm": 0.11279296875, "learning_rate": 0.0019662500340247493, "loss": 0.2376, "step": 88360 }, { "epoch": 0.6272347055201551, "grad_norm": 0.10595703125, "learning_rate": 0.0019662423556134254, "loss": 0.2431, "step": 88370 }, { "epoch": 0.6273056837600013, "grad_norm": 0.08544921875, "learning_rate": 0.0019662346763454413, "loss": 0.235, "step": 88380 }, { "epoch": 0.6273766619998474, "grad_norm": 0.1982421875, "learning_rate": 0.001966226996220806, "loss": 0.2337, "step": 88390 }, { "epoch": 0.6274476402396935, "grad_norm": 0.095703125, "learning_rate": 0.0019662193152395267, "loss": 0.2407, "step": 88400 }, { "epoch": 0.6275186184795396, "grad_norm": 0.1806640625, "learning_rate": 0.00196621163340161, "loss": 0.2317, "step": 88410 }, { "epoch": 0.6275895967193857, "grad_norm": 0.08984375, "learning_rate": 0.001966203950707065, "loss": 0.2195, "step": 88420 }, { "epoch": 0.6276605749592319, "grad_norm": 0.185546875, "learning_rate": 0.001966196267155898, "loss": 0.2319, "step": 88430 }, { "epoch": 0.627731553199078, "grad_norm": 0.12109375, "learning_rate": 0.0019661885827481178, "loss": 0.2333, "step": 88440 }, { "epoch": 0.6278025314389241, "grad_norm": 0.123046875, "learning_rate": 0.001966180897483731, "loss": 0.2226, "step": 88450 }, { "epoch": 0.6278735096787702, "grad_norm": 0.12451171875, "learning_rate": 0.0019661732113627458, "loss": 0.2393, "step": 88460 }, { "epoch": 0.6279444879186163, "grad_norm": 0.1103515625, "learning_rate": 0.0019661655243851696, "loss": 0.2307, "step": 88470 }, { "epoch": 0.6280154661584625, "grad_norm": 0.109375, "learning_rate": 0.0019661578365510095, "loss": 0.2388, "step": 88480 }, { "epoch": 0.6280864443983086, "grad_norm": 0.1494140625, "learning_rate": 0.001966150147860274, "loss": 0.2357, "step": 88490 }, { "epoch": 0.6281574226381547, "grad_norm": 0.099609375, "learning_rate": 0.00196614245831297, "loss": 0.234, "step": 88500 }, { "epoch": 0.6282284008780008, "grad_norm": 0.142578125, "learning_rate": 0.001966134767909106, "loss": 0.2238, "step": 88510 }, { "epoch": 0.628299379117847, "grad_norm": 0.099609375, "learning_rate": 0.001966127076648688, "loss": 0.2399, "step": 88520 }, { "epoch": 0.6283703573576931, "grad_norm": 0.07373046875, "learning_rate": 0.0019661193845317256, "loss": 0.2532, "step": 88530 }, { "epoch": 0.6284413355975392, "grad_norm": 0.107421875, "learning_rate": 0.0019661116915582247, "loss": 0.232, "step": 88540 }, { "epoch": 0.6285123138373853, "grad_norm": 0.1376953125, "learning_rate": 0.001966103997728194, "loss": 0.2259, "step": 88550 }, { "epoch": 0.6285832920772314, "grad_norm": 0.0986328125, "learning_rate": 0.001966096303041641, "loss": 0.246, "step": 88560 }, { "epoch": 0.6286542703170775, "grad_norm": 0.1787109375, "learning_rate": 0.0019660886074985722, "loss": 0.2228, "step": 88570 }, { "epoch": 0.6287252485569237, "grad_norm": 0.13671875, "learning_rate": 0.001966080911098997, "loss": 0.2333, "step": 88580 }, { "epoch": 0.6287962267967698, "grad_norm": 0.103515625, "learning_rate": 0.0019660732138429213, "loss": 0.2326, "step": 88590 }, { "epoch": 0.6288672050366159, "grad_norm": 0.0810546875, "learning_rate": 0.001966065515730354, "loss": 0.2323, "step": 88600 }, { "epoch": 0.628938183276462, "grad_norm": 0.1376953125, "learning_rate": 0.001966057816761302, "loss": 0.2321, "step": 88610 }, { "epoch": 0.6290091615163081, "grad_norm": 0.078125, "learning_rate": 0.001966050116935773, "loss": 0.2137, "step": 88620 }, { "epoch": 0.6290801397561543, "grad_norm": 0.158203125, "learning_rate": 0.001966042416253775, "loss": 0.2431, "step": 88630 }, { "epoch": 0.6291511179960004, "grad_norm": 0.103515625, "learning_rate": 0.001966034714715315, "loss": 0.254, "step": 88640 }, { "epoch": 0.6292220962358465, "grad_norm": 0.11181640625, "learning_rate": 0.0019660270123204007, "loss": 0.2413, "step": 88650 }, { "epoch": 0.6292930744756926, "grad_norm": 0.11865234375, "learning_rate": 0.00196601930906904, "loss": 0.2369, "step": 88660 }, { "epoch": 0.6293640527155387, "grad_norm": 0.109375, "learning_rate": 0.0019660116049612414, "loss": 0.2268, "step": 88670 }, { "epoch": 0.6294350309553849, "grad_norm": 0.09228515625, "learning_rate": 0.001966003899997011, "loss": 0.2393, "step": 88680 }, { "epoch": 0.6295060091952309, "grad_norm": 0.11572265625, "learning_rate": 0.001965996194176357, "loss": 0.2322, "step": 88690 }, { "epoch": 0.6295769874350771, "grad_norm": 0.130859375, "learning_rate": 0.0019659884874992867, "loss": 0.2383, "step": 88700 }, { "epoch": 0.6296479656749232, "grad_norm": 0.11279296875, "learning_rate": 0.0019659807799658085, "loss": 0.2146, "step": 88710 }, { "epoch": 0.6297189439147693, "grad_norm": 0.10888671875, "learning_rate": 0.00196597307157593, "loss": 0.2293, "step": 88720 }, { "epoch": 0.6297899221546155, "grad_norm": 0.11328125, "learning_rate": 0.0019659653623296577, "loss": 0.2204, "step": 88730 }, { "epoch": 0.6298609003944615, "grad_norm": 0.154296875, "learning_rate": 0.001965957652227, "loss": 0.2393, "step": 88740 }, { "epoch": 0.6299318786343077, "grad_norm": 0.11279296875, "learning_rate": 0.0019659499412679644, "loss": 0.2379, "step": 88750 }, { "epoch": 0.6300028568741538, "grad_norm": 0.12158203125, "learning_rate": 0.001965942229452559, "loss": 0.2453, "step": 88760 }, { "epoch": 0.630073835114, "grad_norm": 0.10498046875, "learning_rate": 0.001965934516780791, "loss": 0.2347, "step": 88770 }, { "epoch": 0.6301448133538461, "grad_norm": 0.125, "learning_rate": 0.0019659268032526673, "loss": 0.2261, "step": 88780 }, { "epoch": 0.6302157915936921, "grad_norm": 0.322265625, "learning_rate": 0.001965919088868197, "loss": 0.2271, "step": 88790 }, { "epoch": 0.6302867698335383, "grad_norm": 0.09765625, "learning_rate": 0.0019659113736273867, "loss": 0.2334, "step": 88800 }, { "epoch": 0.6303577480733844, "grad_norm": 0.107421875, "learning_rate": 0.0019659036575302444, "loss": 0.2314, "step": 88810 }, { "epoch": 0.6304287263132305, "grad_norm": 0.1337890625, "learning_rate": 0.001965895940576778, "loss": 0.2271, "step": 88820 }, { "epoch": 0.6304997045530767, "grad_norm": 0.1455078125, "learning_rate": 0.001965888222766994, "loss": 0.2518, "step": 88830 }, { "epoch": 0.6305706827929227, "grad_norm": 0.1171875, "learning_rate": 0.001965880504100901, "loss": 0.2366, "step": 88840 }, { "epoch": 0.6306416610327689, "grad_norm": 0.130859375, "learning_rate": 0.0019658727845785068, "loss": 0.2321, "step": 88850 }, { "epoch": 0.630712639272615, "grad_norm": 0.11474609375, "learning_rate": 0.0019658650641998184, "loss": 0.217, "step": 88860 }, { "epoch": 0.6307836175124611, "grad_norm": 0.0966796875, "learning_rate": 0.001965857342964844, "loss": 0.241, "step": 88870 }, { "epoch": 0.6308545957523073, "grad_norm": 0.126953125, "learning_rate": 0.001965849620873591, "loss": 0.2371, "step": 88880 }, { "epoch": 0.6309255739921533, "grad_norm": 0.09716796875, "learning_rate": 0.0019658418979260667, "loss": 0.2354, "step": 88890 }, { "epoch": 0.6309965522319995, "grad_norm": 0.07958984375, "learning_rate": 0.0019658341741222796, "loss": 0.2294, "step": 88900 }, { "epoch": 0.6310675304718456, "grad_norm": 0.11669921875, "learning_rate": 0.001965826449462236, "loss": 0.2302, "step": 88910 }, { "epoch": 0.6311385087116917, "grad_norm": 0.08251953125, "learning_rate": 0.0019658187239459447, "loss": 0.2455, "step": 88920 }, { "epoch": 0.6312094869515378, "grad_norm": 0.10400390625, "learning_rate": 0.001965810997573413, "loss": 0.2419, "step": 88930 }, { "epoch": 0.6312804651913839, "grad_norm": 0.130859375, "learning_rate": 0.001965803270344648, "loss": 0.2505, "step": 88940 }, { "epoch": 0.6313514434312301, "grad_norm": 0.11328125, "learning_rate": 0.0019657955422596584, "loss": 0.2194, "step": 88950 }, { "epoch": 0.6314224216710762, "grad_norm": 0.15234375, "learning_rate": 0.001965787813318451, "loss": 0.2455, "step": 88960 }, { "epoch": 0.6314933999109223, "grad_norm": 0.07568359375, "learning_rate": 0.0019657800835210337, "loss": 0.2319, "step": 88970 }, { "epoch": 0.6315643781507684, "grad_norm": 0.09130859375, "learning_rate": 0.0019657723528674147, "loss": 0.2281, "step": 88980 }, { "epoch": 0.6316353563906145, "grad_norm": 0.150390625, "learning_rate": 0.0019657646213576004, "loss": 0.2255, "step": 88990 }, { "epoch": 0.6317063346304607, "grad_norm": 0.146484375, "learning_rate": 0.0019657568889915994, "loss": 0.237, "step": 89000 }, { "epoch": 0.6317773128703068, "grad_norm": 0.10693359375, "learning_rate": 0.0019657491557694188, "loss": 0.2402, "step": 89010 }, { "epoch": 0.631848291110153, "grad_norm": 0.185546875, "learning_rate": 0.001965741421691067, "loss": 0.2391, "step": 89020 }, { "epoch": 0.631919269349999, "grad_norm": 0.1328125, "learning_rate": 0.001965733686756551, "loss": 0.2532, "step": 89030 }, { "epoch": 0.6319902475898451, "grad_norm": 0.10205078125, "learning_rate": 0.0019657259509658787, "loss": 0.2376, "step": 89040 }, { "epoch": 0.6320612258296913, "grad_norm": 0.07861328125, "learning_rate": 0.0019657182143190577, "loss": 0.2365, "step": 89050 }, { "epoch": 0.6321322040695374, "grad_norm": 0.095703125, "learning_rate": 0.001965710476816096, "loss": 0.2231, "step": 89060 }, { "epoch": 0.6322031823093835, "grad_norm": 0.10791015625, "learning_rate": 0.001965702738457, "loss": 0.2486, "step": 89070 }, { "epoch": 0.6322741605492296, "grad_norm": 0.07763671875, "learning_rate": 0.0019656949992417788, "loss": 0.2435, "step": 89080 }, { "epoch": 0.6323451387890757, "grad_norm": 0.13671875, "learning_rate": 0.0019656872591704397, "loss": 0.2289, "step": 89090 }, { "epoch": 0.6324161170289219, "grad_norm": 0.07958984375, "learning_rate": 0.00196567951824299, "loss": 0.2244, "step": 89100 }, { "epoch": 0.632487095268768, "grad_norm": 0.10546875, "learning_rate": 0.0019656717764594375, "loss": 0.2366, "step": 89110 }, { "epoch": 0.6325580735086141, "grad_norm": 0.10205078125, "learning_rate": 0.0019656640338197896, "loss": 0.22, "step": 89120 }, { "epoch": 0.6326290517484602, "grad_norm": 0.2080078125, "learning_rate": 0.0019656562903240544, "loss": 0.2306, "step": 89130 }, { "epoch": 0.6327000299883063, "grad_norm": 0.091796875, "learning_rate": 0.0019656485459722396, "loss": 0.241, "step": 89140 }, { "epoch": 0.6327710082281525, "grad_norm": 0.11669921875, "learning_rate": 0.0019656408007643527, "loss": 0.2234, "step": 89150 }, { "epoch": 0.6328419864679986, "grad_norm": 0.0947265625, "learning_rate": 0.0019656330547004005, "loss": 0.2175, "step": 89160 }, { "epoch": 0.6329129647078446, "grad_norm": 0.0947265625, "learning_rate": 0.0019656253077803922, "loss": 0.2221, "step": 89170 }, { "epoch": 0.6329839429476908, "grad_norm": 0.07080078125, "learning_rate": 0.0019656175600043344, "loss": 0.2205, "step": 89180 }, { "epoch": 0.6330549211875369, "grad_norm": 0.09716796875, "learning_rate": 0.0019656098113722356, "loss": 0.2318, "step": 89190 }, { "epoch": 0.6331258994273831, "grad_norm": 0.09326171875, "learning_rate": 0.0019656020618841023, "loss": 0.238, "step": 89200 }, { "epoch": 0.6331968776672292, "grad_norm": 0.107421875, "learning_rate": 0.001965594311539943, "loss": 0.2254, "step": 89210 }, { "epoch": 0.6332678559070752, "grad_norm": 0.07421875, "learning_rate": 0.0019655865603397655, "loss": 0.2182, "step": 89220 }, { "epoch": 0.6333388341469214, "grad_norm": 0.20703125, "learning_rate": 0.0019655788082835767, "loss": 0.2382, "step": 89230 }, { "epoch": 0.6334098123867675, "grad_norm": 0.07275390625, "learning_rate": 0.001965571055371385, "loss": 0.2309, "step": 89240 }, { "epoch": 0.6334807906266137, "grad_norm": 0.16796875, "learning_rate": 0.0019655633016031977, "loss": 0.2473, "step": 89250 }, { "epoch": 0.6335517688664598, "grad_norm": 0.1123046875, "learning_rate": 0.0019655555469790223, "loss": 0.2321, "step": 89260 }, { "epoch": 0.6336227471063058, "grad_norm": 0.09326171875, "learning_rate": 0.001965547791498867, "loss": 0.2615, "step": 89270 }, { "epoch": 0.633693725346152, "grad_norm": 0.1767578125, "learning_rate": 0.0019655400351627395, "loss": 0.2278, "step": 89280 }, { "epoch": 0.6337647035859981, "grad_norm": 0.1064453125, "learning_rate": 0.001965532277970647, "loss": 0.2325, "step": 89290 }, { "epoch": 0.6338356818258443, "grad_norm": 0.08935546875, "learning_rate": 0.0019655245199225965, "loss": 0.2283, "step": 89300 }, { "epoch": 0.6339066600656904, "grad_norm": 0.08642578125, "learning_rate": 0.001965516761018597, "loss": 0.2698, "step": 89310 }, { "epoch": 0.6339776383055364, "grad_norm": 0.11328125, "learning_rate": 0.0019655090012586558, "loss": 0.2388, "step": 89320 }, { "epoch": 0.6340486165453826, "grad_norm": 0.11962890625, "learning_rate": 0.0019655012406427806, "loss": 0.242, "step": 89330 }, { "epoch": 0.6341195947852287, "grad_norm": 0.1748046875, "learning_rate": 0.0019654934791709785, "loss": 0.2311, "step": 89340 }, { "epoch": 0.6341905730250749, "grad_norm": 0.09326171875, "learning_rate": 0.0019654857168432583, "loss": 0.2376, "step": 89350 }, { "epoch": 0.634261551264921, "grad_norm": 0.0732421875, "learning_rate": 0.0019654779536596263, "loss": 0.228, "step": 89360 }, { "epoch": 0.634332529504767, "grad_norm": 0.09912109375, "learning_rate": 0.001965470189620091, "loss": 0.2353, "step": 89370 }, { "epoch": 0.6344035077446132, "grad_norm": 0.06640625, "learning_rate": 0.00196546242472466, "loss": 0.2288, "step": 89380 }, { "epoch": 0.6344744859844593, "grad_norm": 0.07080078125, "learning_rate": 0.0019654546589733405, "loss": 0.2308, "step": 89390 }, { "epoch": 0.6345454642243055, "grad_norm": 0.1337890625, "learning_rate": 0.001965446892366141, "loss": 0.2325, "step": 89400 }, { "epoch": 0.6346164424641515, "grad_norm": 0.09521484375, "learning_rate": 0.001965439124903069, "loss": 0.2414, "step": 89410 }, { "epoch": 0.6346874207039976, "grad_norm": 0.08544921875, "learning_rate": 0.0019654313565841317, "loss": 0.2515, "step": 89420 }, { "epoch": 0.6347583989438438, "grad_norm": 0.10888671875, "learning_rate": 0.001965423587409337, "loss": 0.2496, "step": 89430 }, { "epoch": 0.6348293771836899, "grad_norm": 0.07958984375, "learning_rate": 0.0019654158173786924, "loss": 0.2332, "step": 89440 }, { "epoch": 0.6349003554235361, "grad_norm": 0.083984375, "learning_rate": 0.001965408046492206, "loss": 0.2329, "step": 89450 }, { "epoch": 0.6349713336633821, "grad_norm": 0.119140625, "learning_rate": 0.001965400274749885, "loss": 0.2199, "step": 89460 }, { "epoch": 0.6350423119032282, "grad_norm": 0.09130859375, "learning_rate": 0.001965392502151738, "loss": 0.2325, "step": 89470 }, { "epoch": 0.6351132901430744, "grad_norm": 0.0947265625, "learning_rate": 0.0019653847286977715, "loss": 0.2145, "step": 89480 }, { "epoch": 0.6351842683829205, "grad_norm": 0.15625, "learning_rate": 0.001965376954387994, "loss": 0.2266, "step": 89490 }, { "epoch": 0.6352552466227667, "grad_norm": 0.080078125, "learning_rate": 0.001965369179222413, "loss": 0.237, "step": 89500 }, { "epoch": 0.6353262248626127, "grad_norm": 0.1591796875, "learning_rate": 0.001965361403201036, "loss": 0.2396, "step": 89510 }, { "epoch": 0.6353972031024588, "grad_norm": 0.103515625, "learning_rate": 0.001965353626323871, "loss": 0.2298, "step": 89520 }, { "epoch": 0.635468181342305, "grad_norm": 0.0927734375, "learning_rate": 0.0019653458485909252, "loss": 0.229, "step": 89530 }, { "epoch": 0.6355391595821511, "grad_norm": 0.1142578125, "learning_rate": 0.0019653380700022064, "loss": 0.2302, "step": 89540 }, { "epoch": 0.6356101378219973, "grad_norm": 0.09375, "learning_rate": 0.0019653302905577232, "loss": 0.2469, "step": 89550 }, { "epoch": 0.6356811160618433, "grad_norm": 0.1904296875, "learning_rate": 0.0019653225102574815, "loss": 0.2435, "step": 89560 }, { "epoch": 0.6357520943016894, "grad_norm": 0.12255859375, "learning_rate": 0.001965314729101491, "loss": 0.2267, "step": 89570 }, { "epoch": 0.6358230725415356, "grad_norm": 0.091796875, "learning_rate": 0.001965306947089758, "loss": 0.2188, "step": 89580 }, { "epoch": 0.6358940507813817, "grad_norm": 0.111328125, "learning_rate": 0.001965299164222291, "loss": 0.2297, "step": 89590 }, { "epoch": 0.6359650290212279, "grad_norm": 0.12890625, "learning_rate": 0.001965291380499097, "loss": 0.2343, "step": 89600 }, { "epoch": 0.6360360072610739, "grad_norm": 0.177734375, "learning_rate": 0.0019652835959201843, "loss": 0.2381, "step": 89610 }, { "epoch": 0.63610698550092, "grad_norm": 0.11572265625, "learning_rate": 0.0019652758104855604, "loss": 0.2318, "step": 89620 }, { "epoch": 0.6361779637407662, "grad_norm": 0.12060546875, "learning_rate": 0.001965268024195233, "loss": 0.2279, "step": 89630 }, { "epoch": 0.6362489419806123, "grad_norm": 0.10009765625, "learning_rate": 0.0019652602370492096, "loss": 0.2279, "step": 89640 }, { "epoch": 0.6363199202204585, "grad_norm": 0.10888671875, "learning_rate": 0.001965252449047498, "loss": 0.2301, "step": 89650 }, { "epoch": 0.6363908984603045, "grad_norm": 0.1298828125, "learning_rate": 0.001965244660190106, "loss": 0.2396, "step": 89660 }, { "epoch": 0.6364618767001506, "grad_norm": 0.08154296875, "learning_rate": 0.0019652368704770416, "loss": 0.2346, "step": 89670 }, { "epoch": 0.6365328549399968, "grad_norm": 0.109375, "learning_rate": 0.0019652290799083115, "loss": 0.2266, "step": 89680 }, { "epoch": 0.6366038331798429, "grad_norm": 0.0986328125, "learning_rate": 0.0019652212884839243, "loss": 0.2368, "step": 89690 }, { "epoch": 0.636674811419689, "grad_norm": 0.099609375, "learning_rate": 0.001965213496203888, "loss": 0.2496, "step": 89700 }, { "epoch": 0.6367457896595351, "grad_norm": 0.1171875, "learning_rate": 0.001965205703068209, "loss": 0.2433, "step": 89710 }, { "epoch": 0.6368167678993812, "grad_norm": 0.0947265625, "learning_rate": 0.0019651979090768964, "loss": 0.2346, "step": 89720 }, { "epoch": 0.6368877461392274, "grad_norm": 0.09033203125, "learning_rate": 0.001965190114229957, "loss": 0.2326, "step": 89730 }, { "epoch": 0.6369587243790735, "grad_norm": 0.09423828125, "learning_rate": 0.001965182318527399, "loss": 0.2247, "step": 89740 }, { "epoch": 0.6370297026189196, "grad_norm": 0.15625, "learning_rate": 0.0019651745219692296, "loss": 0.234, "step": 89750 }, { "epoch": 0.6371006808587657, "grad_norm": 0.10205078125, "learning_rate": 0.001965166724555457, "loss": 0.2378, "step": 89760 }, { "epoch": 0.6371716590986118, "grad_norm": 0.09716796875, "learning_rate": 0.001965158926286089, "loss": 0.2382, "step": 89770 }, { "epoch": 0.637242637338458, "grad_norm": 0.087890625, "learning_rate": 0.0019651511271611324, "loss": 0.2201, "step": 89780 }, { "epoch": 0.6373136155783041, "grad_norm": 0.07861328125, "learning_rate": 0.001965143327180596, "loss": 0.2396, "step": 89790 }, { "epoch": 0.6373845938181502, "grad_norm": 0.0712890625, "learning_rate": 0.001965135526344487, "loss": 0.2432, "step": 89800 }, { "epoch": 0.6374555720579963, "grad_norm": 0.126953125, "learning_rate": 0.001965127724652813, "loss": 0.2228, "step": 89810 }, { "epoch": 0.6375265502978424, "grad_norm": 0.1357421875, "learning_rate": 0.0019651199221055824, "loss": 0.2283, "step": 89820 }, { "epoch": 0.6375975285376886, "grad_norm": 0.126953125, "learning_rate": 0.001965112118702802, "loss": 0.2475, "step": 89830 }, { "epoch": 0.6376685067775347, "grad_norm": 0.1416015625, "learning_rate": 0.00196510431444448, "loss": 0.219, "step": 89840 }, { "epoch": 0.6377394850173808, "grad_norm": 0.11474609375, "learning_rate": 0.001965096509330624, "loss": 0.2355, "step": 89850 }, { "epoch": 0.6378104632572269, "grad_norm": 0.1650390625, "learning_rate": 0.001965088703361242, "loss": 0.251, "step": 89860 }, { "epoch": 0.637881441497073, "grad_norm": 0.064453125, "learning_rate": 0.001965080896536341, "loss": 0.2261, "step": 89870 }, { "epoch": 0.6379524197369192, "grad_norm": 0.0859375, "learning_rate": 0.00196507308885593, "loss": 0.2039, "step": 89880 }, { "epoch": 0.6380233979767653, "grad_norm": 0.138671875, "learning_rate": 0.0019650652803200152, "loss": 0.2531, "step": 89890 }, { "epoch": 0.6380943762166114, "grad_norm": 0.09912109375, "learning_rate": 0.0019650574709286057, "loss": 0.2293, "step": 89900 }, { "epoch": 0.6381653544564575, "grad_norm": 0.1240234375, "learning_rate": 0.001965049660681708, "loss": 0.2433, "step": 89910 }, { "epoch": 0.6382363326963036, "grad_norm": 0.0927734375, "learning_rate": 0.0019650418495793303, "loss": 0.2284, "step": 89920 }, { "epoch": 0.6383073109361498, "grad_norm": 0.103515625, "learning_rate": 0.001965034037621481, "loss": 0.2319, "step": 89930 }, { "epoch": 0.6383782891759958, "grad_norm": 0.0869140625, "learning_rate": 0.0019650262248081672, "loss": 0.2323, "step": 89940 }, { "epoch": 0.638449267415842, "grad_norm": 0.08837890625, "learning_rate": 0.0019650184111393966, "loss": 0.2598, "step": 89950 }, { "epoch": 0.6385202456556881, "grad_norm": 0.11474609375, "learning_rate": 0.0019650105966151766, "loss": 0.2325, "step": 89960 }, { "epoch": 0.6385912238955342, "grad_norm": 0.0947265625, "learning_rate": 0.0019650027812355155, "loss": 0.2323, "step": 89970 }, { "epoch": 0.6386622021353804, "grad_norm": 0.13671875, "learning_rate": 0.0019649949650004214, "loss": 0.2382, "step": 89980 }, { "epoch": 0.6387331803752264, "grad_norm": 0.2138671875, "learning_rate": 0.001964987147909901, "loss": 0.2437, "step": 89990 }, { "epoch": 0.6388041586150726, "grad_norm": 0.1123046875, "learning_rate": 0.0019649793299639628, "loss": 0.2278, "step": 90000 }, { "epoch": 0.6388041586150726, "eval_covost2-zh-en_loss": 3.871570110321045, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.9995, "eval_covost2-zh-en_samples_per_second": 3.048, "eval_covost2-zh-en_steps_per_second": 0.19, "step": 90000 }, { "epoch": 0.6388041586150726, "eval_covost2-en-zh_loss": 3.1547136306762695, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 21.433, "eval_covost2-en-zh_samples_per_second": 2.986, "eval_covost2-en-zh_steps_per_second": 0.187, "step": 90000 }, { "epoch": 0.6388751368549187, "grad_norm": 0.09521484375, "learning_rate": 0.001964971511162614, "loss": 0.2488, "step": 90010 }, { "epoch": 0.6389461150947648, "grad_norm": 0.107421875, "learning_rate": 0.0019649636915058622, "loss": 0.2367, "step": 90020 }, { "epoch": 0.639017093334611, "grad_norm": 0.09814453125, "learning_rate": 0.0019649558709937164, "loss": 0.241, "step": 90030 }, { "epoch": 0.639088071574457, "grad_norm": 0.1640625, "learning_rate": 0.0019649480496261827, "loss": 0.2274, "step": 90040 }, { "epoch": 0.6391590498143032, "grad_norm": 0.08544921875, "learning_rate": 0.00196494022740327, "loss": 0.2328, "step": 90050 }, { "epoch": 0.6392300280541493, "grad_norm": 0.09619140625, "learning_rate": 0.0019649324043249858, "loss": 0.246, "step": 90060 }, { "epoch": 0.6393010062939954, "grad_norm": 0.123046875, "learning_rate": 0.0019649245803913373, "loss": 0.2334, "step": 90070 }, { "epoch": 0.6393719845338416, "grad_norm": 0.1259765625, "learning_rate": 0.0019649167556023326, "loss": 0.2311, "step": 90080 }, { "epoch": 0.6394429627736876, "grad_norm": 0.0849609375, "learning_rate": 0.0019649089299579796, "loss": 0.2299, "step": 90090 }, { "epoch": 0.6395139410135338, "grad_norm": 0.142578125, "learning_rate": 0.0019649011034582856, "loss": 0.2308, "step": 90100 }, { "epoch": 0.6395849192533799, "grad_norm": 0.12255859375, "learning_rate": 0.001964893276103259, "loss": 0.2215, "step": 90110 }, { "epoch": 0.639655897493226, "grad_norm": 0.10498046875, "learning_rate": 0.001964885447892907, "loss": 0.2211, "step": 90120 }, { "epoch": 0.6397268757330722, "grad_norm": 0.072265625, "learning_rate": 0.001964877618827237, "loss": 0.2482, "step": 90130 }, { "epoch": 0.6397978539729182, "grad_norm": 0.115234375, "learning_rate": 0.001964869788906258, "loss": 0.247, "step": 90140 }, { "epoch": 0.6398688322127644, "grad_norm": 0.0791015625, "learning_rate": 0.001964861958129977, "loss": 0.2337, "step": 90150 }, { "epoch": 0.6399398104526105, "grad_norm": 0.1064453125, "learning_rate": 0.0019648541264984014, "loss": 0.249, "step": 90160 }, { "epoch": 0.6400107886924566, "grad_norm": 0.09716796875, "learning_rate": 0.001964846294011539, "loss": 0.228, "step": 90170 }, { "epoch": 0.6400817669323027, "grad_norm": 0.1484375, "learning_rate": 0.001964838460669398, "loss": 0.2463, "step": 90180 }, { "epoch": 0.6401527451721488, "grad_norm": 0.14453125, "learning_rate": 0.0019648306264719866, "loss": 0.2175, "step": 90190 }, { "epoch": 0.640223723411995, "grad_norm": 0.10400390625, "learning_rate": 0.001964822791419311, "loss": 0.2261, "step": 90200 }, { "epoch": 0.6402947016518411, "grad_norm": 0.0771484375, "learning_rate": 0.00196481495551138, "loss": 0.2268, "step": 90210 }, { "epoch": 0.6403656798916872, "grad_norm": 0.0927734375, "learning_rate": 0.0019648071187482016, "loss": 0.2537, "step": 90220 }, { "epoch": 0.6404366581315333, "grad_norm": 0.1337890625, "learning_rate": 0.0019647992811297832, "loss": 0.2257, "step": 90230 }, { "epoch": 0.6405076363713794, "grad_norm": 0.10546875, "learning_rate": 0.0019647914426561327, "loss": 0.2309, "step": 90240 }, { "epoch": 0.6405786146112256, "grad_norm": 0.138671875, "learning_rate": 0.001964783603327257, "loss": 0.2296, "step": 90250 }, { "epoch": 0.6406495928510717, "grad_norm": 0.0654296875, "learning_rate": 0.0019647757631431647, "loss": 0.2261, "step": 90260 }, { "epoch": 0.6407205710909178, "grad_norm": 0.10595703125, "learning_rate": 0.0019647679221038634, "loss": 0.2302, "step": 90270 }, { "epoch": 0.6407915493307639, "grad_norm": 0.09130859375, "learning_rate": 0.001964760080209361, "loss": 0.2422, "step": 90280 }, { "epoch": 0.64086252757061, "grad_norm": 0.0888671875, "learning_rate": 0.0019647522374596654, "loss": 0.2308, "step": 90290 }, { "epoch": 0.6409335058104562, "grad_norm": 0.07177734375, "learning_rate": 0.0019647443938547835, "loss": 0.2194, "step": 90300 }, { "epoch": 0.6410044840503023, "grad_norm": 0.1689453125, "learning_rate": 0.0019647365493947236, "loss": 0.2301, "step": 90310 }, { "epoch": 0.6410754622901484, "grad_norm": 0.0810546875, "learning_rate": 0.0019647287040794933, "loss": 0.2207, "step": 90320 }, { "epoch": 0.6411464405299945, "grad_norm": 0.12890625, "learning_rate": 0.001964720857909101, "loss": 0.2199, "step": 90330 }, { "epoch": 0.6412174187698406, "grad_norm": 0.10107421875, "learning_rate": 0.001964713010883554, "loss": 0.2324, "step": 90340 }, { "epoch": 0.6412883970096868, "grad_norm": 0.08447265625, "learning_rate": 0.0019647051630028592, "loss": 0.2228, "step": 90350 }, { "epoch": 0.6413593752495329, "grad_norm": 0.109375, "learning_rate": 0.001964697314267026, "loss": 0.2313, "step": 90360 }, { "epoch": 0.641430353489379, "grad_norm": 0.12109375, "learning_rate": 0.0019646894646760614, "loss": 0.2286, "step": 90370 }, { "epoch": 0.6415013317292251, "grad_norm": 0.10888671875, "learning_rate": 0.0019646816142299727, "loss": 0.2403, "step": 90380 }, { "epoch": 0.6415723099690712, "grad_norm": 0.10595703125, "learning_rate": 0.001964673762928768, "loss": 0.2253, "step": 90390 }, { "epoch": 0.6416432882089174, "grad_norm": 0.1435546875, "learning_rate": 0.0019646659107724553, "loss": 0.2416, "step": 90400 }, { "epoch": 0.6417142664487635, "grad_norm": 0.07421875, "learning_rate": 0.0019646580577610422, "loss": 0.2178, "step": 90410 }, { "epoch": 0.6417852446886095, "grad_norm": 0.08251953125, "learning_rate": 0.0019646502038945367, "loss": 0.2378, "step": 90420 }, { "epoch": 0.6418562229284557, "grad_norm": 0.07470703125, "learning_rate": 0.0019646423491729466, "loss": 0.2317, "step": 90430 }, { "epoch": 0.6419272011683018, "grad_norm": 0.09033203125, "learning_rate": 0.0019646344935962787, "loss": 0.2384, "step": 90440 }, { "epoch": 0.641998179408148, "grad_norm": 0.1474609375, "learning_rate": 0.0019646266371645417, "loss": 0.236, "step": 90450 }, { "epoch": 0.6420691576479941, "grad_norm": 0.08203125, "learning_rate": 0.001964618779877743, "loss": 0.2436, "step": 90460 }, { "epoch": 0.6421401358878401, "grad_norm": 0.140625, "learning_rate": 0.0019646109217358906, "loss": 0.2403, "step": 90470 }, { "epoch": 0.6422111141276863, "grad_norm": 0.22265625, "learning_rate": 0.0019646030627389925, "loss": 0.2355, "step": 90480 }, { "epoch": 0.6422820923675324, "grad_norm": 0.11376953125, "learning_rate": 0.001964595202887056, "loss": 0.2523, "step": 90490 }, { "epoch": 0.6423530706073786, "grad_norm": 0.0908203125, "learning_rate": 0.001964587342180089, "loss": 0.2339, "step": 90500 }, { "epoch": 0.6424240488472247, "grad_norm": 0.10888671875, "learning_rate": 0.0019645794806180993, "loss": 0.2119, "step": 90510 }, { "epoch": 0.6424950270870707, "grad_norm": 0.1318359375, "learning_rate": 0.0019645716182010945, "loss": 0.2402, "step": 90520 }, { "epoch": 0.6425660053269169, "grad_norm": 0.083984375, "learning_rate": 0.0019645637549290827, "loss": 0.2323, "step": 90530 }, { "epoch": 0.642636983566763, "grad_norm": 0.09765625, "learning_rate": 0.0019645558908020716, "loss": 0.2491, "step": 90540 }, { "epoch": 0.6427079618066092, "grad_norm": 0.10498046875, "learning_rate": 0.001964548025820069, "loss": 0.2282, "step": 90550 }, { "epoch": 0.6427789400464553, "grad_norm": 0.125, "learning_rate": 0.001964540159983082, "loss": 0.2358, "step": 90560 }, { "epoch": 0.6428499182863013, "grad_norm": 0.11474609375, "learning_rate": 0.00196453229329112, "loss": 0.2324, "step": 90570 }, { "epoch": 0.6429208965261475, "grad_norm": 0.08837890625, "learning_rate": 0.001964524425744189, "loss": 0.2378, "step": 90580 }, { "epoch": 0.6429918747659936, "grad_norm": 0.07421875, "learning_rate": 0.0019645165573422977, "loss": 0.2111, "step": 90590 }, { "epoch": 0.6430628530058398, "grad_norm": 0.1142578125, "learning_rate": 0.001964508688085454, "loss": 0.2289, "step": 90600 }, { "epoch": 0.6431338312456859, "grad_norm": 0.10791015625, "learning_rate": 0.001964500817973665, "loss": 0.2385, "step": 90610 }, { "epoch": 0.6432048094855319, "grad_norm": 0.08349609375, "learning_rate": 0.001964492947006939, "loss": 0.2206, "step": 90620 }, { "epoch": 0.6432757877253781, "grad_norm": 0.0751953125, "learning_rate": 0.0019644850751852835, "loss": 0.226, "step": 90630 }, { "epoch": 0.6433467659652242, "grad_norm": 0.09765625, "learning_rate": 0.0019644772025087065, "loss": 0.2329, "step": 90640 }, { "epoch": 0.6434177442050704, "grad_norm": 0.09814453125, "learning_rate": 0.0019644693289772156, "loss": 0.2254, "step": 90650 }, { "epoch": 0.6434887224449164, "grad_norm": 0.087890625, "learning_rate": 0.0019644614545908192, "loss": 0.2327, "step": 90660 }, { "epoch": 0.6435597006847625, "grad_norm": 0.140625, "learning_rate": 0.001964453579349524, "loss": 0.2224, "step": 90670 }, { "epoch": 0.6436306789246087, "grad_norm": 0.09716796875, "learning_rate": 0.001964445703253339, "loss": 0.2197, "step": 90680 }, { "epoch": 0.6437016571644548, "grad_norm": 0.1142578125, "learning_rate": 0.0019644378263022713, "loss": 0.2222, "step": 90690 }, { "epoch": 0.643772635404301, "grad_norm": 0.0712890625, "learning_rate": 0.0019644299484963285, "loss": 0.2397, "step": 90700 }, { "epoch": 0.643843613644147, "grad_norm": 0.09326171875, "learning_rate": 0.001964422069835519, "loss": 0.2197, "step": 90710 }, { "epoch": 0.6439145918839931, "grad_norm": 0.1298828125, "learning_rate": 0.0019644141903198496, "loss": 0.225, "step": 90720 }, { "epoch": 0.6439855701238393, "grad_norm": 0.146484375, "learning_rate": 0.001964406309949329, "loss": 0.2374, "step": 90730 }, { "epoch": 0.6440565483636854, "grad_norm": 0.0849609375, "learning_rate": 0.0019643984287239648, "loss": 0.2207, "step": 90740 }, { "epoch": 0.6441275266035316, "grad_norm": 0.12060546875, "learning_rate": 0.0019643905466437644, "loss": 0.2294, "step": 90750 }, { "epoch": 0.6441985048433776, "grad_norm": 0.140625, "learning_rate": 0.0019643826637087366, "loss": 0.2417, "step": 90760 }, { "epoch": 0.6442694830832237, "grad_norm": 0.11181640625, "learning_rate": 0.0019643747799188882, "loss": 0.252, "step": 90770 }, { "epoch": 0.6443404613230699, "grad_norm": 0.09326171875, "learning_rate": 0.001964366895274227, "loss": 0.2275, "step": 90780 }, { "epoch": 0.644411439562916, "grad_norm": 0.2041015625, "learning_rate": 0.001964359009774762, "loss": 0.2448, "step": 90790 }, { "epoch": 0.6444824178027622, "grad_norm": 0.08984375, "learning_rate": 0.0019643511234204995, "loss": 0.2165, "step": 90800 }, { "epoch": 0.6445533960426082, "grad_norm": 0.109375, "learning_rate": 0.001964343236211448, "loss": 0.2423, "step": 90810 }, { "epoch": 0.6446243742824543, "grad_norm": 0.1318359375, "learning_rate": 0.001964335348147615, "loss": 0.2355, "step": 90820 }, { "epoch": 0.6446953525223005, "grad_norm": 0.1259765625, "learning_rate": 0.0019643274592290086, "loss": 0.2465, "step": 90830 }, { "epoch": 0.6447663307621466, "grad_norm": 0.1513671875, "learning_rate": 0.0019643195694556367, "loss": 0.2403, "step": 90840 }, { "epoch": 0.6448373090019928, "grad_norm": 0.115234375, "learning_rate": 0.0019643116788275066, "loss": 0.2376, "step": 90850 }, { "epoch": 0.6449082872418388, "grad_norm": 0.1181640625, "learning_rate": 0.0019643037873446267, "loss": 0.2212, "step": 90860 }, { "epoch": 0.6449792654816849, "grad_norm": 0.0888671875, "learning_rate": 0.0019642958950070043, "loss": 0.2275, "step": 90870 }, { "epoch": 0.6450502437215311, "grad_norm": 0.091796875, "learning_rate": 0.0019642880018146476, "loss": 0.2391, "step": 90880 }, { "epoch": 0.6451212219613772, "grad_norm": 0.1337890625, "learning_rate": 0.001964280107767564, "loss": 0.2297, "step": 90890 }, { "epoch": 0.6451922002012234, "grad_norm": 0.146484375, "learning_rate": 0.001964272212865762, "loss": 0.232, "step": 90900 }, { "epoch": 0.6452631784410694, "grad_norm": 0.1650390625, "learning_rate": 0.001964264317109249, "loss": 0.2403, "step": 90910 }, { "epoch": 0.6453341566809155, "grad_norm": 0.20703125, "learning_rate": 0.0019642564204980324, "loss": 0.2237, "step": 90920 }, { "epoch": 0.6454051349207617, "grad_norm": 0.1083984375, "learning_rate": 0.0019642485230321207, "loss": 0.2393, "step": 90930 }, { "epoch": 0.6454761131606078, "grad_norm": 0.14453125, "learning_rate": 0.0019642406247115207, "loss": 0.2306, "step": 90940 }, { "epoch": 0.6455470914004539, "grad_norm": 0.06884765625, "learning_rate": 0.001964232725536241, "loss": 0.225, "step": 90950 }, { "epoch": 0.6456180696403, "grad_norm": 0.0869140625, "learning_rate": 0.00196422482550629, "loss": 0.2216, "step": 90960 }, { "epoch": 0.6456890478801461, "grad_norm": 0.103515625, "learning_rate": 0.0019642169246216743, "loss": 0.2461, "step": 90970 }, { "epoch": 0.6457600261199923, "grad_norm": 0.1396484375, "learning_rate": 0.0019642090228824023, "loss": 0.2295, "step": 90980 }, { "epoch": 0.6458310043598384, "grad_norm": 0.07080078125, "learning_rate": 0.001964201120288482, "loss": 0.2236, "step": 90990 }, { "epoch": 0.6459019825996845, "grad_norm": 0.10546875, "learning_rate": 0.0019641932168399205, "loss": 0.227, "step": 91000 }, { "epoch": 0.6459729608395306, "grad_norm": 0.09765625, "learning_rate": 0.0019641853125367268, "loss": 0.2312, "step": 91010 }, { "epoch": 0.6460439390793767, "grad_norm": 0.1171875, "learning_rate": 0.0019641774073789068, "loss": 0.2309, "step": 91020 }, { "epoch": 0.6461149173192229, "grad_norm": 0.09228515625, "learning_rate": 0.0019641695013664705, "loss": 0.2509, "step": 91030 }, { "epoch": 0.646185895559069, "grad_norm": 0.103515625, "learning_rate": 0.0019641615944994244, "loss": 0.2196, "step": 91040 }, { "epoch": 0.646256873798915, "grad_norm": 0.0732421875, "learning_rate": 0.0019641536867777767, "loss": 0.231, "step": 91050 }, { "epoch": 0.6463278520387612, "grad_norm": 0.1220703125, "learning_rate": 0.001964145778201535, "loss": 0.2461, "step": 91060 }, { "epoch": 0.6463988302786073, "grad_norm": 0.12451171875, "learning_rate": 0.0019641378687707076, "loss": 0.2143, "step": 91070 }, { "epoch": 0.6464698085184535, "grad_norm": 0.1572265625, "learning_rate": 0.0019641299584853018, "loss": 0.2306, "step": 91080 }, { "epoch": 0.6465407867582996, "grad_norm": 0.23046875, "learning_rate": 0.0019641220473453256, "loss": 0.2409, "step": 91090 }, { "epoch": 0.6466117649981457, "grad_norm": 0.12158203125, "learning_rate": 0.001964114135350787, "loss": 0.222, "step": 91100 }, { "epoch": 0.6466827432379918, "grad_norm": 0.0888671875, "learning_rate": 0.0019641062225016935, "loss": 0.2297, "step": 91110 }, { "epoch": 0.6467537214778379, "grad_norm": 0.09716796875, "learning_rate": 0.001964098308798053, "loss": 0.2142, "step": 91120 }, { "epoch": 0.6468246997176841, "grad_norm": 0.083984375, "learning_rate": 0.0019640903942398738, "loss": 0.221, "step": 91130 }, { "epoch": 0.6468956779575302, "grad_norm": 0.09814453125, "learning_rate": 0.001964082478827163, "loss": 0.2325, "step": 91140 }, { "epoch": 0.6469666561973763, "grad_norm": 0.08544921875, "learning_rate": 0.0019640745625599293, "loss": 0.2236, "step": 91150 }, { "epoch": 0.6470376344372224, "grad_norm": 0.09521484375, "learning_rate": 0.0019640666454381795, "loss": 0.2198, "step": 91160 }, { "epoch": 0.6471086126770685, "grad_norm": 0.09765625, "learning_rate": 0.0019640587274619217, "loss": 0.2337, "step": 91170 }, { "epoch": 0.6471795909169147, "grad_norm": 0.11083984375, "learning_rate": 0.001964050808631165, "loss": 0.2354, "step": 91180 }, { "epoch": 0.6472505691567607, "grad_norm": 0.111328125, "learning_rate": 0.001964042888945915, "loss": 0.2221, "step": 91190 }, { "epoch": 0.6473215473966069, "grad_norm": 0.1064453125, "learning_rate": 0.0019640349684061816, "loss": 0.2392, "step": 91200 }, { "epoch": 0.647392525636453, "grad_norm": 0.0625, "learning_rate": 0.0019640270470119713, "loss": 0.2337, "step": 91210 }, { "epoch": 0.6474635038762991, "grad_norm": 0.11083984375, "learning_rate": 0.001964019124763293, "loss": 0.2213, "step": 91220 }, { "epoch": 0.6475344821161453, "grad_norm": 0.3671875, "learning_rate": 0.0019640112016601533, "loss": 0.2325, "step": 91230 }, { "epoch": 0.6476054603559913, "grad_norm": 0.0673828125, "learning_rate": 0.001964003277702561, "loss": 0.2459, "step": 91240 }, { "epoch": 0.6476764385958375, "grad_norm": 0.06591796875, "learning_rate": 0.0019639953528905237, "loss": 0.2253, "step": 91250 }, { "epoch": 0.6477474168356836, "grad_norm": 0.07763671875, "learning_rate": 0.0019639874272240486, "loss": 0.2293, "step": 91260 }, { "epoch": 0.6478183950755297, "grad_norm": 0.1328125, "learning_rate": 0.0019639795007031446, "loss": 0.2359, "step": 91270 }, { "epoch": 0.6478893733153759, "grad_norm": 0.1611328125, "learning_rate": 0.001963971573327819, "loss": 0.2384, "step": 91280 }, { "epoch": 0.6479603515552219, "grad_norm": 0.1455078125, "learning_rate": 0.0019639636450980795, "loss": 0.2307, "step": 91290 }, { "epoch": 0.6480313297950681, "grad_norm": 0.1240234375, "learning_rate": 0.0019639557160139338, "loss": 0.2309, "step": 91300 }, { "epoch": 0.6481023080349142, "grad_norm": 0.1025390625, "learning_rate": 0.0019639477860753903, "loss": 0.2207, "step": 91310 }, { "epoch": 0.6481732862747603, "grad_norm": 0.1005859375, "learning_rate": 0.001963939855282457, "loss": 0.2571, "step": 91320 }, { "epoch": 0.6482442645146065, "grad_norm": 0.109375, "learning_rate": 0.001963931923635141, "loss": 0.2205, "step": 91330 }, { "epoch": 0.6483152427544525, "grad_norm": 0.099609375, "learning_rate": 0.0019639239911334503, "loss": 0.2232, "step": 91340 }, { "epoch": 0.6483862209942987, "grad_norm": 0.0947265625, "learning_rate": 0.001963916057777393, "loss": 0.231, "step": 91350 }, { "epoch": 0.6484571992341448, "grad_norm": 0.07470703125, "learning_rate": 0.0019639081235669773, "loss": 0.2147, "step": 91360 }, { "epoch": 0.6485281774739909, "grad_norm": 0.08349609375, "learning_rate": 0.00196390018850221, "loss": 0.2249, "step": 91370 }, { "epoch": 0.6485991557138371, "grad_norm": 0.0791015625, "learning_rate": 0.0019638922525831, "loss": 0.2255, "step": 91380 }, { "epoch": 0.6486701339536831, "grad_norm": 0.09521484375, "learning_rate": 0.0019638843158096544, "loss": 0.225, "step": 91390 }, { "epoch": 0.6487411121935293, "grad_norm": 0.109375, "learning_rate": 0.0019638763781818815, "loss": 0.2294, "step": 91400 }, { "epoch": 0.6488120904333754, "grad_norm": 0.06494140625, "learning_rate": 0.001963868439699789, "loss": 0.2535, "step": 91410 }, { "epoch": 0.6488830686732215, "grad_norm": 0.109375, "learning_rate": 0.001963860500363385, "loss": 0.2315, "step": 91420 }, { "epoch": 0.6489540469130676, "grad_norm": 0.107421875, "learning_rate": 0.0019638525601726763, "loss": 0.2648, "step": 91430 }, { "epoch": 0.6490250251529137, "grad_norm": 0.109375, "learning_rate": 0.0019638446191276725, "loss": 0.228, "step": 91440 }, { "epoch": 0.6490960033927599, "grad_norm": 0.09716796875, "learning_rate": 0.00196383667722838, "loss": 0.2319, "step": 91450 }, { "epoch": 0.649166981632606, "grad_norm": 0.1171875, "learning_rate": 0.0019638287344748073, "loss": 0.2293, "step": 91460 }, { "epoch": 0.6492379598724521, "grad_norm": 0.0947265625, "learning_rate": 0.001963820790866962, "loss": 0.2456, "step": 91470 }, { "epoch": 0.6493089381122982, "grad_norm": 0.08935546875, "learning_rate": 0.001963812846404852, "loss": 0.2381, "step": 91480 }, { "epoch": 0.6493799163521443, "grad_norm": 0.12451171875, "learning_rate": 0.001963804901088486, "loss": 0.2389, "step": 91490 }, { "epoch": 0.6494508945919905, "grad_norm": 0.07177734375, "learning_rate": 0.0019637969549178703, "loss": 0.2221, "step": 91500 }, { "epoch": 0.6495218728318366, "grad_norm": 0.126953125, "learning_rate": 0.0019637890078930136, "loss": 0.2564, "step": 91510 }, { "epoch": 0.6495928510716827, "grad_norm": 0.134765625, "learning_rate": 0.001963781060013924, "loss": 0.2417, "step": 91520 }, { "epoch": 0.6496638293115288, "grad_norm": 0.1259765625, "learning_rate": 0.0019637731112806087, "loss": 0.2268, "step": 91530 }, { "epoch": 0.6497348075513749, "grad_norm": 0.1923828125, "learning_rate": 0.001963765161693076, "loss": 0.2301, "step": 91540 }, { "epoch": 0.6498057857912211, "grad_norm": 0.076171875, "learning_rate": 0.001963757211251334, "loss": 0.2197, "step": 91550 }, { "epoch": 0.6498767640310672, "grad_norm": 0.09423828125, "learning_rate": 0.0019637492599553904, "loss": 0.2336, "step": 91560 }, { "epoch": 0.6499477422709133, "grad_norm": 0.1162109375, "learning_rate": 0.0019637413078052524, "loss": 0.2241, "step": 91570 }, { "epoch": 0.6500187205107594, "grad_norm": 0.12353515625, "learning_rate": 0.0019637333548009287, "loss": 0.2314, "step": 91580 }, { "epoch": 0.6500896987506055, "grad_norm": 0.1484375, "learning_rate": 0.0019637254009424268, "loss": 0.2505, "step": 91590 }, { "epoch": 0.6501606769904517, "grad_norm": 0.0751953125, "learning_rate": 0.0019637174462297543, "loss": 0.2389, "step": 91600 }, { "epoch": 0.6502316552302978, "grad_norm": 0.107421875, "learning_rate": 0.00196370949066292, "loss": 0.2402, "step": 91610 }, { "epoch": 0.6503026334701439, "grad_norm": 0.10009765625, "learning_rate": 0.0019637015342419306, "loss": 0.2129, "step": 91620 }, { "epoch": 0.65037361170999, "grad_norm": 0.146484375, "learning_rate": 0.001963693576966795, "loss": 0.232, "step": 91630 }, { "epoch": 0.6504445899498361, "grad_norm": 0.07861328125, "learning_rate": 0.00196368561883752, "loss": 0.2152, "step": 91640 }, { "epoch": 0.6505155681896823, "grad_norm": 0.0849609375, "learning_rate": 0.0019636776598541145, "loss": 0.2328, "step": 91650 }, { "epoch": 0.6505865464295284, "grad_norm": 0.109375, "learning_rate": 0.001963669700016586, "loss": 0.2335, "step": 91660 }, { "epoch": 0.6506575246693744, "grad_norm": 0.150390625, "learning_rate": 0.0019636617393249418, "loss": 0.2225, "step": 91670 }, { "epoch": 0.6507285029092206, "grad_norm": 0.1396484375, "learning_rate": 0.00196365377777919, "loss": 0.2195, "step": 91680 }, { "epoch": 0.6507994811490667, "grad_norm": 0.0947265625, "learning_rate": 0.0019636458153793395, "loss": 0.2308, "step": 91690 }, { "epoch": 0.6508704593889129, "grad_norm": 0.1279296875, "learning_rate": 0.0019636378521253974, "loss": 0.2426, "step": 91700 }, { "epoch": 0.650941437628759, "grad_norm": 0.09765625, "learning_rate": 0.001963629888017371, "loss": 0.2349, "step": 91710 }, { "epoch": 0.651012415868605, "grad_norm": 0.1103515625, "learning_rate": 0.0019636219230552694, "loss": 0.2367, "step": 91720 }, { "epoch": 0.6510833941084512, "grad_norm": 0.099609375, "learning_rate": 0.0019636139572391, "loss": 0.2368, "step": 91730 }, { "epoch": 0.6511543723482973, "grad_norm": 0.095703125, "learning_rate": 0.0019636059905688698, "loss": 0.2278, "step": 91740 }, { "epoch": 0.6512253505881435, "grad_norm": 0.10498046875, "learning_rate": 0.001963598023044588, "loss": 0.2277, "step": 91750 }, { "epoch": 0.6512963288279896, "grad_norm": 0.11279296875, "learning_rate": 0.0019635900546662614, "loss": 0.2242, "step": 91760 }, { "epoch": 0.6513673070678356, "grad_norm": 0.10888671875, "learning_rate": 0.0019635820854338984, "loss": 0.2256, "step": 91770 }, { "epoch": 0.6514382853076818, "grad_norm": 0.1376953125, "learning_rate": 0.001963574115347507, "loss": 0.2284, "step": 91780 }, { "epoch": 0.6515092635475279, "grad_norm": 0.1474609375, "learning_rate": 0.001963566144407095, "loss": 0.2284, "step": 91790 }, { "epoch": 0.6515802417873741, "grad_norm": 0.11083984375, "learning_rate": 0.00196355817261267, "loss": 0.2317, "step": 91800 }, { "epoch": 0.6516512200272202, "grad_norm": 0.10009765625, "learning_rate": 0.0019635501999642406, "loss": 0.2319, "step": 91810 }, { "epoch": 0.6517221982670662, "grad_norm": 0.1748046875, "learning_rate": 0.0019635422264618133, "loss": 0.2398, "step": 91820 }, { "epoch": 0.6517931765069124, "grad_norm": 0.1328125, "learning_rate": 0.0019635342521053976, "loss": 0.2459, "step": 91830 }, { "epoch": 0.6518641547467585, "grad_norm": 0.31640625, "learning_rate": 0.0019635262768950006, "loss": 0.2422, "step": 91840 }, { "epoch": 0.6519351329866047, "grad_norm": 0.1259765625, "learning_rate": 0.00196351830083063, "loss": 0.2237, "step": 91850 }, { "epoch": 0.6520061112264508, "grad_norm": 0.107421875, "learning_rate": 0.0019635103239122936, "loss": 0.2457, "step": 91860 }, { "epoch": 0.6520770894662968, "grad_norm": 0.11328125, "learning_rate": 0.00196350234614, "loss": 0.2349, "step": 91870 }, { "epoch": 0.652148067706143, "grad_norm": 0.1201171875, "learning_rate": 0.001963494367513757, "loss": 0.2362, "step": 91880 }, { "epoch": 0.6522190459459891, "grad_norm": 0.11474609375, "learning_rate": 0.001963486388033572, "loss": 0.23, "step": 91890 }, { "epoch": 0.6522900241858353, "grad_norm": 0.0810546875, "learning_rate": 0.0019634784076994526, "loss": 0.2359, "step": 91900 }, { "epoch": 0.6523610024256814, "grad_norm": 0.10595703125, "learning_rate": 0.0019634704265114075, "loss": 0.2235, "step": 91910 }, { "epoch": 0.6524319806655274, "grad_norm": 0.1044921875, "learning_rate": 0.0019634624444694443, "loss": 0.2334, "step": 91920 }, { "epoch": 0.6525029589053736, "grad_norm": 0.09033203125, "learning_rate": 0.001963454461573571, "loss": 0.2205, "step": 91930 }, { "epoch": 0.6525739371452197, "grad_norm": 0.1298828125, "learning_rate": 0.001963446477823795, "loss": 0.2321, "step": 91940 }, { "epoch": 0.6526449153850659, "grad_norm": 0.1083984375, "learning_rate": 0.001963438493220125, "loss": 0.2165, "step": 91950 }, { "epoch": 0.6527158936249119, "grad_norm": 0.1162109375, "learning_rate": 0.001963430507762568, "loss": 0.2188, "step": 91960 }, { "epoch": 0.652786871864758, "grad_norm": 0.09912109375, "learning_rate": 0.0019634225214511325, "loss": 0.2323, "step": 91970 }, { "epoch": 0.6528578501046042, "grad_norm": 0.138671875, "learning_rate": 0.001963414534285826, "loss": 0.2399, "step": 91980 }, { "epoch": 0.6529288283444503, "grad_norm": 0.12890625, "learning_rate": 0.0019634065462666573, "loss": 0.2176, "step": 91990 }, { "epoch": 0.6529998065842965, "grad_norm": 0.197265625, "learning_rate": 0.0019633985573936333, "loss": 0.2423, "step": 92000 }, { "epoch": 0.6529998065842965, "eval_covost2-zh-en_loss": 3.825683355331421, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 20.7306, "eval_covost2-zh-en_samples_per_second": 3.087, "eval_covost2-zh-en_steps_per_second": 0.193, "step": 92000 }, { "epoch": 0.6529998065842965, "eval_covost2-en-zh_loss": 3.1699750423431396, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 19.8809, "eval_covost2-en-zh_samples_per_second": 3.219, "eval_covost2-en-zh_steps_per_second": 0.201, "step": 92000 }, { "epoch": 0.6530707848241425, "grad_norm": 0.1591796875, "learning_rate": 0.001963390567666762, "loss": 0.2427, "step": 92010 }, { "epoch": 0.6531417630639886, "grad_norm": 0.1416015625, "learning_rate": 0.0019633825770860517, "loss": 0.2288, "step": 92020 }, { "epoch": 0.6532127413038348, "grad_norm": 0.1201171875, "learning_rate": 0.0019633745856515103, "loss": 0.2317, "step": 92030 }, { "epoch": 0.6532837195436809, "grad_norm": 0.08544921875, "learning_rate": 0.0019633665933631453, "loss": 0.2437, "step": 92040 }, { "epoch": 0.6533546977835271, "grad_norm": 0.0849609375, "learning_rate": 0.001963358600220965, "loss": 0.2236, "step": 92050 }, { "epoch": 0.6534256760233731, "grad_norm": 0.08251953125, "learning_rate": 0.001963350606224977, "loss": 0.2313, "step": 92060 }, { "epoch": 0.6534966542632192, "grad_norm": 0.1123046875, "learning_rate": 0.00196334261137519, "loss": 0.2293, "step": 92070 }, { "epoch": 0.6535676325030654, "grad_norm": 0.1455078125, "learning_rate": 0.0019633346156716107, "loss": 0.2328, "step": 92080 }, { "epoch": 0.6536386107429115, "grad_norm": 0.11962890625, "learning_rate": 0.0019633266191142473, "loss": 0.2353, "step": 92090 }, { "epoch": 0.6537095889827577, "grad_norm": 0.09228515625, "learning_rate": 0.001963318621703108, "loss": 0.2141, "step": 92100 }, { "epoch": 0.6537805672226037, "grad_norm": 0.08447265625, "learning_rate": 0.0019633106234382015, "loss": 0.2127, "step": 92110 }, { "epoch": 0.6538515454624498, "grad_norm": 0.1357421875, "learning_rate": 0.001963302624319534, "loss": 0.2259, "step": 92120 }, { "epoch": 0.653922523702296, "grad_norm": 0.080078125, "learning_rate": 0.0019632946243471152, "loss": 0.2348, "step": 92130 }, { "epoch": 0.6539935019421421, "grad_norm": 0.09326171875, "learning_rate": 0.0019632866235209513, "loss": 0.2297, "step": 92140 }, { "epoch": 0.6540644801819883, "grad_norm": 0.0673828125, "learning_rate": 0.0019632786218410515, "loss": 0.2318, "step": 92150 }, { "epoch": 0.6541354584218343, "grad_norm": 0.26171875, "learning_rate": 0.001963270619307423, "loss": 0.2245, "step": 92160 }, { "epoch": 0.6542064366616804, "grad_norm": 0.08837890625, "learning_rate": 0.001963262615920074, "loss": 0.2356, "step": 92170 }, { "epoch": 0.6542774149015266, "grad_norm": 0.080078125, "learning_rate": 0.0019632546116790128, "loss": 0.227, "step": 92180 }, { "epoch": 0.6543483931413727, "grad_norm": 0.1328125, "learning_rate": 0.0019632466065842465, "loss": 0.249, "step": 92190 }, { "epoch": 0.6544193713812188, "grad_norm": 0.0791015625, "learning_rate": 0.0019632386006357833, "loss": 0.2425, "step": 92200 }, { "epoch": 0.6544903496210649, "grad_norm": 0.14453125, "learning_rate": 0.0019632305938336314, "loss": 0.2394, "step": 92210 }, { "epoch": 0.654561327860911, "grad_norm": 0.078125, "learning_rate": 0.0019632225861777983, "loss": 0.2196, "step": 92220 }, { "epoch": 0.6546323061007572, "grad_norm": 0.08837890625, "learning_rate": 0.0019632145776682926, "loss": 0.2284, "step": 92230 }, { "epoch": 0.6547032843406033, "grad_norm": 0.10400390625, "learning_rate": 0.0019632065683051216, "loss": 0.2301, "step": 92240 }, { "epoch": 0.6547742625804494, "grad_norm": 0.0791015625, "learning_rate": 0.0019631985580882932, "loss": 0.2356, "step": 92250 }, { "epoch": 0.6548452408202955, "grad_norm": 0.08349609375, "learning_rate": 0.0019631905470178157, "loss": 0.2187, "step": 92260 }, { "epoch": 0.6549162190601416, "grad_norm": 0.1240234375, "learning_rate": 0.0019631825350936967, "loss": 0.233, "step": 92270 }, { "epoch": 0.6549871972999878, "grad_norm": 0.11572265625, "learning_rate": 0.0019631745223159447, "loss": 0.2242, "step": 92280 }, { "epoch": 0.6550581755398339, "grad_norm": 0.3046875, "learning_rate": 0.001963166508684567, "loss": 0.2275, "step": 92290 }, { "epoch": 0.65512915377968, "grad_norm": 0.125, "learning_rate": 0.0019631584941995715, "loss": 0.2353, "step": 92300 }, { "epoch": 0.6552001320195261, "grad_norm": 0.10791015625, "learning_rate": 0.0019631504788609664, "loss": 0.2267, "step": 92310 }, { "epoch": 0.6552711102593722, "grad_norm": 0.10498046875, "learning_rate": 0.00196314246266876, "loss": 0.2323, "step": 92320 }, { "epoch": 0.6553420884992184, "grad_norm": 0.10791015625, "learning_rate": 0.001963134445622959, "loss": 0.216, "step": 92330 }, { "epoch": 0.6554130667390645, "grad_norm": 0.11474609375, "learning_rate": 0.0019631264277235726, "loss": 0.2238, "step": 92340 }, { "epoch": 0.6554840449789106, "grad_norm": 0.08984375, "learning_rate": 0.0019631184089706085, "loss": 0.2188, "step": 92350 }, { "epoch": 0.6555550232187567, "grad_norm": 0.08349609375, "learning_rate": 0.001963110389364074, "loss": 0.2338, "step": 92360 }, { "epoch": 0.6556260014586028, "grad_norm": 0.11474609375, "learning_rate": 0.0019631023689039777, "loss": 0.229, "step": 92370 }, { "epoch": 0.655696979698449, "grad_norm": 0.095703125, "learning_rate": 0.001963094347590327, "loss": 0.2198, "step": 92380 }, { "epoch": 0.6557679579382951, "grad_norm": 0.06982421875, "learning_rate": 0.0019630863254231305, "loss": 0.2249, "step": 92390 }, { "epoch": 0.6558389361781412, "grad_norm": 0.1123046875, "learning_rate": 0.0019630783024023953, "loss": 0.2357, "step": 92400 }, { "epoch": 0.6559099144179873, "grad_norm": 0.1572265625, "learning_rate": 0.0019630702785281297, "loss": 0.2419, "step": 92410 }, { "epoch": 0.6559808926578334, "grad_norm": 0.11865234375, "learning_rate": 0.001963062253800342, "loss": 0.2314, "step": 92420 }, { "epoch": 0.6560518708976796, "grad_norm": 0.1298828125, "learning_rate": 0.0019630542282190397, "loss": 0.2431, "step": 92430 }, { "epoch": 0.6561228491375256, "grad_norm": 0.11669921875, "learning_rate": 0.001963046201784231, "loss": 0.222, "step": 92440 }, { "epoch": 0.6561938273773718, "grad_norm": 0.10986328125, "learning_rate": 0.001963038174495923, "loss": 0.2274, "step": 92450 }, { "epoch": 0.6562648056172179, "grad_norm": 0.1875, "learning_rate": 0.001963030146354125, "loss": 0.2263, "step": 92460 }, { "epoch": 0.656335783857064, "grad_norm": 0.07373046875, "learning_rate": 0.0019630221173588445, "loss": 0.2299, "step": 92470 }, { "epoch": 0.6564067620969102, "grad_norm": 0.0634765625, "learning_rate": 0.001963014087510089, "loss": 0.221, "step": 92480 }, { "epoch": 0.6564777403367562, "grad_norm": 0.1015625, "learning_rate": 0.0019630060568078663, "loss": 0.2325, "step": 92490 }, { "epoch": 0.6565487185766024, "grad_norm": 0.1083984375, "learning_rate": 0.001962998025252185, "loss": 0.2272, "step": 92500 }, { "epoch": 0.6566196968164485, "grad_norm": 0.0966796875, "learning_rate": 0.001962989992843052, "loss": 0.2305, "step": 92510 }, { "epoch": 0.6566906750562946, "grad_norm": 0.1337890625, "learning_rate": 0.001962981959580477, "loss": 0.2291, "step": 92520 }, { "epoch": 0.6567616532961408, "grad_norm": 0.08203125, "learning_rate": 0.0019629739254644666, "loss": 0.2271, "step": 92530 }, { "epoch": 0.6568326315359868, "grad_norm": 0.1064453125, "learning_rate": 0.001962965890495029, "loss": 0.2436, "step": 92540 }, { "epoch": 0.656903609775833, "grad_norm": 0.1171875, "learning_rate": 0.0019629578546721724, "loss": 0.246, "step": 92550 }, { "epoch": 0.6569745880156791, "grad_norm": 0.205078125, "learning_rate": 0.0019629498179959044, "loss": 0.2197, "step": 92560 }, { "epoch": 0.6570455662555252, "grad_norm": 0.1328125, "learning_rate": 0.001962941780466233, "loss": 0.2381, "step": 92570 }, { "epoch": 0.6571165444953714, "grad_norm": 0.08984375, "learning_rate": 0.0019629337420831667, "loss": 0.2367, "step": 92580 }, { "epoch": 0.6571875227352174, "grad_norm": 0.07861328125, "learning_rate": 0.0019629257028467126, "loss": 0.2335, "step": 92590 }, { "epoch": 0.6572585009750636, "grad_norm": 0.341796875, "learning_rate": 0.001962917662756879, "loss": 0.2394, "step": 92600 }, { "epoch": 0.6573294792149097, "grad_norm": 0.08251953125, "learning_rate": 0.001962909621813674, "loss": 0.2437, "step": 92610 }, { "epoch": 0.6574004574547558, "grad_norm": 0.1650390625, "learning_rate": 0.001962901580017106, "loss": 0.2358, "step": 92620 }, { "epoch": 0.657471435694602, "grad_norm": 0.103515625, "learning_rate": 0.001962893537367182, "loss": 0.2352, "step": 92630 }, { "epoch": 0.657542413934448, "grad_norm": 0.1787109375, "learning_rate": 0.00196288549386391, "loss": 0.2302, "step": 92640 }, { "epoch": 0.6576133921742942, "grad_norm": 0.09326171875, "learning_rate": 0.0019628774495072986, "loss": 0.253, "step": 92650 }, { "epoch": 0.6576843704141403, "grad_norm": 0.083984375, "learning_rate": 0.001962869404297356, "loss": 0.2351, "step": 92660 }, { "epoch": 0.6577553486539864, "grad_norm": 0.08056640625, "learning_rate": 0.001962861358234089, "loss": 0.2503, "step": 92670 }, { "epoch": 0.6578263268938325, "grad_norm": 0.1552734375, "learning_rate": 0.0019628533113175066, "loss": 0.2142, "step": 92680 }, { "epoch": 0.6578973051336786, "grad_norm": 0.09033203125, "learning_rate": 0.001962845263547616, "loss": 0.2357, "step": 92690 }, { "epoch": 0.6579682833735248, "grad_norm": 0.10498046875, "learning_rate": 0.0019628372149244257, "loss": 0.2143, "step": 92700 }, { "epoch": 0.6580392616133709, "grad_norm": 0.07958984375, "learning_rate": 0.0019628291654479432, "loss": 0.2134, "step": 92710 }, { "epoch": 0.658110239853217, "grad_norm": 0.09033203125, "learning_rate": 0.001962821115118177, "loss": 0.2264, "step": 92720 }, { "epoch": 0.6581812180930631, "grad_norm": 0.11279296875, "learning_rate": 0.001962813063935135, "loss": 0.2432, "step": 92730 }, { "epoch": 0.6582521963329092, "grad_norm": 0.12255859375, "learning_rate": 0.0019628050118988246, "loss": 0.2434, "step": 92740 }, { "epoch": 0.6583231745727554, "grad_norm": 0.1376953125, "learning_rate": 0.001962796959009254, "loss": 0.228, "step": 92750 }, { "epoch": 0.6583941528126015, "grad_norm": 0.1904296875, "learning_rate": 0.0019627889052664317, "loss": 0.2364, "step": 92760 }, { "epoch": 0.6584651310524476, "grad_norm": 0.07421875, "learning_rate": 0.0019627808506703652, "loss": 0.2164, "step": 92770 }, { "epoch": 0.6585361092922937, "grad_norm": 0.09716796875, "learning_rate": 0.0019627727952210624, "loss": 0.235, "step": 92780 }, { "epoch": 0.6586070875321398, "grad_norm": 0.107421875, "learning_rate": 0.001962764738918531, "loss": 0.2235, "step": 92790 }, { "epoch": 0.658678065771986, "grad_norm": 0.1435546875, "learning_rate": 0.00196275668176278, "loss": 0.225, "step": 92800 }, { "epoch": 0.6587490440118321, "grad_norm": 0.09912109375, "learning_rate": 0.0019627486237538162, "loss": 0.2366, "step": 92810 }, { "epoch": 0.6588200222516782, "grad_norm": 0.1640625, "learning_rate": 0.0019627405648916484, "loss": 0.2269, "step": 92820 }, { "epoch": 0.6588910004915243, "grad_norm": 0.10888671875, "learning_rate": 0.0019627325051762845, "loss": 0.2255, "step": 92830 }, { "epoch": 0.6589619787313704, "grad_norm": 0.1513671875, "learning_rate": 0.0019627244446077316, "loss": 0.232, "step": 92840 }, { "epoch": 0.6590329569712166, "grad_norm": 0.07470703125, "learning_rate": 0.0019627163831859983, "loss": 0.2349, "step": 92850 }, { "epoch": 0.6591039352110627, "grad_norm": 0.125, "learning_rate": 0.0019627083209110933, "loss": 0.2264, "step": 92860 }, { "epoch": 0.6591749134509088, "grad_norm": 0.07958984375, "learning_rate": 0.001962700257783023, "loss": 0.2363, "step": 92870 }, { "epoch": 0.6592458916907549, "grad_norm": 0.07470703125, "learning_rate": 0.001962692193801797, "loss": 0.2452, "step": 92880 }, { "epoch": 0.659316869930601, "grad_norm": 0.1357421875, "learning_rate": 0.001962684128967422, "loss": 0.2249, "step": 92890 }, { "epoch": 0.6593878481704472, "grad_norm": 0.06689453125, "learning_rate": 0.0019626760632799065, "loss": 0.2189, "step": 92900 }, { "epoch": 0.6594588264102933, "grad_norm": 0.16796875, "learning_rate": 0.0019626679967392584, "loss": 0.2335, "step": 92910 }, { "epoch": 0.6595298046501393, "grad_norm": 0.162109375, "learning_rate": 0.001962659929345486, "loss": 0.2351, "step": 92920 }, { "epoch": 0.6596007828899855, "grad_norm": 0.0810546875, "learning_rate": 0.0019626518610985967, "loss": 0.2278, "step": 92930 }, { "epoch": 0.6596717611298316, "grad_norm": 0.11279296875, "learning_rate": 0.001962643791998599, "loss": 0.2248, "step": 92940 }, { "epoch": 0.6597427393696778, "grad_norm": 0.083984375, "learning_rate": 0.0019626357220455005, "loss": 0.242, "step": 92950 }, { "epoch": 0.6598137176095239, "grad_norm": 0.08984375, "learning_rate": 0.0019626276512393096, "loss": 0.2349, "step": 92960 }, { "epoch": 0.6598846958493699, "grad_norm": 0.0869140625, "learning_rate": 0.0019626195795800334, "loss": 0.24, "step": 92970 }, { "epoch": 0.6599556740892161, "grad_norm": 0.07666015625, "learning_rate": 0.001962611507067681, "loss": 0.2114, "step": 92980 }, { "epoch": 0.6600266523290622, "grad_norm": 0.1103515625, "learning_rate": 0.0019626034337022597, "loss": 0.2344, "step": 92990 }, { "epoch": 0.6600976305689084, "grad_norm": 0.11669921875, "learning_rate": 0.001962595359483778, "loss": 0.2176, "step": 93000 }, { "epoch": 0.6601686088087545, "grad_norm": 0.1298828125, "learning_rate": 0.001962587284412243, "loss": 0.2258, "step": 93010 }, { "epoch": 0.6602395870486005, "grad_norm": 0.10205078125, "learning_rate": 0.0019625792084876637, "loss": 0.2396, "step": 93020 }, { "epoch": 0.6603105652884467, "grad_norm": 0.09375, "learning_rate": 0.0019625711317100476, "loss": 0.2398, "step": 93030 }, { "epoch": 0.6603815435282928, "grad_norm": 0.1796875, "learning_rate": 0.0019625630540794025, "loss": 0.2173, "step": 93040 }, { "epoch": 0.660452521768139, "grad_norm": 0.216796875, "learning_rate": 0.0019625549755957366, "loss": 0.2347, "step": 93050 }, { "epoch": 0.6605235000079851, "grad_norm": 0.0927734375, "learning_rate": 0.0019625468962590578, "loss": 0.2153, "step": 93060 }, { "epoch": 0.6605944782478311, "grad_norm": 0.322265625, "learning_rate": 0.0019625388160693746, "loss": 0.2286, "step": 93070 }, { "epoch": 0.6606654564876773, "grad_norm": 0.119140625, "learning_rate": 0.001962530735026694, "loss": 0.2422, "step": 93080 }, { "epoch": 0.6607364347275234, "grad_norm": 0.1572265625, "learning_rate": 0.001962522653131025, "loss": 0.2305, "step": 93090 }, { "epoch": 0.6608074129673696, "grad_norm": 0.1630859375, "learning_rate": 0.001962514570382375, "loss": 0.2298, "step": 93100 }, { "epoch": 0.6608783912072157, "grad_norm": 0.123046875, "learning_rate": 0.001962506486780752, "loss": 0.2455, "step": 93110 }, { "epoch": 0.6609493694470617, "grad_norm": 0.1083984375, "learning_rate": 0.0019624984023261646, "loss": 0.2277, "step": 93120 }, { "epoch": 0.6610203476869079, "grad_norm": 0.10693359375, "learning_rate": 0.00196249031701862, "loss": 0.2192, "step": 93130 }, { "epoch": 0.661091325926754, "grad_norm": 0.07666015625, "learning_rate": 0.0019624822308581267, "loss": 0.2348, "step": 93140 }, { "epoch": 0.6611623041666002, "grad_norm": 0.1279296875, "learning_rate": 0.0019624741438446923, "loss": 0.2194, "step": 93150 }, { "epoch": 0.6612332824064463, "grad_norm": 0.107421875, "learning_rate": 0.0019624660559783252, "loss": 0.231, "step": 93160 }, { "epoch": 0.6613042606462923, "grad_norm": 0.095703125, "learning_rate": 0.001962457967259033, "loss": 0.2165, "step": 93170 }, { "epoch": 0.6613752388861385, "grad_norm": 0.126953125, "learning_rate": 0.001962449877686824, "loss": 0.2333, "step": 93180 }, { "epoch": 0.6614462171259846, "grad_norm": 0.0791015625, "learning_rate": 0.0019624417872617064, "loss": 0.2268, "step": 93190 }, { "epoch": 0.6615171953658308, "grad_norm": 0.07568359375, "learning_rate": 0.0019624336959836878, "loss": 0.2199, "step": 93200 }, { "epoch": 0.6615881736056768, "grad_norm": 0.07177734375, "learning_rate": 0.0019624256038527762, "loss": 0.2244, "step": 93210 }, { "epoch": 0.6616591518455229, "grad_norm": 0.09326171875, "learning_rate": 0.0019624175108689797, "loss": 0.2377, "step": 93220 }, { "epoch": 0.6617301300853691, "grad_norm": 0.1513671875, "learning_rate": 0.0019624094170323063, "loss": 0.2332, "step": 93230 }, { "epoch": 0.6618011083252152, "grad_norm": 0.09423828125, "learning_rate": 0.0019624013223427644, "loss": 0.2282, "step": 93240 }, { "epoch": 0.6618720865650614, "grad_norm": 0.099609375, "learning_rate": 0.0019623932268003613, "loss": 0.232, "step": 93250 }, { "epoch": 0.6619430648049074, "grad_norm": 0.1162109375, "learning_rate": 0.001962385130405105, "loss": 0.2209, "step": 93260 }, { "epoch": 0.6620140430447535, "grad_norm": 0.0791015625, "learning_rate": 0.001962377033157005, "loss": 0.2491, "step": 93270 }, { "epoch": 0.6620850212845997, "grad_norm": 0.09716796875, "learning_rate": 0.0019623689350560672, "loss": 0.2205, "step": 93280 }, { "epoch": 0.6621559995244458, "grad_norm": 0.126953125, "learning_rate": 0.001962360836102301, "loss": 0.2349, "step": 93290 }, { "epoch": 0.662226977764292, "grad_norm": 0.1201171875, "learning_rate": 0.0019623527362957137, "loss": 0.2329, "step": 93300 }, { "epoch": 0.662297956004138, "grad_norm": 0.1181640625, "learning_rate": 0.0019623446356363135, "loss": 0.2236, "step": 93310 }, { "epoch": 0.6623689342439841, "grad_norm": 0.08642578125, "learning_rate": 0.001962336534124109, "loss": 0.2207, "step": 93320 }, { "epoch": 0.6624399124838303, "grad_norm": 0.11181640625, "learning_rate": 0.0019623284317591074, "loss": 0.2397, "step": 93330 }, { "epoch": 0.6625108907236764, "grad_norm": 0.09619140625, "learning_rate": 0.001962320328541317, "loss": 0.2163, "step": 93340 }, { "epoch": 0.6625818689635226, "grad_norm": 0.083984375, "learning_rate": 0.0019623122244707457, "loss": 0.2222, "step": 93350 }, { "epoch": 0.6626528472033686, "grad_norm": 0.091796875, "learning_rate": 0.001962304119547402, "loss": 0.2272, "step": 93360 }, { "epoch": 0.6627238254432147, "grad_norm": 0.134765625, "learning_rate": 0.0019622960137712932, "loss": 0.2337, "step": 93370 }, { "epoch": 0.6627948036830609, "grad_norm": 0.15625, "learning_rate": 0.001962287907142428, "loss": 0.23, "step": 93380 }, { "epoch": 0.662865781922907, "grad_norm": 0.11181640625, "learning_rate": 0.0019622797996608145, "loss": 0.2331, "step": 93390 }, { "epoch": 0.6629367601627532, "grad_norm": 0.171875, "learning_rate": 0.0019622716913264594, "loss": 0.2598, "step": 93400 }, { "epoch": 0.6630077384025992, "grad_norm": 0.12890625, "learning_rate": 0.001962263582139372, "loss": 0.2338, "step": 93410 }, { "epoch": 0.6630787166424453, "grad_norm": 0.08984375, "learning_rate": 0.0019622554720995604, "loss": 0.23, "step": 93420 }, { "epoch": 0.6631496948822915, "grad_norm": 0.087890625, "learning_rate": 0.0019622473612070316, "loss": 0.2297, "step": 93430 }, { "epoch": 0.6632206731221376, "grad_norm": 0.10986328125, "learning_rate": 0.0019622392494617943, "loss": 0.2227, "step": 93440 }, { "epoch": 0.6632916513619836, "grad_norm": 0.08642578125, "learning_rate": 0.0019622311368638567, "loss": 0.2219, "step": 93450 }, { "epoch": 0.6633626296018298, "grad_norm": 0.0927734375, "learning_rate": 0.0019622230234132266, "loss": 0.2303, "step": 93460 }, { "epoch": 0.6634336078416759, "grad_norm": 0.08447265625, "learning_rate": 0.001962214909109912, "loss": 0.216, "step": 93470 }, { "epoch": 0.6635045860815221, "grad_norm": 0.111328125, "learning_rate": 0.00196220679395392, "loss": 0.2353, "step": 93480 }, { "epoch": 0.6635755643213682, "grad_norm": 0.1484375, "learning_rate": 0.0019621986779452603, "loss": 0.2407, "step": 93490 }, { "epoch": 0.6636465425612142, "grad_norm": 0.1259765625, "learning_rate": 0.00196219056108394, "loss": 0.2235, "step": 93500 }, { "epoch": 0.6637175208010604, "grad_norm": 0.07568359375, "learning_rate": 0.0019621824433699673, "loss": 0.2295, "step": 93510 }, { "epoch": 0.6637884990409065, "grad_norm": 0.1142578125, "learning_rate": 0.00196217432480335, "loss": 0.2334, "step": 93520 }, { "epoch": 0.6638594772807527, "grad_norm": 0.3046875, "learning_rate": 0.001962166205384097, "loss": 0.2247, "step": 93530 }, { "epoch": 0.6639304555205988, "grad_norm": 0.08056640625, "learning_rate": 0.001962158085112215, "loss": 0.2318, "step": 93540 }, { "epoch": 0.6640014337604448, "grad_norm": 0.0966796875, "learning_rate": 0.001962149963987713, "loss": 0.2366, "step": 93550 }, { "epoch": 0.664072412000291, "grad_norm": 0.091796875, "learning_rate": 0.001962141842010598, "loss": 0.22, "step": 93560 }, { "epoch": 0.6641433902401371, "grad_norm": 0.0751953125, "learning_rate": 0.00196213371918088, "loss": 0.2335, "step": 93570 }, { "epoch": 0.6642143684799833, "grad_norm": 0.1259765625, "learning_rate": 0.0019621255954985644, "loss": 0.2212, "step": 93580 }, { "epoch": 0.6642853467198294, "grad_norm": 0.10791015625, "learning_rate": 0.001962117470963662, "loss": 0.2408, "step": 93590 }, { "epoch": 0.6643563249596754, "grad_norm": 0.08935546875, "learning_rate": 0.0019621093455761784, "loss": 0.2279, "step": 93600 }, { "epoch": 0.6644273031995216, "grad_norm": 0.1240234375, "learning_rate": 0.001962101219336123, "loss": 0.2155, "step": 93610 }, { "epoch": 0.6644982814393677, "grad_norm": 0.13671875, "learning_rate": 0.0019620930922435035, "loss": 0.2225, "step": 93620 }, { "epoch": 0.6645692596792139, "grad_norm": 0.125, "learning_rate": 0.0019620849642983285, "loss": 0.2557, "step": 93630 }, { "epoch": 0.66464023791906, "grad_norm": 0.07568359375, "learning_rate": 0.001962076835500605, "loss": 0.2133, "step": 93640 }, { "epoch": 0.664711216158906, "grad_norm": 0.1708984375, "learning_rate": 0.0019620687058503417, "loss": 0.232, "step": 93650 }, { "epoch": 0.6647821943987522, "grad_norm": 0.06689453125, "learning_rate": 0.001962060575347546, "loss": 0.2348, "step": 93660 }, { "epoch": 0.6648531726385983, "grad_norm": 0.10546875, "learning_rate": 0.001962052443992227, "loss": 0.2163, "step": 93670 }, { "epoch": 0.6649241508784445, "grad_norm": 0.10205078125, "learning_rate": 0.0019620443117843917, "loss": 0.2263, "step": 93680 }, { "epoch": 0.6649951291182905, "grad_norm": 0.11572265625, "learning_rate": 0.001962036178724049, "loss": 0.2457, "step": 93690 }, { "epoch": 0.6650661073581366, "grad_norm": 0.09423828125, "learning_rate": 0.0019620280448112066, "loss": 0.2312, "step": 93700 }, { "epoch": 0.6651370855979828, "grad_norm": 0.08203125, "learning_rate": 0.001962019910045872, "loss": 0.2363, "step": 93710 }, { "epoch": 0.6652080638378289, "grad_norm": 0.1123046875, "learning_rate": 0.0019620117744280543, "loss": 0.2308, "step": 93720 }, { "epoch": 0.6652790420776751, "grad_norm": 0.09228515625, "learning_rate": 0.0019620036379577608, "loss": 0.2367, "step": 93730 }, { "epoch": 0.6653500203175211, "grad_norm": 0.1357421875, "learning_rate": 0.0019619955006349994, "loss": 0.2194, "step": 93740 }, { "epoch": 0.6654209985573672, "grad_norm": 0.109375, "learning_rate": 0.001961987362459779, "loss": 0.2321, "step": 93750 }, { "epoch": 0.6654919767972134, "grad_norm": 0.138671875, "learning_rate": 0.0019619792234321066, "loss": 0.2289, "step": 93760 }, { "epoch": 0.6655629550370595, "grad_norm": 0.11572265625, "learning_rate": 0.001961971083551991, "loss": 0.2332, "step": 93770 }, { "epoch": 0.6656339332769057, "grad_norm": 0.12890625, "learning_rate": 0.00196196294281944, "loss": 0.2388, "step": 93780 }, { "epoch": 0.6657049115167517, "grad_norm": 0.09375, "learning_rate": 0.0019619548012344617, "loss": 0.2298, "step": 93790 }, { "epoch": 0.6657758897565978, "grad_norm": 0.138671875, "learning_rate": 0.001961946658797064, "loss": 0.2421, "step": 93800 }, { "epoch": 0.665846867996444, "grad_norm": 0.125, "learning_rate": 0.0019619385155072552, "loss": 0.2244, "step": 93810 }, { "epoch": 0.6659178462362901, "grad_norm": 0.080078125, "learning_rate": 0.0019619303713650435, "loss": 0.2291, "step": 93820 }, { "epoch": 0.6659888244761363, "grad_norm": 0.0537109375, "learning_rate": 0.0019619222263704363, "loss": 0.227, "step": 93830 }, { "epoch": 0.6660598027159823, "grad_norm": 0.11279296875, "learning_rate": 0.0019619140805234424, "loss": 0.2334, "step": 93840 }, { "epoch": 0.6661307809558284, "grad_norm": 0.064453125, "learning_rate": 0.001961905933824069, "loss": 0.2117, "step": 93850 }, { "epoch": 0.6662017591956746, "grad_norm": 0.08740234375, "learning_rate": 0.0019618977862723252, "loss": 0.2373, "step": 93860 }, { "epoch": 0.6662727374355207, "grad_norm": 0.08544921875, "learning_rate": 0.001961889637868218, "loss": 0.2282, "step": 93870 }, { "epoch": 0.6663437156753669, "grad_norm": 0.1015625, "learning_rate": 0.0019618814886117565, "loss": 0.229, "step": 93880 }, { "epoch": 0.6664146939152129, "grad_norm": 0.103515625, "learning_rate": 0.0019618733385029477, "loss": 0.2116, "step": 93890 }, { "epoch": 0.666485672155059, "grad_norm": 0.10498046875, "learning_rate": 0.0019618651875418004, "loss": 0.2198, "step": 93900 }, { "epoch": 0.6665566503949052, "grad_norm": 0.10107421875, "learning_rate": 0.0019618570357283227, "loss": 0.2309, "step": 93910 }, { "epoch": 0.6666276286347513, "grad_norm": 0.09375, "learning_rate": 0.001961848883062522, "loss": 0.2242, "step": 93920 }, { "epoch": 0.6666986068745974, "grad_norm": 0.08740234375, "learning_rate": 0.001961840729544407, "loss": 0.2214, "step": 93930 }, { "epoch": 0.6667695851144435, "grad_norm": 0.08203125, "learning_rate": 0.001961832575173985, "loss": 0.2307, "step": 93940 }, { "epoch": 0.6668405633542897, "grad_norm": 0.169921875, "learning_rate": 0.001961824419951265, "loss": 0.2415, "step": 93950 }, { "epoch": 0.6669115415941358, "grad_norm": 0.1298828125, "learning_rate": 0.001961816263876255, "loss": 0.2192, "step": 93960 }, { "epoch": 0.6669825198339819, "grad_norm": 0.0732421875, "learning_rate": 0.0019618081069489625, "loss": 0.2131, "step": 93970 }, { "epoch": 0.667053498073828, "grad_norm": 0.09619140625, "learning_rate": 0.0019617999491693956, "loss": 0.2246, "step": 93980 }, { "epoch": 0.6671244763136741, "grad_norm": 0.10693359375, "learning_rate": 0.001961791790537563, "loss": 0.2212, "step": 93990 }, { "epoch": 0.6671954545535203, "grad_norm": 0.1513671875, "learning_rate": 0.001961783631053472, "loss": 0.2216, "step": 94000 }, { "epoch": 0.6671954545535203, "eval_covost2-zh-en_loss": 3.792738914489746, "eval_covost2-zh-en_model_preparation_time": 0.0085, "eval_covost2-zh-en_runtime": 21.6892, "eval_covost2-zh-en_samples_per_second": 2.951, "eval_covost2-zh-en_steps_per_second": 0.184, "step": 94000 }, { "epoch": 0.6671954545535203, "eval_covost2-en-zh_loss": 3.1823818683624268, "eval_covost2-en-zh_model_preparation_time": 0.0085, "eval_covost2-en-zh_runtime": 22.9744, "eval_covost2-en-zh_samples_per_second": 2.786, "eval_covost2-en-zh_steps_per_second": 0.174, "step": 94000 } ], "logging_steps": 10, "max_steps": 1000000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.034780974068321e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }