{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.16090058355193784, "eval_steps": 500, "global_step": 84000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9154831375230696e-05, "grad_norm": 2.5440807342529297, "learning_rate": 1.7239287698171994e-08, "loss": 10.508, "step": 10 }, { "epoch": 3.830966275046139e-05, "grad_norm": 2.5200700759887695, "learning_rate": 3.639405180725199e-08, "loss": 10.5047, "step": 20 }, { "epoch": 5.746449412569209e-05, "grad_norm": 2.351837158203125, "learning_rate": 5.554881591633198e-08, "loss": 10.5112, "step": 30 }, { "epoch": 7.661932550092278e-05, "grad_norm": 2.647965431213379, "learning_rate": 7.470358002541198e-08, "loss": 10.5094, "step": 40 }, { "epoch": 9.577415687615347e-05, "grad_norm": 2.5292916297912598, "learning_rate": 9.385834413449198e-08, "loss": 10.512, "step": 50 }, { "epoch": 0.00011492898825138418, "grad_norm": 2.6645455360412598, "learning_rate": 1.1301310824357198e-07, "loss": 10.5072, "step": 60 }, { "epoch": 0.00013408381962661488, "grad_norm": 2.6686244010925293, "learning_rate": 1.3216787235265198e-07, "loss": 10.5075, "step": 70 }, { "epoch": 0.00015323865100184557, "grad_norm": 2.612941265106201, "learning_rate": 1.5132263646173197e-07, "loss": 10.5003, "step": 80 }, { "epoch": 0.00017239348237707626, "grad_norm": 2.617279052734375, "learning_rate": 1.7047740057081198e-07, "loss": 10.4949, "step": 90 }, { "epoch": 0.00019154831375230695, "grad_norm": 2.610851526260376, "learning_rate": 1.8963216467989193e-07, "loss": 10.5023, "step": 100 }, { "epoch": 0.00021070314512753766, "grad_norm": 2.855820417404175, "learning_rate": 2.0878692878897194e-07, "loss": 10.5005, "step": 110 }, { "epoch": 0.00022985797650276835, "grad_norm": 2.5639846324920654, "learning_rate": 2.2794169289805195e-07, "loss": 10.4973, "step": 120 }, { "epoch": 0.00024901280787799904, "grad_norm": 2.775618076324463, 
"learning_rate": 2.4709645700713196e-07, "loss": 10.495, "step": 130 }, { "epoch": 0.00026816763925322976, "grad_norm": 2.834442377090454, "learning_rate": 2.662512211162119e-07, "loss": 10.4964, "step": 140 }, { "epoch": 0.0002873224706284604, "grad_norm": 2.473102569580078, "learning_rate": 2.854059852252919e-07, "loss": 10.4973, "step": 150 }, { "epoch": 0.00030647730200369114, "grad_norm": 2.6998324394226074, "learning_rate": 3.045607493343719e-07, "loss": 10.4838, "step": 160 }, { "epoch": 0.00032563213337892186, "grad_norm": 2.3928730487823486, "learning_rate": 3.2371551344345194e-07, "loss": 10.4906, "step": 170 }, { "epoch": 0.0003447869647541525, "grad_norm": 2.489246129989624, "learning_rate": 3.428702775525319e-07, "loss": 10.4873, "step": 180 }, { "epoch": 0.00036394179612938323, "grad_norm": 2.6731367111206055, "learning_rate": 3.620250416616119e-07, "loss": 10.4815, "step": 190 }, { "epoch": 0.0003830966275046139, "grad_norm": 2.9237053394317627, "learning_rate": 3.811798057706919e-07, "loss": 10.4871, "step": 200 }, { "epoch": 0.0004022514588798446, "grad_norm": 2.408844232559204, "learning_rate": 4.0033456987977187e-07, "loss": 10.4821, "step": 210 }, { "epoch": 0.00042140629025507533, "grad_norm": 2.390153646469116, "learning_rate": 4.1948933398885185e-07, "loss": 10.4703, "step": 220 }, { "epoch": 0.000440561121630306, "grad_norm": 2.5923709869384766, "learning_rate": 4.386440980979319e-07, "loss": 10.473, "step": 230 }, { "epoch": 0.0004597159530055367, "grad_norm": 2.3746848106384277, "learning_rate": 4.5779886220701187e-07, "loss": 10.4688, "step": 240 }, { "epoch": 0.0004788707843807674, "grad_norm": 2.4090585708618164, "learning_rate": 4.769536263160919e-07, "loss": 10.4716, "step": 250 }, { "epoch": 0.0004980256157559981, "grad_norm": 2.341661214828491, "learning_rate": 4.961083904251718e-07, "loss": 10.4655, "step": 260 }, { "epoch": 0.0005171804471312287, "grad_norm": 2.653409242630005, "learning_rate": 5.152631545342519e-07, "loss": 
10.4666, "step": 270 }, { "epoch": 0.0005363352785064595, "grad_norm": 2.7457540035247803, "learning_rate": 5.344179186433318e-07, "loss": 10.4582, "step": 280 }, { "epoch": 0.0005554901098816902, "grad_norm": 2.3457586765289307, "learning_rate": 5.535726827524118e-07, "loss": 10.4511, "step": 290 }, { "epoch": 0.0005746449412569208, "grad_norm": 2.4347667694091797, "learning_rate": 5.727274468614919e-07, "loss": 10.441, "step": 300 }, { "epoch": 0.0005937997726321516, "grad_norm": 2.459235429763794, "learning_rate": 5.918822109705719e-07, "loss": 10.4494, "step": 310 }, { "epoch": 0.0006129546040073823, "grad_norm": 2.835768461227417, "learning_rate": 6.110369750796517e-07, "loss": 10.4307, "step": 320 }, { "epoch": 0.0006321094353826129, "grad_norm": 2.5435030460357666, "learning_rate": 6.301917391887318e-07, "loss": 10.4215, "step": 330 }, { "epoch": 0.0006512642667578437, "grad_norm": 2.4884514808654785, "learning_rate": 6.493465032978118e-07, "loss": 10.4287, "step": 340 }, { "epoch": 0.0006704190981330744, "grad_norm": 2.866292715072632, "learning_rate": 6.685012674068918e-07, "loss": 10.42, "step": 350 }, { "epoch": 0.000689573929508305, "grad_norm": 2.448666572570801, "learning_rate": 6.876560315159718e-07, "loss": 10.4118, "step": 360 }, { "epoch": 0.0007087287608835358, "grad_norm": 2.9077935218811035, "learning_rate": 7.068107956250518e-07, "loss": 10.4018, "step": 370 }, { "epoch": 0.0007278835922587665, "grad_norm": 2.7484612464904785, "learning_rate": 7.259655597341318e-07, "loss": 10.3952, "step": 380 }, { "epoch": 0.0007470384236339971, "grad_norm": 2.4217350482940674, "learning_rate": 7.451203238432118e-07, "loss": 10.387, "step": 390 }, { "epoch": 0.0007661932550092278, "grad_norm": 3.0209953784942627, "learning_rate": 7.642750879522918e-07, "loss": 10.3683, "step": 400 }, { "epoch": 0.0007853480863844586, "grad_norm": 2.5182459354400635, "learning_rate": 7.834298520613718e-07, "loss": 10.3367, "step": 410 }, { "epoch": 0.0008045029177596892, 
"grad_norm": 2.53008770942688, "learning_rate": 8.025846161704518e-07, "loss": 10.3512, "step": 420 }, { "epoch": 0.0008236577491349199, "grad_norm": 2.3772451877593994, "learning_rate": 8.217393802795318e-07, "loss": 10.3567, "step": 430 }, { "epoch": 0.0008428125805101507, "grad_norm": 2.4641432762145996, "learning_rate": 8.408941443886118e-07, "loss": 10.3528, "step": 440 }, { "epoch": 0.0008619674118853813, "grad_norm": 2.811863422393799, "learning_rate": 8.600489084976918e-07, "loss": 10.334, "step": 450 }, { "epoch": 0.000881122243260612, "grad_norm": 2.8869335651397705, "learning_rate": 8.792036726067718e-07, "loss": 10.2925, "step": 460 }, { "epoch": 0.0009002770746358428, "grad_norm": 2.6200757026672363, "learning_rate": 8.983584367158518e-07, "loss": 10.3039, "step": 470 }, { "epoch": 0.0009194319060110734, "grad_norm": 3.6637096405029297, "learning_rate": 9.175132008249317e-07, "loss": 10.2753, "step": 480 }, { "epoch": 0.0009385867373863041, "grad_norm": 2.5522305965423584, "learning_rate": 9.366679649340117e-07, "loss": 10.2931, "step": 490 }, { "epoch": 0.0009577415687615348, "grad_norm": 2.549851179122925, "learning_rate": 9.558227290430917e-07, "loss": 10.2703, "step": 500 }, { "epoch": 0.0009768964001367654, "grad_norm": 2.6337387561798096, "learning_rate": 9.749774931521718e-07, "loss": 10.2368, "step": 510 }, { "epoch": 0.0009960512315119962, "grad_norm": 2.270040512084961, "learning_rate": 9.941322572612518e-07, "loss": 10.2312, "step": 520 }, { "epoch": 0.001015206062887227, "grad_norm": 2.4993247985839844, "learning_rate": 1.0132870213703316e-06, "loss": 10.2181, "step": 530 }, { "epoch": 0.0010343608942624575, "grad_norm": 2.4313130378723145, "learning_rate": 1.0324417854794117e-06, "loss": 10.2159, "step": 540 }, { "epoch": 0.0010535157256376883, "grad_norm": 2.507406234741211, "learning_rate": 1.0515965495884917e-06, "loss": 10.1923, "step": 550 }, { "epoch": 0.001072670557012919, "grad_norm": 2.407334089279175, "learning_rate": 
1.0707513136975717e-06, "loss": 10.1575, "step": 560 }, { "epoch": 0.0010918253883881496, "grad_norm": 2.4932613372802734, "learning_rate": 1.0899060778066518e-06, "loss": 10.1893, "step": 570 }, { "epoch": 0.0011109802197633804, "grad_norm": 2.343027114868164, "learning_rate": 1.1090608419157318e-06, "loss": 10.1964, "step": 580 }, { "epoch": 0.0011301350511386111, "grad_norm": 2.2972538471221924, "learning_rate": 1.1282156060248118e-06, "loss": 10.1134, "step": 590 }, { "epoch": 0.0011492898825138417, "grad_norm": 2.00966215133667, "learning_rate": 1.1473703701338917e-06, "loss": 10.1278, "step": 600 }, { "epoch": 0.0011684447138890725, "grad_norm": 2.09635329246521, "learning_rate": 1.1665251342429717e-06, "loss": 10.1119, "step": 610 }, { "epoch": 0.0011875995452643032, "grad_norm": 1.9069201946258545, "learning_rate": 1.1856798983520517e-06, "loss": 10.0846, "step": 620 }, { "epoch": 0.0012067543766395338, "grad_norm": 1.9185922145843506, "learning_rate": 1.2048346624611316e-06, "loss": 10.0794, "step": 630 }, { "epoch": 0.0012259092080147646, "grad_norm": 1.8179051876068115, "learning_rate": 1.2239894265702118e-06, "loss": 10.1029, "step": 640 }, { "epoch": 0.0012450640393899953, "grad_norm": 2.0550334453582764, "learning_rate": 1.2431441906792916e-06, "loss": 10.0257, "step": 650 }, { "epoch": 0.0012642188707652259, "grad_norm": 1.93297278881073, "learning_rate": 1.2622989547883717e-06, "loss": 10.0796, "step": 660 }, { "epoch": 0.0012833737021404566, "grad_norm": 2.434666395187378, "learning_rate": 1.2814537188974517e-06, "loss": 10.0198, "step": 670 }, { "epoch": 0.0013025285335156874, "grad_norm": 1.8838261365890503, "learning_rate": 1.3006084830065317e-06, "loss": 10.0226, "step": 680 }, { "epoch": 0.001321683364890918, "grad_norm": 1.996128797531128, "learning_rate": 1.3197632471156116e-06, "loss": 9.9962, "step": 690 }, { "epoch": 0.0013408381962661487, "grad_norm": 1.8092870712280273, "learning_rate": 1.3389180112246918e-06, "loss": 9.9868, "step": 
700 }, { "epoch": 0.0013599930276413795, "grad_norm": 1.719038963317871, "learning_rate": 1.3580727753337716e-06, "loss": 10.0007, "step": 710 }, { "epoch": 0.00137914785901661, "grad_norm": 1.7417131662368774, "learning_rate": 1.3772275394428515e-06, "loss": 9.9996, "step": 720 }, { "epoch": 0.0013983026903918408, "grad_norm": 2.164299249649048, "learning_rate": 1.3963823035519317e-06, "loss": 9.9049, "step": 730 }, { "epoch": 0.0014174575217670716, "grad_norm": 1.7481589317321777, "learning_rate": 1.4155370676610115e-06, "loss": 9.9494, "step": 740 }, { "epoch": 0.0014366123531423022, "grad_norm": 1.5772452354431152, "learning_rate": 1.4346918317700916e-06, "loss": 9.9517, "step": 750 }, { "epoch": 0.001455767184517533, "grad_norm": 1.5541058778762817, "learning_rate": 1.4538465958791716e-06, "loss": 9.8875, "step": 760 }, { "epoch": 0.0014749220158927635, "grad_norm": 1.776672124862671, "learning_rate": 1.4730013599882516e-06, "loss": 9.9058, "step": 770 }, { "epoch": 0.0014940768472679943, "grad_norm": 1.8939794301986694, "learning_rate": 1.4921561240973315e-06, "loss": 9.9414, "step": 780 }, { "epoch": 0.001513231678643225, "grad_norm": 1.5233488082885742, "learning_rate": 1.5113108882064117e-06, "loss": 9.9017, "step": 790 }, { "epoch": 0.0015323865100184556, "grad_norm": 1.4771305322647095, "learning_rate": 1.5304656523154915e-06, "loss": 9.8519, "step": 800 }, { "epoch": 0.0015515413413936864, "grad_norm": 1.5039676427841187, "learning_rate": 1.5496204164245718e-06, "loss": 9.8457, "step": 810 }, { "epoch": 0.0015706961727689171, "grad_norm": 2.244281530380249, "learning_rate": 1.5687751805336516e-06, "loss": 9.8228, "step": 820 }, { "epoch": 0.0015898510041441477, "grad_norm": 1.4825702905654907, "learning_rate": 1.5879299446427316e-06, "loss": 9.8173, "step": 830 }, { "epoch": 0.0016090058355193785, "grad_norm": 1.361830711364746, "learning_rate": 1.6070847087518115e-06, "loss": 9.7777, "step": 840 }, { "epoch": 0.0016281606668946092, "grad_norm": 
1.5805838108062744, "learning_rate": 1.6262394728608917e-06, "loss": 9.7383, "step": 850 }, { "epoch": 0.0016473154982698398, "grad_norm": 1.2497448921203613, "learning_rate": 1.6453942369699715e-06, "loss": 9.7507, "step": 860 }, { "epoch": 0.0016664703296450705, "grad_norm": 1.3516417741775513, "learning_rate": 1.6645490010790514e-06, "loss": 9.7842, "step": 870 }, { "epoch": 0.0016856251610203013, "grad_norm": 1.220311164855957, "learning_rate": 1.6837037651881316e-06, "loss": 9.6983, "step": 880 }, { "epoch": 0.0017047799923955319, "grad_norm": 1.234095811843872, "learning_rate": 1.7028585292972114e-06, "loss": 9.742, "step": 890 }, { "epoch": 0.0017239348237707626, "grad_norm": 1.2706629037857056, "learning_rate": 1.7220132934062917e-06, "loss": 9.6919, "step": 900 }, { "epoch": 0.0017430896551459934, "grad_norm": 1.3260141611099243, "learning_rate": 1.7411680575153715e-06, "loss": 9.701, "step": 910 }, { "epoch": 0.001762244486521224, "grad_norm": 1.2895431518554688, "learning_rate": 1.7603228216244515e-06, "loss": 9.6491, "step": 920 }, { "epoch": 0.0017813993178964547, "grad_norm": 1.1289520263671875, "learning_rate": 1.7794775857335314e-06, "loss": 9.6384, "step": 930 }, { "epoch": 0.0018005541492716855, "grad_norm": 1.192362904548645, "learning_rate": 1.7986323498426116e-06, "loss": 9.6486, "step": 940 }, { "epoch": 0.001819708980646916, "grad_norm": 1.866468071937561, "learning_rate": 1.8177871139516914e-06, "loss": 9.684, "step": 950 }, { "epoch": 0.0018388638120221468, "grad_norm": 1.0283859968185425, "learning_rate": 1.8369418780607717e-06, "loss": 9.614, "step": 960 }, { "epoch": 0.0018580186433973776, "grad_norm": 1.173030972480774, "learning_rate": 1.8560966421698515e-06, "loss": 9.6087, "step": 970 }, { "epoch": 0.0018771734747726082, "grad_norm": 1.1873977184295654, "learning_rate": 1.8752514062789315e-06, "loss": 9.6519, "step": 980 }, { "epoch": 0.001896328306147839, "grad_norm": 1.1170827150344849, "learning_rate": 1.8944061703880116e-06, 
"loss": 9.5769, "step": 990 }, { "epoch": 0.0019154831375230697, "grad_norm": 1.1152029037475586, "learning_rate": 1.9135609344970916e-06, "loss": 9.5405, "step": 1000 }, { "epoch": 0.0019346379688983003, "grad_norm": 1.0373958349227905, "learning_rate": 1.9327156986061716e-06, "loss": 9.5135, "step": 1010 }, { "epoch": 0.001953792800273531, "grad_norm": 1.112820029258728, "learning_rate": 1.9518704627152517e-06, "loss": 9.5343, "step": 1020 }, { "epoch": 0.0019729476316487616, "grad_norm": 1.0631508827209473, "learning_rate": 1.9710252268243313e-06, "loss": 9.5138, "step": 1030 }, { "epoch": 0.0019921024630239923, "grad_norm": 1.117720365524292, "learning_rate": 1.9901799909334113e-06, "loss": 9.5012, "step": 1040 }, { "epoch": 0.002011257294399223, "grad_norm": 1.0829119682312012, "learning_rate": 2.0093347550424914e-06, "loss": 9.5019, "step": 1050 }, { "epoch": 0.002030412125774454, "grad_norm": 0.9570440649986267, "learning_rate": 2.0284895191515714e-06, "loss": 9.4851, "step": 1060 }, { "epoch": 0.0020495669571496847, "grad_norm": 1.0483585596084595, "learning_rate": 2.0476442832606514e-06, "loss": 9.4607, "step": 1070 }, { "epoch": 0.002068721788524915, "grad_norm": 1.0035184621810913, "learning_rate": 2.0667990473697315e-06, "loss": 9.4314, "step": 1080 }, { "epoch": 0.0020878766199001458, "grad_norm": 1.0891762971878052, "learning_rate": 2.0859538114788115e-06, "loss": 9.4632, "step": 1090 }, { "epoch": 0.0021070314512753765, "grad_norm": 1.0847415924072266, "learning_rate": 2.1051085755878915e-06, "loss": 9.3936, "step": 1100 }, { "epoch": 0.0021261862826506073, "grad_norm": 1.0933468341827393, "learning_rate": 2.1242633396969716e-06, "loss": 9.4745, "step": 1110 }, { "epoch": 0.002145341114025838, "grad_norm": 0.9433518052101135, "learning_rate": 2.143418103806051e-06, "loss": 9.4467, "step": 1120 }, { "epoch": 0.002164495945401069, "grad_norm": 1.0817142724990845, "learning_rate": 2.1625728679151316e-06, "loss": 9.3276, "step": 1130 }, { "epoch": 
0.002183650776776299, "grad_norm": 0.9570011496543884, "learning_rate": 2.1817276320242112e-06, "loss": 9.3739, "step": 1140 }, { "epoch": 0.00220280560815153, "grad_norm": 0.9877024292945862, "learning_rate": 2.2008823961332917e-06, "loss": 9.3469, "step": 1150 }, { "epoch": 0.0022219604395267607, "grad_norm": 1.0508438348770142, "learning_rate": 2.2200371602423713e-06, "loss": 9.4035, "step": 1160 }, { "epoch": 0.0022411152709019915, "grad_norm": 0.9118192195892334, "learning_rate": 2.2391919243514514e-06, "loss": 9.3296, "step": 1170 }, { "epoch": 0.0022602701022772223, "grad_norm": 0.9220691919326782, "learning_rate": 2.2583466884605314e-06, "loss": 9.3471, "step": 1180 }, { "epoch": 0.002279424933652453, "grad_norm": 1.1906185150146484, "learning_rate": 2.2775014525696114e-06, "loss": 9.3215, "step": 1190 }, { "epoch": 0.0022985797650276834, "grad_norm": 0.9625170826911926, "learning_rate": 2.2966562166786915e-06, "loss": 9.265, "step": 1200 }, { "epoch": 0.002317734596402914, "grad_norm": 0.9243164658546448, "learning_rate": 2.3158109807877715e-06, "loss": 9.219, "step": 1210 }, { "epoch": 0.002336889427778145, "grad_norm": 1.0242040157318115, "learning_rate": 2.3349657448968515e-06, "loss": 9.2452, "step": 1220 }, { "epoch": 0.0023560442591533757, "grad_norm": 0.9893587231636047, "learning_rate": 2.354120509005931e-06, "loss": 9.2815, "step": 1230 }, { "epoch": 0.0023751990905286065, "grad_norm": 1.0581495761871338, "learning_rate": 2.373275273115011e-06, "loss": 9.1895, "step": 1240 }, { "epoch": 0.002394353921903837, "grad_norm": 0.9417503476142883, "learning_rate": 2.3924300372240916e-06, "loss": 9.2751, "step": 1250 }, { "epoch": 0.0024135087532790676, "grad_norm": 0.8899039030075073, "learning_rate": 2.4115848013331712e-06, "loss": 9.2068, "step": 1260 }, { "epoch": 0.0024326635846542983, "grad_norm": 0.8890252113342285, "learning_rate": 2.4307395654422513e-06, "loss": 9.2465, "step": 1270 }, { "epoch": 0.002451818416029529, "grad_norm": 
1.1553431749343872, "learning_rate": 2.4498943295513313e-06, "loss": 9.2705, "step": 1280 }, { "epoch": 0.00247097324740476, "grad_norm": 0.9527641534805298, "learning_rate": 2.4690490936604118e-06, "loss": 9.1619, "step": 1290 }, { "epoch": 0.0024901280787799907, "grad_norm": 1.0023311376571655, "learning_rate": 2.4882038577694914e-06, "loss": 9.2271, "step": 1300 }, { "epoch": 0.002509282910155221, "grad_norm": 0.869351863861084, "learning_rate": 2.5073586218785714e-06, "loss": 9.2104, "step": 1310 }, { "epoch": 0.0025284377415304518, "grad_norm": 0.8423483371734619, "learning_rate": 2.526513385987651e-06, "loss": 9.1538, "step": 1320 }, { "epoch": 0.0025475925729056825, "grad_norm": 0.8771184682846069, "learning_rate": 2.5456681500967315e-06, "loss": 9.1255, "step": 1330 }, { "epoch": 0.0025667474042809133, "grad_norm": 0.98565274477005, "learning_rate": 2.5648229142058115e-06, "loss": 9.139, "step": 1340 }, { "epoch": 0.002585902235656144, "grad_norm": 1.001977562904358, "learning_rate": 2.583977678314891e-06, "loss": 9.0668, "step": 1350 }, { "epoch": 0.002605057067031375, "grad_norm": 0.8326733708381653, "learning_rate": 2.603132442423971e-06, "loss": 9.125, "step": 1360 }, { "epoch": 0.002624211898406605, "grad_norm": 1.1057522296905518, "learning_rate": 2.6222872065330512e-06, "loss": 9.0898, "step": 1370 }, { "epoch": 0.002643366729781836, "grad_norm": 0.8796950578689575, "learning_rate": 2.6414419706421317e-06, "loss": 9.1135, "step": 1380 }, { "epoch": 0.0026625215611570667, "grad_norm": 0.8563319444656372, "learning_rate": 2.6605967347512113e-06, "loss": 9.0924, "step": 1390 }, { "epoch": 0.0026816763925322975, "grad_norm": 0.8435609936714172, "learning_rate": 2.6797514988602913e-06, "loss": 9.0647, "step": 1400 }, { "epoch": 0.0027008312239075283, "grad_norm": 0.794014573097229, "learning_rate": 2.698906262969371e-06, "loss": 9.0674, "step": 1410 }, { "epoch": 0.002719986055282759, "grad_norm": 0.7717275023460388, "learning_rate": 
2.7180610270784514e-06, "loss": 9.0696, "step": 1420 }, { "epoch": 0.0027391408866579894, "grad_norm": 0.8274510502815247, "learning_rate": 2.7372157911875314e-06, "loss": 9.0114, "step": 1430 }, { "epoch": 0.00275829571803322, "grad_norm": 0.8100062608718872, "learning_rate": 2.756370555296611e-06, "loss": 9.0475, "step": 1440 }, { "epoch": 0.002777450549408451, "grad_norm": 0.8205294609069824, "learning_rate": 2.775525319405691e-06, "loss": 9.0407, "step": 1450 }, { "epoch": 0.0027966053807836817, "grad_norm": 0.7492110729217529, "learning_rate": 2.7946800835147715e-06, "loss": 9.0613, "step": 1460 }, { "epoch": 0.0028157602121589125, "grad_norm": 0.8651508688926697, "learning_rate": 2.8138348476238516e-06, "loss": 9.0011, "step": 1470 }, { "epoch": 0.0028349150435341432, "grad_norm": 1.3769965171813965, "learning_rate": 2.832989611732931e-06, "loss": 8.9817, "step": 1480 }, { "epoch": 0.0028540698749093736, "grad_norm": 0.7822460532188416, "learning_rate": 2.8521443758420112e-06, "loss": 8.9587, "step": 1490 }, { "epoch": 0.0028732247062846043, "grad_norm": 0.8468725085258484, "learning_rate": 2.8712991399510917e-06, "loss": 8.942, "step": 1500 }, { "epoch": 0.002892379537659835, "grad_norm": 0.8026223182678223, "learning_rate": 2.8904539040601713e-06, "loss": 9.0043, "step": 1510 }, { "epoch": 0.002911534369035066, "grad_norm": 0.7883700728416443, "learning_rate": 2.9096086681692513e-06, "loss": 8.9581, "step": 1520 }, { "epoch": 0.0029306892004102966, "grad_norm": 0.7457848191261292, "learning_rate": 2.928763432278331e-06, "loss": 8.9286, "step": 1530 }, { "epoch": 0.002949844031785527, "grad_norm": 0.7883260846138, "learning_rate": 2.947918196387411e-06, "loss": 8.9093, "step": 1540 }, { "epoch": 0.0029689988631607577, "grad_norm": 0.7680001258850098, "learning_rate": 2.9670729604964914e-06, "loss": 8.9433, "step": 1550 }, { "epoch": 0.0029881536945359885, "grad_norm": 0.9090923070907593, "learning_rate": 2.9862277246055715e-06, "loss": 8.9303, "step": 1560 
}, { "epoch": 0.0030073085259112193, "grad_norm": 0.8260313868522644, "learning_rate": 3.005382488714651e-06, "loss": 8.9384, "step": 1570 }, { "epoch": 0.00302646335728645, "grad_norm": 0.7666963934898376, "learning_rate": 3.024537252823731e-06, "loss": 8.8777, "step": 1580 }, { "epoch": 0.003045618188661681, "grad_norm": 0.703902006149292, "learning_rate": 3.0436920169328116e-06, "loss": 8.8392, "step": 1590 }, { "epoch": 0.003064773020036911, "grad_norm": 0.7850908637046814, "learning_rate": 3.062846781041891e-06, "loss": 8.8833, "step": 1600 }, { "epoch": 0.003083927851412142, "grad_norm": 0.7010381817817688, "learning_rate": 3.0820015451509712e-06, "loss": 8.8303, "step": 1610 }, { "epoch": 0.0031030826827873727, "grad_norm": 0.7847872376441956, "learning_rate": 3.101156309260051e-06, "loss": 8.8453, "step": 1620 }, { "epoch": 0.0031222375141626035, "grad_norm": 0.7674160599708557, "learning_rate": 3.1203110733691313e-06, "loss": 8.8162, "step": 1630 }, { "epoch": 0.0031413923455378343, "grad_norm": 0.7956439256668091, "learning_rate": 3.1394658374782113e-06, "loss": 8.8032, "step": 1640 }, { "epoch": 0.003160547176913065, "grad_norm": 0.7395278811454773, "learning_rate": 3.1586206015872914e-06, "loss": 8.8044, "step": 1650 }, { "epoch": 0.0031797020082882954, "grad_norm": 0.8228457570075989, "learning_rate": 3.177775365696371e-06, "loss": 8.7937, "step": 1660 }, { "epoch": 0.003198856839663526, "grad_norm": 0.7524309754371643, "learning_rate": 3.196930129805451e-06, "loss": 8.8215, "step": 1670 }, { "epoch": 0.003218011671038757, "grad_norm": 0.8404503464698792, "learning_rate": 3.2160848939145315e-06, "loss": 8.7906, "step": 1680 }, { "epoch": 0.0032371665024139877, "grad_norm": 0.7785541415214539, "learning_rate": 3.235239658023611e-06, "loss": 8.7951, "step": 1690 }, { "epoch": 0.0032563213337892184, "grad_norm": 0.725410521030426, "learning_rate": 3.254394422132691e-06, "loss": 8.7913, "step": 1700 }, { "epoch": 0.003275476165164449, "grad_norm": 
0.7253627181053162, "learning_rate": 3.2735491862417707e-06, "loss": 8.7557, "step": 1710 }, { "epoch": 0.0032946309965396796, "grad_norm": 0.7645329236984253, "learning_rate": 3.292703950350851e-06, "loss": 8.749, "step": 1720 }, { "epoch": 0.0033137858279149103, "grad_norm": 0.946212112903595, "learning_rate": 3.3118587144599312e-06, "loss": 8.7197, "step": 1730 }, { "epoch": 0.003332940659290141, "grad_norm": 0.7416749000549316, "learning_rate": 3.3310134785690113e-06, "loss": 8.6969, "step": 1740 }, { "epoch": 0.003352095490665372, "grad_norm": 0.7272405624389648, "learning_rate": 3.350168242678091e-06, "loss": 8.7321, "step": 1750 }, { "epoch": 0.0033712503220406026, "grad_norm": 0.6788675785064697, "learning_rate": 3.3693230067871713e-06, "loss": 8.7261, "step": 1760 }, { "epoch": 0.0033904051534158334, "grad_norm": 0.6878763437271118, "learning_rate": 3.3884777708962514e-06, "loss": 8.7379, "step": 1770 }, { "epoch": 0.0034095599847910637, "grad_norm": 0.686092734336853, "learning_rate": 3.407632535005331e-06, "loss": 8.6603, "step": 1780 }, { "epoch": 0.0034287148161662945, "grad_norm": 0.8296403884887695, "learning_rate": 3.426787299114411e-06, "loss": 8.6946, "step": 1790 }, { "epoch": 0.0034478696475415253, "grad_norm": 0.7239318490028381, "learning_rate": 3.4459420632234915e-06, "loss": 8.73, "step": 1800 }, { "epoch": 0.003467024478916756, "grad_norm": 0.7354301810264587, "learning_rate": 3.465096827332571e-06, "loss": 8.6746, "step": 1810 }, { "epoch": 0.003486179310291987, "grad_norm": 0.7135173678398132, "learning_rate": 3.484251591441651e-06, "loss": 8.6236, "step": 1820 }, { "epoch": 0.003505334141667217, "grad_norm": 0.7457655072212219, "learning_rate": 3.503406355550731e-06, "loss": 8.6432, "step": 1830 }, { "epoch": 0.003524488973042448, "grad_norm": 0.6935797929763794, "learning_rate": 3.5225611196598108e-06, "loss": 8.6209, "step": 1840 }, { "epoch": 0.0035436438044176787, "grad_norm": 0.6665393114089966, "learning_rate": 
3.5417158837688912e-06, "loss": 8.6806, "step": 1850 }, { "epoch": 0.0035627986357929095, "grad_norm": 0.6931477785110474, "learning_rate": 3.5608706478779713e-06, "loss": 8.6328, "step": 1860 }, { "epoch": 0.0035819534671681402, "grad_norm": 0.7548337578773499, "learning_rate": 3.580025411987051e-06, "loss": 8.628, "step": 1870 }, { "epoch": 0.003601108298543371, "grad_norm": 0.6931093335151672, "learning_rate": 3.599180176096131e-06, "loss": 8.6126, "step": 1880 }, { "epoch": 0.0036202631299186014, "grad_norm": 0.8627099394798279, "learning_rate": 3.6183349402052114e-06, "loss": 8.5837, "step": 1890 }, { "epoch": 0.003639417961293832, "grad_norm": 0.7473539113998413, "learning_rate": 3.637489704314291e-06, "loss": 8.5962, "step": 1900 }, { "epoch": 0.003658572792669063, "grad_norm": 0.7894121408462524, "learning_rate": 3.656644468423371e-06, "loss": 8.6465, "step": 1910 }, { "epoch": 0.0036777276240442937, "grad_norm": 0.7199526429176331, "learning_rate": 3.675799232532451e-06, "loss": 8.5861, "step": 1920 }, { "epoch": 0.0036968824554195244, "grad_norm": 0.6773931384086609, "learning_rate": 3.694953996641531e-06, "loss": 8.6335, "step": 1930 }, { "epoch": 0.003716037286794755, "grad_norm": 0.9042545557022095, "learning_rate": 3.714108760750611e-06, "loss": 8.5346, "step": 1940 }, { "epoch": 0.0037351921181699855, "grad_norm": 0.6843738555908203, "learning_rate": 3.733263524859691e-06, "loss": 8.5298, "step": 1950 }, { "epoch": 0.0037543469495452163, "grad_norm": 0.8201918005943298, "learning_rate": 3.7524182889687708e-06, "loss": 8.5268, "step": 1960 }, { "epoch": 0.003773501780920447, "grad_norm": 0.6376744508743286, "learning_rate": 3.7715730530778512e-06, "loss": 8.5467, "step": 1970 }, { "epoch": 0.003792656612295678, "grad_norm": 0.7004703283309937, "learning_rate": 3.7907278171869313e-06, "loss": 8.5295, "step": 1980 }, { "epoch": 0.0038118114436709086, "grad_norm": 0.6650729179382324, "learning_rate": 3.809882581296011e-06, "loss": 8.4889, "step": 1990 }, 
{ "epoch": 0.0038309662750461394, "grad_norm": 0.7450000047683716, "learning_rate": 3.8290373454050905e-06, "loss": 8.5468, "step": 2000 }, { "epoch": 0.0038501211064213697, "grad_norm": 0.6359753012657166, "learning_rate": 3.848192109514171e-06, "loss": 8.5041, "step": 2010 }, { "epoch": 0.0038692759377966005, "grad_norm": 0.8122127652168274, "learning_rate": 3.867346873623251e-06, "loss": 8.5272, "step": 2020 }, { "epoch": 0.0038884307691718313, "grad_norm": 0.6557264924049377, "learning_rate": 3.886501637732331e-06, "loss": 8.4774, "step": 2030 }, { "epoch": 0.003907585600547062, "grad_norm": 1.4351856708526611, "learning_rate": 3.905656401841411e-06, "loss": 8.4747, "step": 2040 }, { "epoch": 0.003926740431922293, "grad_norm": 0.6225512623786926, "learning_rate": 3.924811165950491e-06, "loss": 8.4519, "step": 2050 }, { "epoch": 0.003945895263297523, "grad_norm": 0.6050755977630615, "learning_rate": 3.9439659300595715e-06, "loss": 8.4785, "step": 2060 }, { "epoch": 0.003965050094672754, "grad_norm": 1.1672136783599854, "learning_rate": 3.963120694168651e-06, "loss": 8.4764, "step": 2070 }, { "epoch": 0.003984204926047985, "grad_norm": 0.7809194922447205, "learning_rate": 3.982275458277731e-06, "loss": 8.4735, "step": 2080 }, { "epoch": 0.004003359757423215, "grad_norm": 0.6633661985397339, "learning_rate": 4.00143022238681e-06, "loss": 8.46, "step": 2090 }, { "epoch": 0.004022514588798446, "grad_norm": 0.7737755179405212, "learning_rate": 4.020584986495891e-06, "loss": 8.4676, "step": 2100 }, { "epoch": 0.004041669420173677, "grad_norm": 0.7130182981491089, "learning_rate": 4.039739750604971e-06, "loss": 8.4883, "step": 2110 }, { "epoch": 0.004060824251548908, "grad_norm": 0.6267625689506531, "learning_rate": 4.058894514714051e-06, "loss": 8.4652, "step": 2120 }, { "epoch": 0.004079979082924138, "grad_norm": 0.6023935079574585, "learning_rate": 4.0780492788231305e-06, "loss": 8.4333, "step": 2130 }, { "epoch": 0.004099133914299369, "grad_norm": 
0.6357032060623169, "learning_rate": 4.097204042932211e-06, "loss": 8.368, "step": 2140 }, { "epoch": 0.0041182887456746, "grad_norm": 0.6431570649147034, "learning_rate": 4.1163588070412914e-06, "loss": 8.3977, "step": 2150 }, { "epoch": 0.00413744357704983, "grad_norm": 0.7114273905754089, "learning_rate": 4.135513571150371e-06, "loss": 8.4054, "step": 2160 }, { "epoch": 0.004156598408425061, "grad_norm": 0.6994202136993408, "learning_rate": 4.154668335259451e-06, "loss": 8.4271, "step": 2170 }, { "epoch": 0.0041757532398002915, "grad_norm": 0.6516277194023132, "learning_rate": 4.17382309936853e-06, "loss": 8.3666, "step": 2180 }, { "epoch": 0.004194908071175523, "grad_norm": 0.8769553899765015, "learning_rate": 4.192977863477611e-06, "loss": 8.3899, "step": 2190 }, { "epoch": 0.004214062902550753, "grad_norm": 0.8381969332695007, "learning_rate": 4.212132627586691e-06, "loss": 8.3769, "step": 2200 }, { "epoch": 0.004233217733925983, "grad_norm": 0.6518877744674683, "learning_rate": 4.231287391695771e-06, "loss": 8.4312, "step": 2210 }, { "epoch": 0.004252372565301215, "grad_norm": 0.6784120202064514, "learning_rate": 4.25044215580485e-06, "loss": 8.4186, "step": 2220 }, { "epoch": 0.004271527396676445, "grad_norm": 0.6213794946670532, "learning_rate": 4.269596919913931e-06, "loss": 8.3738, "step": 2230 }, { "epoch": 0.004290682228051676, "grad_norm": 0.6992650628089905, "learning_rate": 4.288751684023011e-06, "loss": 8.3949, "step": 2240 }, { "epoch": 0.0043098370594269065, "grad_norm": 0.6471234560012817, "learning_rate": 4.307906448132091e-06, "loss": 8.3932, "step": 2250 }, { "epoch": 0.004328991890802138, "grad_norm": 0.6119053363800049, "learning_rate": 4.3270612122411706e-06, "loss": 8.3509, "step": 2260 }, { "epoch": 0.004348146722177368, "grad_norm": 0.662764310836792, "learning_rate": 4.346215976350251e-06, "loss": 8.3973, "step": 2270 }, { "epoch": 0.004367301553552598, "grad_norm": 0.6686667203903198, "learning_rate": 4.365370740459331e-06, "loss": 
8.3441, "step": 2280 }, { "epoch": 0.00438645638492783, "grad_norm": 0.6290851831436157, "learning_rate": 4.384525504568411e-06, "loss": 8.3153, "step": 2290 }, { "epoch": 0.00440561121630306, "grad_norm": 0.7814244627952576, "learning_rate": 4.403680268677491e-06, "loss": 8.3159, "step": 2300 }, { "epoch": 0.004424766047678291, "grad_norm": 0.8406811952590942, "learning_rate": 4.42283503278657e-06, "loss": 8.2859, "step": 2310 }, { "epoch": 0.0044439208790535215, "grad_norm": 0.8185849189758301, "learning_rate": 4.441989796895651e-06, "loss": 8.3599, "step": 2320 }, { "epoch": 0.004463075710428752, "grad_norm": 0.7182426452636719, "learning_rate": 4.461144561004731e-06, "loss": 8.4148, "step": 2330 }, { "epoch": 0.004482230541803983, "grad_norm": 1.0023998022079468, "learning_rate": 4.480299325113811e-06, "loss": 8.2697, "step": 2340 }, { "epoch": 0.004501385373179213, "grad_norm": 0.729817807674408, "learning_rate": 4.4994540892228904e-06, "loss": 8.3002, "step": 2350 }, { "epoch": 0.0045205402045544445, "grad_norm": 0.6448858380317688, "learning_rate": 4.518608853331971e-06, "loss": 8.2462, "step": 2360 }, { "epoch": 0.004539695035929675, "grad_norm": 0.8903743028640747, "learning_rate": 4.5377636174410505e-06, "loss": 8.2326, "step": 2370 }, { "epoch": 0.004558849867304906, "grad_norm": 0.6366249918937683, "learning_rate": 4.556918381550131e-06, "loss": 8.2663, "step": 2380 }, { "epoch": 0.004578004698680136, "grad_norm": 0.6604827046394348, "learning_rate": 4.576073145659211e-06, "loss": 8.312, "step": 2390 }, { "epoch": 0.004597159530055367, "grad_norm": 0.6430513262748718, "learning_rate": 4.595227909768291e-06, "loss": 8.2511, "step": 2400 }, { "epoch": 0.004616314361430598, "grad_norm": 0.6089443564414978, "learning_rate": 4.614382673877371e-06, "loss": 8.2659, "step": 2410 }, { "epoch": 0.004635469192805828, "grad_norm": 0.682377278804779, "learning_rate": 4.633537437986451e-06, "loss": 8.2998, "step": 2420 }, { "epoch": 0.0046546240241810595, 
"grad_norm": 0.5833146572113037, "learning_rate": 4.652692202095531e-06, "loss": 8.231, "step": 2430 }, { "epoch": 0.00467377885555629, "grad_norm": 0.6304609179496765, "learning_rate": 4.671846966204611e-06, "loss": 8.2542, "step": 2440 }, { "epoch": 0.00469293368693152, "grad_norm": 0.6784960031509399, "learning_rate": 4.691001730313691e-06, "loss": 8.2175, "step": 2450 }, { "epoch": 0.004712088518306751, "grad_norm": 0.7231557965278625, "learning_rate": 4.71015649442277e-06, "loss": 8.3182, "step": 2460 }, { "epoch": 0.004731243349681982, "grad_norm": 0.6680861115455627, "learning_rate": 4.729311258531851e-06, "loss": 8.2651, "step": 2470 }, { "epoch": 0.004750398181057213, "grad_norm": 0.6320644617080688, "learning_rate": 4.7484660226409305e-06, "loss": 8.2339, "step": 2480 }, { "epoch": 0.004769553012432443, "grad_norm": 0.7979405522346497, "learning_rate": 4.76762078675001e-06, "loss": 8.2398, "step": 2490 }, { "epoch": 0.004788707843807674, "grad_norm": 0.6795972585678101, "learning_rate": 4.7867755508590906e-06, "loss": 8.1778, "step": 2500 }, { "epoch": 0.004807862675182905, "grad_norm": 0.6071348786354065, "learning_rate": 4.805930314968171e-06, "loss": 8.2043, "step": 2510 }, { "epoch": 0.004827017506558135, "grad_norm": 0.9761671423912048, "learning_rate": 4.8250850790772515e-06, "loss": 8.1996, "step": 2520 }, { "epoch": 0.004846172337933366, "grad_norm": 0.7045311331748962, "learning_rate": 4.844239843186331e-06, "loss": 8.1909, "step": 2530 }, { "epoch": 0.004865327169308597, "grad_norm": 0.7284126281738281, "learning_rate": 4.863394607295411e-06, "loss": 8.2099, "step": 2540 }, { "epoch": 0.004884482000683828, "grad_norm": 0.7045251131057739, "learning_rate": 4.88254937140449e-06, "loss": 8.1822, "step": 2550 }, { "epoch": 0.004903636832059058, "grad_norm": 0.8706386089324951, "learning_rate": 4.901704135513571e-06, "loss": 8.1931, "step": 2560 }, { "epoch": 0.0049227916634342886, "grad_norm": 0.690238893032074, "learning_rate": 
4.92085889962265e-06, "loss": 8.2207, "step": 2570 }, { "epoch": 0.00494194649480952, "grad_norm": 0.6909633874893188, "learning_rate": 4.94001366373173e-06, "loss": 8.1603, "step": 2580 }, { "epoch": 0.00496110132618475, "grad_norm": 0.7038037776947021, "learning_rate": 4.959168427840811e-06, "loss": 8.1778, "step": 2590 }, { "epoch": 0.004980256157559981, "grad_norm": 0.6272507905960083, "learning_rate": 4.978323191949891e-06, "loss": 8.2312, "step": 2600 }, { "epoch": 0.004999410988935212, "grad_norm": 0.5973273515701294, "learning_rate": 4.997477956058971e-06, "loss": 8.0728, "step": 2610 }, { "epoch": 0.005018565820310442, "grad_norm": 0.6744768023490906, "learning_rate": 5.016632720168051e-06, "loss": 8.1796, "step": 2620 }, { "epoch": 0.005037720651685673, "grad_norm": 0.6826954483985901, "learning_rate": 5.035787484277131e-06, "loss": 8.097, "step": 2630 }, { "epoch": 0.0050568754830609035, "grad_norm": 0.6590008735656738, "learning_rate": 5.05494224838621e-06, "loss": 8.2091, "step": 2640 }, { "epoch": 0.005076030314436135, "grad_norm": 0.6298124194145203, "learning_rate": 5.074097012495291e-06, "loss": 8.1728, "step": 2650 }, { "epoch": 0.005095185145811365, "grad_norm": 0.6991510391235352, "learning_rate": 5.09325177660437e-06, "loss": 8.1131, "step": 2660 }, { "epoch": 0.005114339977186596, "grad_norm": 0.6359108090400696, "learning_rate": 5.11240654071345e-06, "loss": 8.1573, "step": 2670 }, { "epoch": 0.005133494808561827, "grad_norm": 0.7016558051109314, "learning_rate": 5.131561304822531e-06, "loss": 8.1927, "step": 2680 }, { "epoch": 0.005152649639937057, "grad_norm": 0.6416668891906738, "learning_rate": 5.150716068931611e-06, "loss": 8.1214, "step": 2690 }, { "epoch": 0.005171804471312288, "grad_norm": 0.6394779682159424, "learning_rate": 5.169870833040691e-06, "loss": 8.1203, "step": 2700 }, { "epoch": 0.0051909593026875185, "grad_norm": 0.6376482248306274, "learning_rate": 5.189025597149771e-06, "loss": 8.1501, "step": 2710 }, { "epoch": 
0.00521011413406275, "grad_norm": 1.0252981185913086, "learning_rate": 5.2081803612588505e-06, "loss": 8.1106, "step": 2720 }, { "epoch": 0.00522926896543798, "grad_norm": 0.6553211212158203, "learning_rate": 5.22733512536793e-06, "loss": 8.1214, "step": 2730 }, { "epoch": 0.00524842379681321, "grad_norm": 0.7413418292999268, "learning_rate": 5.2464898894770106e-06, "loss": 8.0806, "step": 2740 }, { "epoch": 0.0052675786281884416, "grad_norm": 0.7488899827003479, "learning_rate": 5.26564465358609e-06, "loss": 8.1316, "step": 2750 }, { "epoch": 0.005286733459563672, "grad_norm": 0.5969369411468506, "learning_rate": 5.2847994176951715e-06, "loss": 8.0628, "step": 2760 }, { "epoch": 0.005305888290938903, "grad_norm": 0.6600368022918701, "learning_rate": 5.303954181804251e-06, "loss": 8.1406, "step": 2770 }, { "epoch": 0.0053250431223141334, "grad_norm": 0.6346338391304016, "learning_rate": 5.323108945913331e-06, "loss": 8.0662, "step": 2780 }, { "epoch": 0.005344197953689364, "grad_norm": 0.6691576242446899, "learning_rate": 5.342263710022411e-06, "loss": 8.1307, "step": 2790 }, { "epoch": 0.005363352785064595, "grad_norm": 0.6751534938812256, "learning_rate": 5.361418474131491e-06, "loss": 8.0883, "step": 2800 }, { "epoch": 0.005382507616439825, "grad_norm": 1.0249160528182983, "learning_rate": 5.38057323824057e-06, "loss": 8.0819, "step": 2810 }, { "epoch": 0.0054016624478150565, "grad_norm": 0.7512261271476746, "learning_rate": 5.39972800234965e-06, "loss": 8.0426, "step": 2820 }, { "epoch": 0.005420817279190287, "grad_norm": 0.7370613813400269, "learning_rate": 5.4188827664587305e-06, "loss": 8.0994, "step": 2830 }, { "epoch": 0.005439972110565518, "grad_norm": 0.6814995408058167, "learning_rate": 5.43803753056781e-06, "loss": 8.0904, "step": 2840 }, { "epoch": 0.005459126941940748, "grad_norm": 0.7268229722976685, "learning_rate": 5.457192294676891e-06, "loss": 8.1058, "step": 2850 }, { "epoch": 0.005478281773315979, "grad_norm": 0.8299146294593811, 
"learning_rate": 5.476347058785971e-06, "loss": 8.0579, "step": 2860 }, { "epoch": 0.00549743660469121, "grad_norm": 0.6839839220046997, "learning_rate": 5.495501822895051e-06, "loss": 8.0605, "step": 2870 }, { "epoch": 0.00551659143606644, "grad_norm": 0.6715013980865479, "learning_rate": 5.514656587004131e-06, "loss": 8.0299, "step": 2880 }, { "epoch": 0.0055357462674416715, "grad_norm": 0.6832875609397888, "learning_rate": 5.533811351113211e-06, "loss": 8.0937, "step": 2890 }, { "epoch": 0.005554901098816902, "grad_norm": 0.7265440225601196, "learning_rate": 5.55296611522229e-06, "loss": 8.0775, "step": 2900 }, { "epoch": 0.005574055930192132, "grad_norm": 0.6955132484436035, "learning_rate": 5.57212087933137e-06, "loss": 8.0134, "step": 2910 }, { "epoch": 0.005593210761567363, "grad_norm": 0.8358678817749023, "learning_rate": 5.59127564344045e-06, "loss": 8.0342, "step": 2920 }, { "epoch": 0.005612365592942594, "grad_norm": 0.768535315990448, "learning_rate": 5.61043040754953e-06, "loss": 7.9929, "step": 2930 }, { "epoch": 0.005631520424317825, "grad_norm": 0.7811728715896606, "learning_rate": 5.629585171658611e-06, "loss": 8.0226, "step": 2940 }, { "epoch": 0.005650675255693055, "grad_norm": 0.6469458341598511, "learning_rate": 5.648739935767691e-06, "loss": 8.0087, "step": 2950 }, { "epoch": 0.0056698300870682864, "grad_norm": 0.6519347429275513, "learning_rate": 5.6678946998767705e-06, "loss": 8.0759, "step": 2960 }, { "epoch": 0.005688984918443517, "grad_norm": 0.735517680644989, "learning_rate": 5.687049463985851e-06, "loss": 8.0521, "step": 2970 }, { "epoch": 0.005708139749818747, "grad_norm": 0.7660520672798157, "learning_rate": 5.7062042280949306e-06, "loss": 7.9641, "step": 2980 }, { "epoch": 0.005727294581193978, "grad_norm": 0.6861337423324585, "learning_rate": 5.72535899220401e-06, "loss": 7.9789, "step": 2990 }, { "epoch": 0.005746449412569209, "grad_norm": 0.6301390528678894, "learning_rate": 5.74451375631309e-06, "loss": 7.944, "step": 3000 }, { 
"epoch": 0.00576560424394444, "grad_norm": 0.672120988368988, "learning_rate": 5.76366852042217e-06, "loss": 8.0068, "step": 3010 }, { "epoch": 0.00578475907531967, "grad_norm": 0.776816725730896, "learning_rate": 5.782823284531251e-06, "loss": 7.9933, "step": 3020 }, { "epoch": 0.0058039139066949005, "grad_norm": 0.7296085953712463, "learning_rate": 5.801978048640331e-06, "loss": 7.992, "step": 3030 }, { "epoch": 0.005823068738070132, "grad_norm": 0.6561471819877625, "learning_rate": 5.821132812749411e-06, "loss": 7.9735, "step": 3040 }, { "epoch": 0.005842223569445362, "grad_norm": 0.7429147362709045, "learning_rate": 5.84028757685849e-06, "loss": 7.9738, "step": 3050 }, { "epoch": 0.005861378400820593, "grad_norm": 0.7049291133880615, "learning_rate": 5.859442340967571e-06, "loss": 7.9774, "step": 3060 }, { "epoch": 0.005880533232195824, "grad_norm": 0.65732342004776, "learning_rate": 5.8785971050766505e-06, "loss": 7.9841, "step": 3070 }, { "epoch": 0.005899688063571054, "grad_norm": 0.7901176810264587, "learning_rate": 5.89775186918573e-06, "loss": 8.0193, "step": 3080 }, { "epoch": 0.005918842894946285, "grad_norm": 0.690826416015625, "learning_rate": 5.91690663329481e-06, "loss": 7.9871, "step": 3090 }, { "epoch": 0.0059379977263215155, "grad_norm": 0.6740243434906006, "learning_rate": 5.93606139740389e-06, "loss": 7.9825, "step": 3100 }, { "epoch": 0.005957152557696747, "grad_norm": 0.7679795026779175, "learning_rate": 5.955216161512971e-06, "loss": 7.9817, "step": 3110 }, { "epoch": 0.005976307389071977, "grad_norm": 0.6756645441055298, "learning_rate": 5.974370925622051e-06, "loss": 7.9371, "step": 3120 }, { "epoch": 0.005995462220447208, "grad_norm": 0.7135582566261292, "learning_rate": 5.993525689731131e-06, "loss": 8.0065, "step": 3130 }, { "epoch": 0.006014617051822439, "grad_norm": 0.6409218311309814, "learning_rate": 6.01268045384021e-06, "loss": 8.0208, "step": 3140 }, { "epoch": 0.006033771883197669, "grad_norm": 0.9179268479347229, 
"learning_rate": 6.031835217949291e-06, "loss": 7.9992, "step": 3150 }, { "epoch": 0.0060529267145729, "grad_norm": 0.7586207389831543, "learning_rate": 6.05098998205837e-06, "loss": 7.9342, "step": 3160 }, { "epoch": 0.0060720815459481305, "grad_norm": 0.6353715658187866, "learning_rate": 6.07014474616745e-06, "loss": 7.9853, "step": 3170 }, { "epoch": 0.006091236377323362, "grad_norm": 0.8000138401985168, "learning_rate": 6.08929951027653e-06, "loss": 7.952, "step": 3180 }, { "epoch": 0.006110391208698592, "grad_norm": 0.7004431486129761, "learning_rate": 6.108454274385611e-06, "loss": 7.949, "step": 3190 }, { "epoch": 0.006129546040073822, "grad_norm": 0.6801975965499878, "learning_rate": 6.1276090384946905e-06, "loss": 8.0352, "step": 3200 }, { "epoch": 0.0061487008714490535, "grad_norm": 0.8535010814666748, "learning_rate": 6.146763802603771e-06, "loss": 7.9507, "step": 3210 }, { "epoch": 0.006167855702824284, "grad_norm": 0.8026807904243469, "learning_rate": 6.1659185667128506e-06, "loss": 7.8959, "step": 3220 }, { "epoch": 0.006187010534199515, "grad_norm": 0.7216088771820068, "learning_rate": 6.18507333082193e-06, "loss": 7.9135, "step": 3230 }, { "epoch": 0.006206165365574745, "grad_norm": 1.810956358909607, "learning_rate": 6.204228094931011e-06, "loss": 7.9276, "step": 3240 }, { "epoch": 0.006225320196949977, "grad_norm": 0.591415286064148, "learning_rate": 6.22338285904009e-06, "loss": 7.9747, "step": 3250 }, { "epoch": 0.006244475028325207, "grad_norm": 0.8409293293952942, "learning_rate": 6.24253762314917e-06, "loss": 7.9369, "step": 3260 }, { "epoch": 0.006263629859700437, "grad_norm": 0.822013795375824, "learning_rate": 6.2616923872582495e-06, "loss": 7.9572, "step": 3270 }, { "epoch": 0.0062827846910756685, "grad_norm": 0.806334376335144, "learning_rate": 6.280847151367331e-06, "loss": 7.948, "step": 3280 }, { "epoch": 0.006301939522450899, "grad_norm": 0.7965964078903198, "learning_rate": 6.30000191547641e-06, "loss": 7.9853, "step": 3290 }, { 
"epoch": 0.00632109435382613, "grad_norm": 0.615915060043335, "learning_rate": 6.319156679585491e-06, "loss": 7.8742, "step": 3300 }, { "epoch": 0.00634024918520136, "grad_norm": 0.7490376234054565, "learning_rate": 6.3383114436945705e-06, "loss": 7.9127, "step": 3310 }, { "epoch": 0.006359404016576591, "grad_norm": 0.8744601607322693, "learning_rate": 6.35746620780365e-06, "loss": 7.8594, "step": 3320 }, { "epoch": 0.006378558847951822, "grad_norm": 0.8357998132705688, "learning_rate": 6.3766209719127305e-06, "loss": 7.8409, "step": 3330 }, { "epoch": 0.006397713679327052, "grad_norm": 0.7577652931213379, "learning_rate": 6.39577573602181e-06, "loss": 7.9162, "step": 3340 }, { "epoch": 0.0064168685107022835, "grad_norm": 0.8535677790641785, "learning_rate": 6.41493050013089e-06, "loss": 7.8772, "step": 3350 }, { "epoch": 0.006436023342077514, "grad_norm": 0.7500529289245605, "learning_rate": 6.434085264239971e-06, "loss": 7.8708, "step": 3360 }, { "epoch": 0.006455178173452744, "grad_norm": 0.790492057800293, "learning_rate": 6.453240028349051e-06, "loss": 7.8572, "step": 3370 }, { "epoch": 0.006474333004827975, "grad_norm": 0.9277523756027222, "learning_rate": 6.47239479245813e-06, "loss": 7.9009, "step": 3380 }, { "epoch": 0.006493487836203206, "grad_norm": 0.687819242477417, "learning_rate": 6.491549556567211e-06, "loss": 7.869, "step": 3390 }, { "epoch": 0.006512642667578437, "grad_norm": 0.8170763254165649, "learning_rate": 6.51070432067629e-06, "loss": 7.7951, "step": 3400 }, { "epoch": 0.006531797498953667, "grad_norm": 1.2937349081039429, "learning_rate": 6.52985908478537e-06, "loss": 7.8568, "step": 3410 }, { "epoch": 0.006550952330328898, "grad_norm": 0.792955756187439, "learning_rate": 6.5490138488944504e-06, "loss": 7.8435, "step": 3420 }, { "epoch": 0.006570107161704129, "grad_norm": 0.8551571369171143, "learning_rate": 6.56816861300353e-06, "loss": 7.8266, "step": 3430 }, { "epoch": 0.006589261993079359, "grad_norm": 0.8191594481468201, 
"learning_rate": 6.58732337711261e-06, "loss": 7.8727, "step": 3440 }, { "epoch": 0.00660841682445459, "grad_norm": 0.9123461246490479, "learning_rate": 6.606478141221691e-06, "loss": 7.828, "step": 3450 }, { "epoch": 0.006627571655829821, "grad_norm": 0.8778250813484192, "learning_rate": 6.6256329053307706e-06, "loss": 7.8431, "step": 3460 }, { "epoch": 0.006646726487205052, "grad_norm": 0.704301655292511, "learning_rate": 6.64478766943985e-06, "loss": 7.7937, "step": 3470 }, { "epoch": 0.006665881318580282, "grad_norm": 0.7049712538719177, "learning_rate": 6.663942433548931e-06, "loss": 7.9039, "step": 3480 }, { "epoch": 0.0066850361499555125, "grad_norm": 0.8605071902275085, "learning_rate": 6.68309719765801e-06, "loss": 7.826, "step": 3490 }, { "epoch": 0.006704190981330744, "grad_norm": 0.8103715181350708, "learning_rate": 6.70225196176709e-06, "loss": 7.8205, "step": 3500 }, { "epoch": 0.006723345812705974, "grad_norm": 0.9060308337211609, "learning_rate": 6.72140672587617e-06, "loss": 7.8562, "step": 3510 }, { "epoch": 0.006742500644081205, "grad_norm": 0.8560940027236938, "learning_rate": 6.74056148998525e-06, "loss": 7.8117, "step": 3520 }, { "epoch": 0.006761655475456436, "grad_norm": 0.7225240468978882, "learning_rate": 6.75971625409433e-06, "loss": 7.7688, "step": 3530 }, { "epoch": 0.006780810306831667, "grad_norm": 1.1488929986953735, "learning_rate": 6.778871018203411e-06, "loss": 7.8233, "step": 3540 }, { "epoch": 0.006799965138206897, "grad_norm": 0.9784980416297913, "learning_rate": 6.7980257823124905e-06, "loss": 7.8224, "step": 3550 }, { "epoch": 0.0068191199695821275, "grad_norm": 0.8157426714897156, "learning_rate": 6.81718054642157e-06, "loss": 7.8556, "step": 3560 }, { "epoch": 0.006838274800957359, "grad_norm": 0.9462270736694336, "learning_rate": 6.8363353105306505e-06, "loss": 7.8517, "step": 3570 }, { "epoch": 0.006857429632332589, "grad_norm": 0.8688716888427734, "learning_rate": 6.85549007463973e-06, "loss": 7.8581, "step": 3580 }, { 
"epoch": 0.00687658446370782, "grad_norm": 0.9155051708221436, "learning_rate": 6.87464483874881e-06, "loss": 7.8183, "step": 3590 }, { "epoch": 0.0068957392950830506, "grad_norm": 0.8986802101135254, "learning_rate": 6.89379960285789e-06, "loss": 7.7796, "step": 3600 }, { "epoch": 0.006914894126458281, "grad_norm": 0.8834370970726013, "learning_rate": 6.91295436696697e-06, "loss": 7.7743, "step": 3610 }, { "epoch": 0.006934048957833512, "grad_norm": 0.940233588218689, "learning_rate": 6.93210913107605e-06, "loss": 7.7899, "step": 3620 }, { "epoch": 0.0069532037892087424, "grad_norm": 0.8881641030311584, "learning_rate": 6.951263895185131e-06, "loss": 7.7408, "step": 3630 }, { "epoch": 0.006972358620583974, "grad_norm": 0.8086764216423035, "learning_rate": 6.97041865929421e-06, "loss": 7.7534, "step": 3640 }, { "epoch": 0.006991513451959204, "grad_norm": 0.7865943312644958, "learning_rate": 6.98957342340329e-06, "loss": 7.7864, "step": 3650 }, { "epoch": 0.007010668283334434, "grad_norm": 0.9082685708999634, "learning_rate": 7.0087281875123704e-06, "loss": 7.8089, "step": 3660 }, { "epoch": 0.0070298231147096655, "grad_norm": 0.9830219745635986, "learning_rate": 7.02788295162145e-06, "loss": 7.7484, "step": 3670 }, { "epoch": 0.007048977946084896, "grad_norm": 0.7984902858734131, "learning_rate": 7.04703771573053e-06, "loss": 7.9069, "step": 3680 }, { "epoch": 0.007068132777460127, "grad_norm": 0.9335933923721313, "learning_rate": 7.06619247983961e-06, "loss": 7.8505, "step": 3690 }, { "epoch": 0.007087287608835357, "grad_norm": 0.8844707012176514, "learning_rate": 7.0853472439486906e-06, "loss": 7.7606, "step": 3700 }, { "epoch": 0.007106442440210589, "grad_norm": 0.876777708530426, "learning_rate": 7.10450200805777e-06, "loss": 7.7835, "step": 3710 }, { "epoch": 0.007125597271585819, "grad_norm": 1.3071036338806152, "learning_rate": 7.123656772166851e-06, "loss": 7.7767, "step": 3720 }, { "epoch": 0.007144752102961049, "grad_norm": 1.2284809350967407, 
"learning_rate": 7.14281153627593e-06, "loss": 7.6873, "step": 3730 }, { "epoch": 0.0071639069343362805, "grad_norm": 0.7905216217041016, "learning_rate": 7.16196630038501e-06, "loss": 7.7844, "step": 3740 }, { "epoch": 0.007183061765711511, "grad_norm": 0.8900864720344543, "learning_rate": 7.18112106449409e-06, "loss": 7.8184, "step": 3750 }, { "epoch": 0.007202216597086742, "grad_norm": 0.8239138722419739, "learning_rate": 7.20027582860317e-06, "loss": 7.82, "step": 3760 }, { "epoch": 0.007221371428461972, "grad_norm": 0.9380962252616882, "learning_rate": 7.2194305927122495e-06, "loss": 7.6874, "step": 3770 }, { "epoch": 0.007240526259837203, "grad_norm": 1.0639327764511108, "learning_rate": 7.23858535682133e-06, "loss": 7.7246, "step": 3780 }, { "epoch": 0.007259681091212434, "grad_norm": 0.9882181286811829, "learning_rate": 7.2577401209304105e-06, "loss": 7.7411, "step": 3790 }, { "epoch": 0.007278835922587664, "grad_norm": 0.746649444103241, "learning_rate": 7.27689488503949e-06, "loss": 7.7775, "step": 3800 }, { "epoch": 0.0072979907539628954, "grad_norm": 0.7576432228088379, "learning_rate": 7.2960496491485705e-06, "loss": 7.6867, "step": 3810 }, { "epoch": 0.007317145585338126, "grad_norm": 0.8830891251564026, "learning_rate": 7.31520441325765e-06, "loss": 7.7742, "step": 3820 }, { "epoch": 0.007336300416713357, "grad_norm": 0.7717125415802002, "learning_rate": 7.33435917736673e-06, "loss": 7.7436, "step": 3830 }, { "epoch": 0.007355455248088587, "grad_norm": 1.0291541814804077, "learning_rate": 7.35351394147581e-06, "loss": 7.7597, "step": 3840 }, { "epoch": 0.007374610079463818, "grad_norm": 0.9285041093826294, "learning_rate": 7.37266870558489e-06, "loss": 7.7371, "step": 3850 }, { "epoch": 0.007393764910839049, "grad_norm": 0.8432379961013794, "learning_rate": 7.3918234696939694e-06, "loss": 7.745, "step": 3860 }, { "epoch": 0.007412919742214279, "grad_norm": 0.9926259517669678, "learning_rate": 7.410978233803051e-06, "loss": 7.7329, "step": 3870 }, { 
"epoch": 0.00743207457358951, "grad_norm": 0.9965758323669434, "learning_rate": 7.43013299791213e-06, "loss": 7.7091, "step": 3880 }, { "epoch": 0.007451229404964741, "grad_norm": 0.8275953531265259, "learning_rate": 7.44928776202121e-06, "loss": 7.7077, "step": 3890 }, { "epoch": 0.007470384236339971, "grad_norm": 0.9433900713920593, "learning_rate": 7.4684425261302904e-06, "loss": 7.7136, "step": 3900 }, { "epoch": 0.007489539067715202, "grad_norm": 0.9548048377037048, "learning_rate": 7.48759729023937e-06, "loss": 7.6307, "step": 3910 }, { "epoch": 0.007508693899090433, "grad_norm": 0.9306312799453735, "learning_rate": 7.50675205434845e-06, "loss": 7.6834, "step": 3920 }, { "epoch": 0.007527848730465664, "grad_norm": 0.9455264210700989, "learning_rate": 7.52590681845753e-06, "loss": 7.6626, "step": 3930 }, { "epoch": 0.007547003561840894, "grad_norm": 0.7962921261787415, "learning_rate": 7.54506158256661e-06, "loss": 7.7292, "step": 3940 }, { "epoch": 0.0075661583932161245, "grad_norm": 1.0785493850708008, "learning_rate": 7.564216346675689e-06, "loss": 7.5983, "step": 3950 }, { "epoch": 0.007585313224591356, "grad_norm": 0.9223509430885315, "learning_rate": 7.583371110784771e-06, "loss": 7.7241, "step": 3960 }, { "epoch": 0.007604468055966586, "grad_norm": 0.8340654373168945, "learning_rate": 7.60252587489385e-06, "loss": 7.6974, "step": 3970 }, { "epoch": 0.007623622887341817, "grad_norm": 0.8189052939414978, "learning_rate": 7.62168063900293e-06, "loss": 7.6526, "step": 3980 }, { "epoch": 0.007642777718717048, "grad_norm": 0.936347484588623, "learning_rate": 7.64083540311201e-06, "loss": 7.6218, "step": 3990 }, { "epoch": 0.007661932550092279, "grad_norm": 0.8927524089813232, "learning_rate": 7.65999016722109e-06, "loss": 7.7703, "step": 4000 }, { "epoch": 0.007681087381467509, "grad_norm": 1.1881896257400513, "learning_rate": 7.67914493133017e-06, "loss": 7.6372, "step": 4010 }, { "epoch": 0.0077002422128427395, "grad_norm": 0.8373076915740967, 
"learning_rate": 7.69829969543925e-06, "loss": 7.6705, "step": 4020 }, { "epoch": 0.007719397044217971, "grad_norm": 0.8512048125267029, "learning_rate": 7.71745445954833e-06, "loss": 7.7114, "step": 4030 }, { "epoch": 0.007738551875593201, "grad_norm": 0.9182966351509094, "learning_rate": 7.73660922365741e-06, "loss": 7.6728, "step": 4040 }, { "epoch": 0.007757706706968432, "grad_norm": 0.9684514403343201, "learning_rate": 7.75576398776649e-06, "loss": 7.7068, "step": 4050 }, { "epoch": 0.0077768615383436625, "grad_norm": 1.038651943206787, "learning_rate": 7.77491875187557e-06, "loss": 7.6771, "step": 4060 }, { "epoch": 0.007796016369718893, "grad_norm": 0.9465256929397583, "learning_rate": 7.79407351598465e-06, "loss": 7.694, "step": 4070 }, { "epoch": 0.007815171201094123, "grad_norm": 1.1304473876953125, "learning_rate": 7.81322828009373e-06, "loss": 7.6611, "step": 4080 }, { "epoch": 0.007834326032469354, "grad_norm": 0.7061340808868408, "learning_rate": 7.83238304420281e-06, "loss": 7.6354, "step": 4090 }, { "epoch": 0.007853480863844586, "grad_norm": 0.9259918928146362, "learning_rate": 7.85153780831189e-06, "loss": 7.7524, "step": 4100 }, { "epoch": 0.007872635695219817, "grad_norm": 0.9027718901634216, "learning_rate": 7.87069257242097e-06, "loss": 7.7455, "step": 4110 }, { "epoch": 0.007891790526595046, "grad_norm": 1.1694250106811523, "learning_rate": 7.88984733653005e-06, "loss": 7.6558, "step": 4120 }, { "epoch": 0.007910945357970278, "grad_norm": 0.8873170614242554, "learning_rate": 7.90900210063913e-06, "loss": 7.615, "step": 4130 }, { "epoch": 0.007930100189345509, "grad_norm": 1.0792900323867798, "learning_rate": 7.92815686474821e-06, "loss": 7.6242, "step": 4140 }, { "epoch": 0.007949255020720738, "grad_norm": 1.052432894706726, "learning_rate": 7.94731162885729e-06, "loss": 7.648, "step": 4150 }, { "epoch": 0.00796840985209597, "grad_norm": 1.2588390111923218, "learning_rate": 7.96646639296637e-06, "loss": 7.6508, "step": 4160 }, { "epoch": 
0.0079875646834712, "grad_norm": 1.006081461906433, "learning_rate": 7.98562115707545e-06, "loss": 7.6116, "step": 4170 }, { "epoch": 0.00800671951484643, "grad_norm": 1.2221288681030273, "learning_rate": 8.00477592118453e-06, "loss": 7.5376, "step": 4180 }, { "epoch": 0.008025874346221661, "grad_norm": 0.8307253122329712, "learning_rate": 8.02393068529361e-06, "loss": 7.6392, "step": 4190 }, { "epoch": 0.008045029177596892, "grad_norm": 0.8040696978569031, "learning_rate": 8.04308544940269e-06, "loss": 7.5926, "step": 4200 }, { "epoch": 0.008064184008972124, "grad_norm": 1.0175702571868896, "learning_rate": 8.06224021351177e-06, "loss": 7.6216, "step": 4210 }, { "epoch": 0.008083338840347353, "grad_norm": 1.1833854913711548, "learning_rate": 8.08139497762085e-06, "loss": 7.5686, "step": 4220 }, { "epoch": 0.008102493671722584, "grad_norm": 0.8086752891540527, "learning_rate": 8.10054974172993e-06, "loss": 7.6214, "step": 4230 }, { "epoch": 0.008121648503097816, "grad_norm": 0.9776495099067688, "learning_rate": 8.11970450583901e-06, "loss": 7.7445, "step": 4240 }, { "epoch": 0.008140803334473045, "grad_norm": 1.077842354774475, "learning_rate": 8.13885926994809e-06, "loss": 7.6489, "step": 4250 }, { "epoch": 0.008159958165848276, "grad_norm": 0.9204802513122559, "learning_rate": 8.15801403405717e-06, "loss": 7.6187, "step": 4260 }, { "epoch": 0.008179112997223507, "grad_norm": 1.0089592933654785, "learning_rate": 8.17716879816625e-06, "loss": 7.687, "step": 4270 }, { "epoch": 0.008198267828598739, "grad_norm": 1.0485634803771973, "learning_rate": 8.19632356227533e-06, "loss": 7.5487, "step": 4280 }, { "epoch": 0.008217422659973968, "grad_norm": 1.0100159645080566, "learning_rate": 8.21547832638441e-06, "loss": 7.5311, "step": 4290 }, { "epoch": 0.0082365774913492, "grad_norm": 0.8788320422172546, "learning_rate": 8.234633090493491e-06, "loss": 7.6337, "step": 4300 }, { "epoch": 0.00825573232272443, "grad_norm": 0.8195133209228516, "learning_rate": 
8.25378785460257e-06, "loss": 7.6689, "step": 4310 }, { "epoch": 0.00827488715409966, "grad_norm": 1.0084341764450073, "learning_rate": 8.27294261871165e-06, "loss": 7.6166, "step": 4320 }, { "epoch": 0.008294041985474891, "grad_norm": 1.086911916732788, "learning_rate": 8.29209738282073e-06, "loss": 7.5941, "step": 4330 }, { "epoch": 0.008313196816850122, "grad_norm": 0.954012930393219, "learning_rate": 8.31125214692981e-06, "loss": 7.6311, "step": 4340 }, { "epoch": 0.008332351648225354, "grad_norm": 0.892310380935669, "learning_rate": 8.330406911038889e-06, "loss": 7.6121, "step": 4350 }, { "epoch": 0.008351506479600583, "grad_norm": 1.1810646057128906, "learning_rate": 8.34956167514797e-06, "loss": 7.6667, "step": 4360 }, { "epoch": 0.008370661310975814, "grad_norm": 0.8186820149421692, "learning_rate": 8.36871643925705e-06, "loss": 7.6877, "step": 4370 }, { "epoch": 0.008389816142351045, "grad_norm": 0.9536104798316956, "learning_rate": 8.38787120336613e-06, "loss": 7.6009, "step": 4380 }, { "epoch": 0.008408970973726275, "grad_norm": 1.197795033454895, "learning_rate": 8.407025967475211e-06, "loss": 7.6123, "step": 4390 }, { "epoch": 0.008428125805101506, "grad_norm": 1.0664905309677124, "learning_rate": 8.42618073158429e-06, "loss": 7.6047, "step": 4400 }, { "epoch": 0.008447280636476737, "grad_norm": 0.9903571009635925, "learning_rate": 8.44533549569337e-06, "loss": 7.555, "step": 4410 }, { "epoch": 0.008466435467851967, "grad_norm": 0.7639854550361633, "learning_rate": 8.46449025980245e-06, "loss": 7.5753, "step": 4420 }, { "epoch": 0.008485590299227198, "grad_norm": 1.0148392915725708, "learning_rate": 8.48364502391153e-06, "loss": 7.5508, "step": 4430 }, { "epoch": 0.00850474513060243, "grad_norm": 1.0150182247161865, "learning_rate": 8.502799788020609e-06, "loss": 7.6268, "step": 4440 }, { "epoch": 0.00852389996197766, "grad_norm": 1.1877341270446777, "learning_rate": 8.52195455212969e-06, "loss": 7.6384, "step": 4450 }, { "epoch": 0.00854305479335289, 
"grad_norm": 0.9790689945220947, "learning_rate": 8.54110931623877e-06, "loss": 7.5581, "step": 4460 }, { "epoch": 0.008562209624728121, "grad_norm": 1.1694639921188354, "learning_rate": 8.56026408034785e-06, "loss": 7.6255, "step": 4470 }, { "epoch": 0.008581364456103352, "grad_norm": 0.9280096888542175, "learning_rate": 8.57941884445693e-06, "loss": 7.5898, "step": 4480 }, { "epoch": 0.008600519287478582, "grad_norm": 1.3602865934371948, "learning_rate": 8.59857360856601e-06, "loss": 7.5084, "step": 4490 }, { "epoch": 0.008619674118853813, "grad_norm": 1.147404432296753, "learning_rate": 8.61772837267509e-06, "loss": 7.6652, "step": 4500 }, { "epoch": 0.008638828950229044, "grad_norm": 1.253784418106079, "learning_rate": 8.63688313678417e-06, "loss": 7.4986, "step": 4510 }, { "epoch": 0.008657983781604275, "grad_norm": 1.0453466176986694, "learning_rate": 8.65603790089325e-06, "loss": 7.5258, "step": 4520 }, { "epoch": 0.008677138612979505, "grad_norm": 1.3912415504455566, "learning_rate": 8.675192665002329e-06, "loss": 7.5589, "step": 4530 }, { "epoch": 0.008696293444354736, "grad_norm": 1.0608632564544678, "learning_rate": 8.69434742911141e-06, "loss": 7.533, "step": 4540 }, { "epoch": 0.008715448275729967, "grad_norm": 0.8687779903411865, "learning_rate": 8.71350219322049e-06, "loss": 7.5044, "step": 4550 }, { "epoch": 0.008734603107105197, "grad_norm": 1.4010088443756104, "learning_rate": 8.73265695732957e-06, "loss": 7.5785, "step": 4560 }, { "epoch": 0.008753757938480428, "grad_norm": 1.07248055934906, "learning_rate": 8.75181172143865e-06, "loss": 7.6105, "step": 4570 }, { "epoch": 0.00877291276985566, "grad_norm": 0.9393342137336731, "learning_rate": 8.77096648554773e-06, "loss": 7.5673, "step": 4580 }, { "epoch": 0.008792067601230889, "grad_norm": 1.6082881689071655, "learning_rate": 8.79012124965681e-06, "loss": 7.5515, "step": 4590 }, { "epoch": 0.00881122243260612, "grad_norm": 0.8518067002296448, "learning_rate": 8.80927601376589e-06, "loss": 7.5229, 
"step": 4600 }, { "epoch": 0.008830377263981351, "grad_norm": 1.1099989414215088, "learning_rate": 8.828430777874969e-06, "loss": 7.5305, "step": 4610 }, { "epoch": 0.008849532095356582, "grad_norm": 0.9434196949005127, "learning_rate": 8.847585541984049e-06, "loss": 7.5318, "step": 4620 }, { "epoch": 0.008868686926731812, "grad_norm": 1.6066151857376099, "learning_rate": 8.86674030609313e-06, "loss": 7.5768, "step": 4630 }, { "epoch": 0.008887841758107043, "grad_norm": 1.0753730535507202, "learning_rate": 8.88589507020221e-06, "loss": 7.559, "step": 4640 }, { "epoch": 0.008906996589482274, "grad_norm": 1.5253887176513672, "learning_rate": 8.90504983431129e-06, "loss": 7.5481, "step": 4650 }, { "epoch": 0.008926151420857504, "grad_norm": 1.1420261859893799, "learning_rate": 8.92420459842037e-06, "loss": 7.5155, "step": 4660 }, { "epoch": 0.008945306252232735, "grad_norm": 1.0828526020050049, "learning_rate": 8.94335936252945e-06, "loss": 7.5098, "step": 4670 }, { "epoch": 0.008964461083607966, "grad_norm": 0.9205243587493896, "learning_rate": 8.96251412663853e-06, "loss": 7.5295, "step": 4680 }, { "epoch": 0.008983615914983197, "grad_norm": 1.2998793125152588, "learning_rate": 8.98166889074761e-06, "loss": 7.4842, "step": 4690 }, { "epoch": 0.009002770746358427, "grad_norm": 1.0609490871429443, "learning_rate": 9.000823654856689e-06, "loss": 7.5448, "step": 4700 }, { "epoch": 0.009021925577733658, "grad_norm": 1.3448905944824219, "learning_rate": 9.019978418965769e-06, "loss": 7.5406, "step": 4710 }, { "epoch": 0.009041080409108889, "grad_norm": 1.1414577960968018, "learning_rate": 9.03913318307485e-06, "loss": 7.5927, "step": 4720 }, { "epoch": 0.009060235240484119, "grad_norm": 1.162202000617981, "learning_rate": 9.05828794718393e-06, "loss": 7.4749, "step": 4730 }, { "epoch": 0.00907939007185935, "grad_norm": 1.0790334939956665, "learning_rate": 9.07744271129301e-06, "loss": 7.4816, "step": 4740 }, { "epoch": 0.009098544903234581, "grad_norm": 1.330404281616211, 
"learning_rate": 9.09659747540209e-06, "loss": 7.6053, "step": 4750 }, { "epoch": 0.009117699734609812, "grad_norm": 0.8892325758934021, "learning_rate": 9.11575223951117e-06, "loss": 7.5088, "step": 4760 }, { "epoch": 0.009136854565985042, "grad_norm": 0.9379948377609253, "learning_rate": 9.13490700362025e-06, "loss": 7.5495, "step": 4770 }, { "epoch": 0.009156009397360273, "grad_norm": 0.965388298034668, "learning_rate": 9.15406176772933e-06, "loss": 7.484, "step": 4780 }, { "epoch": 0.009175164228735504, "grad_norm": 1.1472207307815552, "learning_rate": 9.173216531838409e-06, "loss": 7.4459, "step": 4790 }, { "epoch": 0.009194319060110734, "grad_norm": 1.0933728218078613, "learning_rate": 9.192371295947489e-06, "loss": 7.44, "step": 4800 }, { "epoch": 0.009213473891485965, "grad_norm": 1.1490086317062378, "learning_rate": 9.21152606005657e-06, "loss": 7.4273, "step": 4810 }, { "epoch": 0.009232628722861196, "grad_norm": 0.8695114850997925, "learning_rate": 9.23068082416565e-06, "loss": 7.4925, "step": 4820 }, { "epoch": 0.009251783554236425, "grad_norm": 0.9863635897636414, "learning_rate": 9.249835588274729e-06, "loss": 7.5072, "step": 4830 }, { "epoch": 0.009270938385611657, "grad_norm": 1.5144778490066528, "learning_rate": 9.26899035238381e-06, "loss": 7.3924, "step": 4840 }, { "epoch": 0.009290093216986888, "grad_norm": 1.3381047248840332, "learning_rate": 9.28814511649289e-06, "loss": 7.4398, "step": 4850 }, { "epoch": 0.009309248048362119, "grad_norm": 0.9596222639083862, "learning_rate": 9.30729988060197e-06, "loss": 7.43, "step": 4860 }, { "epoch": 0.009328402879737348, "grad_norm": 0.9443740248680115, "learning_rate": 9.32645464471105e-06, "loss": 7.4712, "step": 4870 }, { "epoch": 0.00934755771111258, "grad_norm": 1.082110047340393, "learning_rate": 9.345609408820129e-06, "loss": 7.5004, "step": 4880 }, { "epoch": 0.009366712542487811, "grad_norm": 1.1816338300704956, "learning_rate": 9.364764172929208e-06, "loss": 7.4553, "step": 4890 }, { "epoch": 
0.00938586737386304, "grad_norm": 1.4005177021026611, "learning_rate": 9.38391893703829e-06, "loss": 7.4872, "step": 4900 }, { "epoch": 0.009405022205238272, "grad_norm": 1.0551475286483765, "learning_rate": 9.40307370114737e-06, "loss": 7.5107, "step": 4910 }, { "epoch": 0.009424177036613503, "grad_norm": 1.1775835752487183, "learning_rate": 9.422228465256449e-06, "loss": 7.4832, "step": 4920 }, { "epoch": 0.009443331867988734, "grad_norm": 1.2163670063018799, "learning_rate": 9.44138322936553e-06, "loss": 7.4249, "step": 4930 }, { "epoch": 0.009462486699363963, "grad_norm": 1.1473047733306885, "learning_rate": 9.46053799347461e-06, "loss": 7.4554, "step": 4940 }, { "epoch": 0.009481641530739195, "grad_norm": 0.8548535108566284, "learning_rate": 9.47969275758369e-06, "loss": 7.4569, "step": 4950 }, { "epoch": 0.009500796362114426, "grad_norm": 1.15328049659729, "learning_rate": 9.498847521692769e-06, "loss": 7.3895, "step": 4960 }, { "epoch": 0.009519951193489655, "grad_norm": 1.1681143045425415, "learning_rate": 9.518002285801849e-06, "loss": 7.4085, "step": 4970 }, { "epoch": 0.009539106024864887, "grad_norm": 1.4308291673660278, "learning_rate": 9.537157049910928e-06, "loss": 7.5253, "step": 4980 }, { "epoch": 0.009558260856240118, "grad_norm": 1.0850592851638794, "learning_rate": 9.55631181402001e-06, "loss": 7.4393, "step": 4990 }, { "epoch": 0.009577415687615347, "grad_norm": 0.9155843257904053, "learning_rate": 9.57546657812909e-06, "loss": 7.4315, "step": 5000 }, { "epoch": 0.009596570518990578, "grad_norm": 1.086708903312683, "learning_rate": 9.594621342238169e-06, "loss": 7.416, "step": 5010 }, { "epoch": 0.00961572535036581, "grad_norm": 0.9425157904624939, "learning_rate": 9.613776106347248e-06, "loss": 7.4511, "step": 5020 }, { "epoch": 0.00963488018174104, "grad_norm": 0.9190508723258972, "learning_rate": 9.63293087045633e-06, "loss": 7.4271, "step": 5030 }, { "epoch": 0.00965403501311627, "grad_norm": 1.0688143968582153, "learning_rate": 
9.652085634565411e-06, "loss": 7.4214, "step": 5040 }, { "epoch": 0.009673189844491501, "grad_norm": 1.3966342210769653, "learning_rate": 9.67124039867449e-06, "loss": 7.3835, "step": 5050 }, { "epoch": 0.009692344675866733, "grad_norm": 1.0168150663375854, "learning_rate": 9.69039516278357e-06, "loss": 7.5341, "step": 5060 }, { "epoch": 0.009711499507241962, "grad_norm": 1.0859190225601196, "learning_rate": 9.70954992689265e-06, "loss": 7.4738, "step": 5070 }, { "epoch": 0.009730654338617193, "grad_norm": 1.058058738708496, "learning_rate": 9.72870469100173e-06, "loss": 7.426, "step": 5080 }, { "epoch": 0.009749809169992425, "grad_norm": 1.258900761604309, "learning_rate": 9.747859455110809e-06, "loss": 7.4009, "step": 5090 }, { "epoch": 0.009768964001367656, "grad_norm": 1.2450281381607056, "learning_rate": 9.767014219219889e-06, "loss": 7.4268, "step": 5100 }, { "epoch": 0.009788118832742885, "grad_norm": 1.2696000337600708, "learning_rate": 9.78616898332897e-06, "loss": 7.4299, "step": 5110 }, { "epoch": 0.009807273664118116, "grad_norm": 1.2268974781036377, "learning_rate": 9.80532374743805e-06, "loss": 7.3932, "step": 5120 }, { "epoch": 0.009826428495493348, "grad_norm": 1.0750620365142822, "learning_rate": 9.82447851154713e-06, "loss": 7.4154, "step": 5130 }, { "epoch": 0.009845583326868577, "grad_norm": 1.4234400987625122, "learning_rate": 9.843633275656209e-06, "loss": 7.3448, "step": 5140 }, { "epoch": 0.009864738158243808, "grad_norm": 1.1414262056350708, "learning_rate": 9.862788039765289e-06, "loss": 7.3338, "step": 5150 }, { "epoch": 0.00988389298961904, "grad_norm": 1.1069281101226807, "learning_rate": 9.881942803874368e-06, "loss": 7.3855, "step": 5160 }, { "epoch": 0.009903047820994269, "grad_norm": 1.165891170501709, "learning_rate": 9.90109756798345e-06, "loss": 7.3698, "step": 5170 }, { "epoch": 0.0099222026523695, "grad_norm": 1.133309006690979, "learning_rate": 9.920252332092529e-06, "loss": 7.3684, "step": 5180 }, { "epoch": 
0.009941357483744731, "grad_norm": 1.1371382474899292, "learning_rate": 9.939407096201609e-06, "loss": 7.4658, "step": 5190 }, { "epoch": 0.009960512315119963, "grad_norm": 1.1489934921264648, "learning_rate": 9.95856186031069e-06, "loss": 7.2637, "step": 5200 }, { "epoch": 0.009979667146495192, "grad_norm": 1.1300097703933716, "learning_rate": 9.97771662441977e-06, "loss": 7.3436, "step": 5210 }, { "epoch": 0.009998821977870423, "grad_norm": 1.4264867305755615, "learning_rate": 9.996871388528851e-06, "loss": 7.3735, "step": 5220 }, { "epoch": 0.010017976809245654, "grad_norm": 1.1562938690185547, "learning_rate": 1.001602615263793e-05, "loss": 7.3589, "step": 5230 }, { "epoch": 0.010037131640620884, "grad_norm": 1.218908667564392, "learning_rate": 1.003518091674701e-05, "loss": 7.3674, "step": 5240 }, { "epoch": 0.010056286471996115, "grad_norm": 1.1911498308181763, "learning_rate": 1.005433568085609e-05, "loss": 7.3846, "step": 5250 }, { "epoch": 0.010075441303371346, "grad_norm": 1.0677804946899414, "learning_rate": 1.007349044496517e-05, "loss": 7.461, "step": 5260 }, { "epoch": 0.010094596134746578, "grad_norm": 1.4832419157028198, "learning_rate": 1.0092645209074249e-05, "loss": 7.3497, "step": 5270 }, { "epoch": 0.010113750966121807, "grad_norm": 1.0002113580703735, "learning_rate": 1.0111799973183329e-05, "loss": 7.4132, "step": 5280 }, { "epoch": 0.010132905797497038, "grad_norm": 1.160153865814209, "learning_rate": 1.013095473729241e-05, "loss": 7.3332, "step": 5290 }, { "epoch": 0.01015206062887227, "grad_norm": 1.1734474897384644, "learning_rate": 1.015010950140149e-05, "loss": 7.414, "step": 5300 }, { "epoch": 0.010171215460247499, "grad_norm": 1.1795401573181152, "learning_rate": 1.0169264265510569e-05, "loss": 7.3828, "step": 5310 }, { "epoch": 0.01019037029162273, "grad_norm": 1.1074190139770508, "learning_rate": 1.0188419029619649e-05, "loss": 7.2989, "step": 5320 }, { "epoch": 0.010209525122997961, "grad_norm": 1.1723424196243286, "learning_rate": 
1.0207573793728728e-05, "loss": 7.3113, "step": 5330 }, { "epoch": 0.010228679954373193, "grad_norm": 1.087101697921753, "learning_rate": 1.0226728557837808e-05, "loss": 7.4071, "step": 5340 }, { "epoch": 0.010247834785748422, "grad_norm": 1.2394529581069946, "learning_rate": 1.024588332194689e-05, "loss": 7.3523, "step": 5350 }, { "epoch": 0.010266989617123653, "grad_norm": 1.0652803182601929, "learning_rate": 1.0265038086055969e-05, "loss": 7.4406, "step": 5360 }, { "epoch": 0.010286144448498884, "grad_norm": 1.1549327373504639, "learning_rate": 1.028419285016505e-05, "loss": 7.3644, "step": 5370 }, { "epoch": 0.010305299279874114, "grad_norm": 1.2247416973114014, "learning_rate": 1.030334761427413e-05, "loss": 7.3954, "step": 5380 }, { "epoch": 0.010324454111249345, "grad_norm": 1.3642778396606445, "learning_rate": 1.032250237838321e-05, "loss": 7.2912, "step": 5390 }, { "epoch": 0.010343608942624576, "grad_norm": 1.0170115232467651, "learning_rate": 1.034165714249229e-05, "loss": 7.4226, "step": 5400 }, { "epoch": 0.010362763773999806, "grad_norm": 1.1461377143859863, "learning_rate": 1.036081190660137e-05, "loss": 7.3546, "step": 5410 }, { "epoch": 0.010381918605375037, "grad_norm": 1.1991313695907593, "learning_rate": 1.037996667071045e-05, "loss": 7.4009, "step": 5420 }, { "epoch": 0.010401073436750268, "grad_norm": 1.2714647054672241, "learning_rate": 1.039912143481953e-05, "loss": 7.3503, "step": 5430 }, { "epoch": 0.0104202282681255, "grad_norm": 1.1686193943023682, "learning_rate": 1.0418276198928609e-05, "loss": 7.3821, "step": 5440 }, { "epoch": 0.010439383099500729, "grad_norm": 1.3878347873687744, "learning_rate": 1.0437430963037689e-05, "loss": 7.365, "step": 5450 }, { "epoch": 0.01045853793087596, "grad_norm": 1.100562334060669, "learning_rate": 1.0456585727146768e-05, "loss": 7.3043, "step": 5460 }, { "epoch": 0.010477692762251191, "grad_norm": 1.1418113708496094, "learning_rate": 1.047574049125585e-05, "loss": 7.3362, "step": 5470 }, { "epoch": 
0.01049684759362642, "grad_norm": 1.028541922569275, "learning_rate": 1.049489525536493e-05, "loss": 7.3326, "step": 5480 }, { "epoch": 0.010516002425001652, "grad_norm": 1.3116049766540527, "learning_rate": 1.0514050019474009e-05, "loss": 7.2938, "step": 5490 }, { "epoch": 0.010535157256376883, "grad_norm": 1.1144912242889404, "learning_rate": 1.0533204783583088e-05, "loss": 7.2801, "step": 5500 }, { "epoch": 0.010554312087752114, "grad_norm": 1.2840921878814697, "learning_rate": 1.0552359547692168e-05, "loss": 7.278, "step": 5510 }, { "epoch": 0.010573466919127344, "grad_norm": 1.1299779415130615, "learning_rate": 1.0571514311801248e-05, "loss": 7.2832, "step": 5520 }, { "epoch": 0.010592621750502575, "grad_norm": 1.1277837753295898, "learning_rate": 1.0590669075910329e-05, "loss": 7.3627, "step": 5530 }, { "epoch": 0.010611776581877806, "grad_norm": 1.1917009353637695, "learning_rate": 1.060982384001941e-05, "loss": 7.3117, "step": 5540 }, { "epoch": 0.010630931413253036, "grad_norm": 1.3909053802490234, "learning_rate": 1.062897860412849e-05, "loss": 7.3466, "step": 5550 }, { "epoch": 0.010650086244628267, "grad_norm": 1.049137830734253, "learning_rate": 1.064813336823757e-05, "loss": 7.2947, "step": 5560 }, { "epoch": 0.010669241076003498, "grad_norm": 1.3602185249328613, "learning_rate": 1.0667288132346649e-05, "loss": 7.3612, "step": 5570 }, { "epoch": 0.010688395907378728, "grad_norm": 1.4821431636810303, "learning_rate": 1.068644289645573e-05, "loss": 7.3597, "step": 5580 }, { "epoch": 0.010707550738753959, "grad_norm": 1.4591073989868164, "learning_rate": 1.070559766056481e-05, "loss": 7.2009, "step": 5590 }, { "epoch": 0.01072670557012919, "grad_norm": 1.2938451766967773, "learning_rate": 1.072475242467389e-05, "loss": 7.1758, "step": 5600 }, { "epoch": 0.010745860401504421, "grad_norm": 1.1046041250228882, "learning_rate": 1.074390718878297e-05, "loss": 7.2726, "step": 5610 }, { "epoch": 0.01076501523287965, "grad_norm": 1.1924724578857422, 
"learning_rate": 1.0763061952892049e-05, "loss": 7.3152, "step": 5620 }, { "epoch": 0.010784170064254882, "grad_norm": 1.2147047519683838, "learning_rate": 1.0782216717001129e-05, "loss": 7.2517, "step": 5630 }, { "epoch": 0.010803324895630113, "grad_norm": 1.1351383924484253, "learning_rate": 1.0801371481110208e-05, "loss": 7.2563, "step": 5640 }, { "epoch": 0.010822479727005343, "grad_norm": 1.2805843353271484, "learning_rate": 1.082052624521929e-05, "loss": 7.3942, "step": 5650 }, { "epoch": 0.010841634558380574, "grad_norm": 1.136907935142517, "learning_rate": 1.0839681009328369e-05, "loss": 7.325, "step": 5660 }, { "epoch": 0.010860789389755805, "grad_norm": 1.4050108194351196, "learning_rate": 1.0858835773437449e-05, "loss": 7.3132, "step": 5670 }, { "epoch": 0.010879944221131036, "grad_norm": 1.047636866569519, "learning_rate": 1.0877990537546528e-05, "loss": 7.3492, "step": 5680 }, { "epoch": 0.010899099052506266, "grad_norm": 0.9795944094657898, "learning_rate": 1.0897145301655608e-05, "loss": 7.2872, "step": 5690 }, { "epoch": 0.010918253883881497, "grad_norm": 1.2453376054763794, "learning_rate": 1.0916300065764687e-05, "loss": 7.2443, "step": 5700 }, { "epoch": 0.010937408715256728, "grad_norm": 1.263778567314148, "learning_rate": 1.093545482987377e-05, "loss": 7.2724, "step": 5710 }, { "epoch": 0.010956563546631957, "grad_norm": 0.9742178320884705, "learning_rate": 1.095460959398285e-05, "loss": 7.2203, "step": 5720 }, { "epoch": 0.010975718378007189, "grad_norm": 1.0043997764587402, "learning_rate": 1.097376435809193e-05, "loss": 7.2698, "step": 5730 }, { "epoch": 0.01099487320938242, "grad_norm": 1.2845556735992432, "learning_rate": 1.099291912220101e-05, "loss": 7.3415, "step": 5740 }, { "epoch": 0.01101402804075765, "grad_norm": 1.3697859048843384, "learning_rate": 1.1012073886310089e-05, "loss": 7.3263, "step": 5750 }, { "epoch": 0.01103318287213288, "grad_norm": 1.072908878326416, "learning_rate": 1.103122865041917e-05, "loss": 7.3035, "step": 
5760 }, { "epoch": 0.011052337703508112, "grad_norm": 1.3913381099700928, "learning_rate": 1.105038341452825e-05, "loss": 7.1847, "step": 5770 }, { "epoch": 0.011071492534883343, "grad_norm": 1.1290525197982788, "learning_rate": 1.106953817863733e-05, "loss": 7.3566, "step": 5780 }, { "epoch": 0.011090647366258572, "grad_norm": 1.073958158493042, "learning_rate": 1.1088692942746409e-05, "loss": 7.3494, "step": 5790 }, { "epoch": 0.011109802197633804, "grad_norm": 1.0993953943252563, "learning_rate": 1.1107847706855489e-05, "loss": 7.2118, "step": 5800 }, { "epoch": 0.011128957029009035, "grad_norm": 1.225531816482544, "learning_rate": 1.1127002470964568e-05, "loss": 7.1964, "step": 5810 }, { "epoch": 0.011148111860384264, "grad_norm": 1.3425381183624268, "learning_rate": 1.1146157235073648e-05, "loss": 7.2905, "step": 5820 }, { "epoch": 0.011167266691759496, "grad_norm": 1.2176437377929688, "learning_rate": 1.116531199918273e-05, "loss": 7.1685, "step": 5830 }, { "epoch": 0.011186421523134727, "grad_norm": 1.3506602048873901, "learning_rate": 1.1184466763291809e-05, "loss": 7.4378, "step": 5840 }, { "epoch": 0.011205576354509958, "grad_norm": 1.371074914932251, "learning_rate": 1.1203621527400888e-05, "loss": 7.3043, "step": 5850 }, { "epoch": 0.011224731185885187, "grad_norm": 1.3084396123886108, "learning_rate": 1.1222776291509968e-05, "loss": 7.2806, "step": 5860 }, { "epoch": 0.011243886017260419, "grad_norm": 1.4237570762634277, "learning_rate": 1.1241931055619048e-05, "loss": 7.2762, "step": 5870 }, { "epoch": 0.01126304084863565, "grad_norm": 1.244276523590088, "learning_rate": 1.126108581972813e-05, "loss": 7.3274, "step": 5880 }, { "epoch": 0.01128219568001088, "grad_norm": 1.294726848602295, "learning_rate": 1.128024058383721e-05, "loss": 7.2709, "step": 5890 }, { "epoch": 0.01130135051138611, "grad_norm": 1.331621766090393, "learning_rate": 1.129939534794629e-05, "loss": 7.1983, "step": 5900 }, { "epoch": 0.011320505342761342, "grad_norm": 
1.3155306577682495, "learning_rate": 1.131855011205537e-05, "loss": 7.2975, "step": 5910 }, { "epoch": 0.011339660174136573, "grad_norm": 1.2269827127456665, "learning_rate": 1.1337704876164449e-05, "loss": 7.264, "step": 5920 }, { "epoch": 0.011358815005511802, "grad_norm": 1.619785189628601, "learning_rate": 1.1356859640273529e-05, "loss": 7.2071, "step": 5930 }, { "epoch": 0.011377969836887034, "grad_norm": 1.4411396980285645, "learning_rate": 1.137601440438261e-05, "loss": 7.205, "step": 5940 }, { "epoch": 0.011397124668262265, "grad_norm": 1.1980805397033691, "learning_rate": 1.139516916849169e-05, "loss": 7.165, "step": 5950 }, { "epoch": 0.011416279499637494, "grad_norm": 1.2822858095169067, "learning_rate": 1.141432393260077e-05, "loss": 7.268, "step": 5960 }, { "epoch": 0.011435434331012725, "grad_norm": 1.3302096128463745, "learning_rate": 1.1433478696709849e-05, "loss": 7.2318, "step": 5970 }, { "epoch": 0.011454589162387957, "grad_norm": 1.2933225631713867, "learning_rate": 1.1452633460818928e-05, "loss": 7.1924, "step": 5980 }, { "epoch": 0.011473743993763186, "grad_norm": 1.2886408567428589, "learning_rate": 1.1471788224928008e-05, "loss": 7.1431, "step": 5990 }, { "epoch": 0.011492898825138417, "grad_norm": 1.283137321472168, "learning_rate": 1.1490942989037088e-05, "loss": 7.2035, "step": 6000 }, { "epoch": 0.011512053656513649, "grad_norm": 1.2498443126678467, "learning_rate": 1.1510097753146169e-05, "loss": 7.2231, "step": 6010 }, { "epoch": 0.01153120848788888, "grad_norm": 1.1903072595596313, "learning_rate": 1.1529252517255249e-05, "loss": 7.2257, "step": 6020 }, { "epoch": 0.01155036331926411, "grad_norm": 1.5663899183273315, "learning_rate": 1.1548407281364328e-05, "loss": 7.1418, "step": 6030 }, { "epoch": 0.01156951815063934, "grad_norm": 1.3882744312286377, "learning_rate": 1.1567562045473408e-05, "loss": 7.1841, "step": 6040 }, { "epoch": 0.011588672982014572, "grad_norm": 1.203092098236084, "learning_rate": 1.1586716809582489e-05, 
"loss": 7.1233, "step": 6050 }, { "epoch": 0.011607827813389801, "grad_norm": 1.246506690979004, "learning_rate": 1.160587157369157e-05, "loss": 7.1614, "step": 6060 }, { "epoch": 0.011626982644765032, "grad_norm": 1.1687778234481812, "learning_rate": 1.162502633780065e-05, "loss": 7.2533, "step": 6070 }, { "epoch": 0.011646137476140263, "grad_norm": 1.4713791608810425, "learning_rate": 1.164418110190973e-05, "loss": 7.2582, "step": 6080 }, { "epoch": 0.011665292307515495, "grad_norm": 1.3238089084625244, "learning_rate": 1.166333586601881e-05, "loss": 7.3, "step": 6090 }, { "epoch": 0.011684447138890724, "grad_norm": 1.3874738216400146, "learning_rate": 1.1682490630127889e-05, "loss": 7.28, "step": 6100 }, { "epoch": 0.011703601970265955, "grad_norm": 1.1075962781906128, "learning_rate": 1.1701645394236969e-05, "loss": 7.2114, "step": 6110 }, { "epoch": 0.011722756801641187, "grad_norm": 1.1557226181030273, "learning_rate": 1.172080015834605e-05, "loss": 7.0918, "step": 6120 }, { "epoch": 0.011741911633016416, "grad_norm": 1.2886159420013428, "learning_rate": 1.173995492245513e-05, "loss": 7.1383, "step": 6130 }, { "epoch": 0.011761066464391647, "grad_norm": 1.199643611907959, "learning_rate": 1.1759109686564209e-05, "loss": 7.2031, "step": 6140 }, { "epoch": 0.011780221295766878, "grad_norm": 1.462892770767212, "learning_rate": 1.1778264450673289e-05, "loss": 7.1575, "step": 6150 }, { "epoch": 0.011799376127142108, "grad_norm": 1.3286123275756836, "learning_rate": 1.1797419214782368e-05, "loss": 7.1839, "step": 6160 }, { "epoch": 0.011818530958517339, "grad_norm": 1.3288229703903198, "learning_rate": 1.1816573978891448e-05, "loss": 7.1825, "step": 6170 }, { "epoch": 0.01183768578989257, "grad_norm": 1.1503456830978394, "learning_rate": 1.1835728743000527e-05, "loss": 7.2586, "step": 6180 }, { "epoch": 0.011856840621267802, "grad_norm": 1.616137146949768, "learning_rate": 1.1854883507109609e-05, "loss": 7.2017, "step": 6190 }, { "epoch": 0.011875995452643031, 
"grad_norm": 1.351486325263977, "learning_rate": 1.1874038271218688e-05, "loss": 7.2147, "step": 6200 }, { "epoch": 0.011895150284018262, "grad_norm": 1.4854779243469238, "learning_rate": 1.1893193035327768e-05, "loss": 7.157, "step": 6210 }, { "epoch": 0.011914305115393493, "grad_norm": 1.6351078748703003, "learning_rate": 1.191234779943685e-05, "loss": 7.176, "step": 6220 }, { "epoch": 0.011933459946768723, "grad_norm": 0.9964414834976196, "learning_rate": 1.1931502563545929e-05, "loss": 7.1813, "step": 6230 }, { "epoch": 0.011952614778143954, "grad_norm": 1.1613564491271973, "learning_rate": 1.195065732765501e-05, "loss": 7.1328, "step": 6240 }, { "epoch": 0.011971769609519185, "grad_norm": 1.5222305059432983, "learning_rate": 1.196981209176409e-05, "loss": 7.1669, "step": 6250 }, { "epoch": 0.011990924440894416, "grad_norm": 1.1971509456634521, "learning_rate": 1.198896685587317e-05, "loss": 7.25, "step": 6260 }, { "epoch": 0.012010079272269646, "grad_norm": 1.18362295627594, "learning_rate": 1.2008121619982249e-05, "loss": 7.0521, "step": 6270 }, { "epoch": 0.012029234103644877, "grad_norm": 1.290324330329895, "learning_rate": 1.2027276384091329e-05, "loss": 7.1563, "step": 6280 }, { "epoch": 0.012048388935020108, "grad_norm": 1.3201510906219482, "learning_rate": 1.2046431148200408e-05, "loss": 7.1202, "step": 6290 }, { "epoch": 0.012067543766395338, "grad_norm": 1.3051029443740845, "learning_rate": 1.206558591230949e-05, "loss": 7.2251, "step": 6300 }, { "epoch": 0.012086698597770569, "grad_norm": 1.7466986179351807, "learning_rate": 1.208474067641857e-05, "loss": 7.2034, "step": 6310 }, { "epoch": 0.0121058534291458, "grad_norm": 1.3473097085952759, "learning_rate": 1.2103895440527649e-05, "loss": 7.1473, "step": 6320 }, { "epoch": 0.01212500826052103, "grad_norm": 1.4585765600204468, "learning_rate": 1.2123050204636728e-05, "loss": 7.1635, "step": 6330 }, { "epoch": 0.012144163091896261, "grad_norm": 1.4975831508636475, "learning_rate": 
1.2142204968745808e-05, "loss": 7.1103, "step": 6340 }, { "epoch": 0.012163317923271492, "grad_norm": 1.2499585151672363, "learning_rate": 1.2161359732854888e-05, "loss": 7.185, "step": 6350 }, { "epoch": 0.012182472754646723, "grad_norm": 1.577142596244812, "learning_rate": 1.2180514496963969e-05, "loss": 7.0943, "step": 6360 }, { "epoch": 0.012201627586021953, "grad_norm": 1.4166862964630127, "learning_rate": 1.2199669261073049e-05, "loss": 7.1026, "step": 6370 }, { "epoch": 0.012220782417397184, "grad_norm": 1.3467305898666382, "learning_rate": 1.2218824025182128e-05, "loss": 7.1906, "step": 6380 }, { "epoch": 0.012239937248772415, "grad_norm": 1.2367794513702393, "learning_rate": 1.223797878929121e-05, "loss": 7.1617, "step": 6390 }, { "epoch": 0.012259092080147645, "grad_norm": 1.3551909923553467, "learning_rate": 1.2257133553400289e-05, "loss": 7.1792, "step": 6400 }, { "epoch": 0.012278246911522876, "grad_norm": 1.5466651916503906, "learning_rate": 1.227628831750937e-05, "loss": 7.2374, "step": 6410 }, { "epoch": 0.012297401742898107, "grad_norm": 1.276861310005188, "learning_rate": 1.229544308161845e-05, "loss": 7.0785, "step": 6420 }, { "epoch": 0.012316556574273338, "grad_norm": 1.5108709335327148, "learning_rate": 1.231459784572753e-05, "loss": 7.0904, "step": 6430 }, { "epoch": 0.012335711405648568, "grad_norm": 1.4005743265151978, "learning_rate": 1.233375260983661e-05, "loss": 7.1371, "step": 6440 }, { "epoch": 0.012354866237023799, "grad_norm": 1.2345638275146484, "learning_rate": 1.2352907373945689e-05, "loss": 7.0903, "step": 6450 }, { "epoch": 0.01237402106839903, "grad_norm": 1.0539698600769043, "learning_rate": 1.2372062138054768e-05, "loss": 7.1677, "step": 6460 }, { "epoch": 0.01239317589977426, "grad_norm": 1.3469603061676025, "learning_rate": 1.2391216902163848e-05, "loss": 7.1703, "step": 6470 }, { "epoch": 0.01241233073114949, "grad_norm": 1.3122016191482544, "learning_rate": 1.241037166627293e-05, "loss": 7.1359, "step": 6480 }, { 
"epoch": 0.012431485562524722, "grad_norm": 1.3544859886169434, "learning_rate": 1.2429526430382009e-05, "loss": 7.1783, "step": 6490 }, { "epoch": 0.012450640393899953, "grad_norm": 1.2275125980377197, "learning_rate": 1.2448681194491089e-05, "loss": 7.1708, "step": 6500 }, { "epoch": 0.012469795225275183, "grad_norm": 1.8058714866638184, "learning_rate": 1.2467835958600168e-05, "loss": 7.1554, "step": 6510 }, { "epoch": 0.012488950056650414, "grad_norm": 1.361082673072815, "learning_rate": 1.2486990722709248e-05, "loss": 7.1836, "step": 6520 }, { "epoch": 0.012508104888025645, "grad_norm": 1.4159489870071411, "learning_rate": 1.2506145486818327e-05, "loss": 6.9983, "step": 6530 }, { "epoch": 0.012527259719400875, "grad_norm": 1.1816598176956177, "learning_rate": 1.2525300250927409e-05, "loss": 7.1063, "step": 6540 }, { "epoch": 0.012546414550776106, "grad_norm": 1.471374273300171, "learning_rate": 1.2544455015036488e-05, "loss": 7.1475, "step": 6550 }, { "epoch": 0.012565569382151337, "grad_norm": 1.2326059341430664, "learning_rate": 1.2563609779145568e-05, "loss": 7.2214, "step": 6560 }, { "epoch": 0.012584724213526566, "grad_norm": 1.356541395187378, "learning_rate": 1.258276454325465e-05, "loss": 7.0123, "step": 6570 }, { "epoch": 0.012603879044901798, "grad_norm": 1.8499162197113037, "learning_rate": 1.2601919307363729e-05, "loss": 7.1774, "step": 6580 }, { "epoch": 0.012623033876277029, "grad_norm": 1.6006790399551392, "learning_rate": 1.262107407147281e-05, "loss": 7.0568, "step": 6590 }, { "epoch": 0.01264218870765226, "grad_norm": 1.4427088499069214, "learning_rate": 1.264022883558189e-05, "loss": 7.1843, "step": 6600 }, { "epoch": 0.01266134353902749, "grad_norm": 1.38870108127594, "learning_rate": 1.265938359969097e-05, "loss": 7.1506, "step": 6610 }, { "epoch": 0.01268049837040272, "grad_norm": 1.6334567070007324, "learning_rate": 1.2678538363800049e-05, "loss": 7.1572, "step": 6620 }, { "epoch": 0.012699653201777952, "grad_norm": 1.3532588481903076, 
"learning_rate": 1.2697693127909129e-05, "loss": 7.1181, "step": 6630 }, { "epoch": 0.012718808033153181, "grad_norm": 1.5171034336090088, "learning_rate": 1.2716847892018208e-05, "loss": 7.1419, "step": 6640 }, { "epoch": 0.012737962864528413, "grad_norm": 1.647733449935913, "learning_rate": 1.2736002656127288e-05, "loss": 7.1536, "step": 6650 }, { "epoch": 0.012757117695903644, "grad_norm": 1.5446168184280396, "learning_rate": 1.275515742023637e-05, "loss": 7.0527, "step": 6660 }, { "epoch": 0.012776272527278875, "grad_norm": 1.7824991941452026, "learning_rate": 1.2774312184345449e-05, "loss": 7.0593, "step": 6670 }, { "epoch": 0.012795427358654105, "grad_norm": 1.2586312294006348, "learning_rate": 1.2793466948454528e-05, "loss": 7.0354, "step": 6680 }, { "epoch": 0.012814582190029336, "grad_norm": 1.3855129480361938, "learning_rate": 1.2812621712563608e-05, "loss": 7.0965, "step": 6690 }, { "epoch": 0.012833737021404567, "grad_norm": 1.3212134838104248, "learning_rate": 1.2831776476672688e-05, "loss": 7.1811, "step": 6700 }, { "epoch": 0.012852891852779796, "grad_norm": 1.08016037940979, "learning_rate": 1.2850931240781767e-05, "loss": 7.1327, "step": 6710 }, { "epoch": 0.012872046684155028, "grad_norm": 1.3234882354736328, "learning_rate": 1.2870086004890849e-05, "loss": 7.1724, "step": 6720 }, { "epoch": 0.012891201515530259, "grad_norm": 1.2944332361221313, "learning_rate": 1.2889240768999928e-05, "loss": 7.0761, "step": 6730 }, { "epoch": 0.012910356346905488, "grad_norm": 1.7215232849121094, "learning_rate": 1.290839553310901e-05, "loss": 7.0527, "step": 6740 }, { "epoch": 0.01292951117828072, "grad_norm": 1.2631245851516724, "learning_rate": 1.2927550297218089e-05, "loss": 7.2037, "step": 6750 }, { "epoch": 0.01294866600965595, "grad_norm": 1.3558971881866455, "learning_rate": 1.2946705061327169e-05, "loss": 7.0093, "step": 6760 }, { "epoch": 0.012967820841031182, "grad_norm": 1.2239710092544556, "learning_rate": 1.296585982543625e-05, "loss": 7.1176, 
"step": 6770 }, { "epoch": 0.012986975672406411, "grad_norm": 1.486100196838379, "learning_rate": 1.298501458954533e-05, "loss": 7.0297, "step": 6780 }, { "epoch": 0.013006130503781643, "grad_norm": 1.3822866678237915, "learning_rate": 1.300416935365441e-05, "loss": 7.1435, "step": 6790 }, { "epoch": 0.013025285335156874, "grad_norm": 1.5523566007614136, "learning_rate": 1.3023324117763489e-05, "loss": 7.1125, "step": 6800 }, { "epoch": 0.013044440166532103, "grad_norm": 1.1813997030258179, "learning_rate": 1.3042478881872568e-05, "loss": 7.0663, "step": 6810 }, { "epoch": 0.013063594997907334, "grad_norm": 1.1165132522583008, "learning_rate": 1.3061633645981648e-05, "loss": 7.1203, "step": 6820 }, { "epoch": 0.013082749829282566, "grad_norm": 1.5367895364761353, "learning_rate": 1.3080788410090728e-05, "loss": 7.0281, "step": 6830 }, { "epoch": 0.013101904660657797, "grad_norm": 1.1747970581054688, "learning_rate": 1.3099943174199809e-05, "loss": 7.0439, "step": 6840 }, { "epoch": 0.013121059492033026, "grad_norm": 1.5485713481903076, "learning_rate": 1.3119097938308889e-05, "loss": 7.0715, "step": 6850 }, { "epoch": 0.013140214323408258, "grad_norm": 1.4922367334365845, "learning_rate": 1.3138252702417968e-05, "loss": 7.1043, "step": 6860 }, { "epoch": 0.013159369154783489, "grad_norm": 1.4926304817199707, "learning_rate": 1.3157407466527048e-05, "loss": 7.097, "step": 6870 }, { "epoch": 0.013178523986158718, "grad_norm": 1.5167653560638428, "learning_rate": 1.3176562230636127e-05, "loss": 6.9531, "step": 6880 }, { "epoch": 0.01319767881753395, "grad_norm": 1.4030276536941528, "learning_rate": 1.3195716994745207e-05, "loss": 7.0637, "step": 6890 }, { "epoch": 0.01321683364890918, "grad_norm": 1.3781110048294067, "learning_rate": 1.3214871758854288e-05, "loss": 7.0767, "step": 6900 }, { "epoch": 0.01323598848028441, "grad_norm": 1.8985297679901123, "learning_rate": 1.323402652296337e-05, "loss": 7.0195, "step": 6910 }, { "epoch": 0.013255143311659641, "grad_norm": 
1.4447600841522217, "learning_rate": 1.325318128707245e-05, "loss": 7.1368, "step": 6920 }, { "epoch": 0.013274298143034872, "grad_norm": 1.3569447994232178, "learning_rate": 1.3272336051181529e-05, "loss": 7.116, "step": 6930 }, { "epoch": 0.013293452974410104, "grad_norm": 1.5059735774993896, "learning_rate": 1.3291490815290608e-05, "loss": 7.0081, "step": 6940 }, { "epoch": 0.013312607805785333, "grad_norm": 1.4181277751922607, "learning_rate": 1.331064557939969e-05, "loss": 7.1003, "step": 6950 }, { "epoch": 0.013331762637160564, "grad_norm": 1.4989289045333862, "learning_rate": 1.332980034350877e-05, "loss": 7.029, "step": 6960 }, { "epoch": 0.013350917468535796, "grad_norm": 1.4802162647247314, "learning_rate": 1.3348955107617849e-05, "loss": 7.0989, "step": 6970 }, { "epoch": 0.013370072299911025, "grad_norm": 1.2799993753433228, "learning_rate": 1.3368109871726929e-05, "loss": 6.9646, "step": 6980 }, { "epoch": 0.013389227131286256, "grad_norm": 1.144010305404663, "learning_rate": 1.3387264635836008e-05, "loss": 7.0504, "step": 6990 }, { "epoch": 0.013408381962661487, "grad_norm": 1.2155886888504028, "learning_rate": 1.3406419399945088e-05, "loss": 7.0886, "step": 7000 }, { "epoch": 0.013427536794036719, "grad_norm": 1.4716991186141968, "learning_rate": 1.3425574164054167e-05, "loss": 6.9909, "step": 7010 }, { "epoch": 0.013446691625411948, "grad_norm": 1.226123571395874, "learning_rate": 1.3444728928163249e-05, "loss": 7.0203, "step": 7020 }, { "epoch": 0.01346584645678718, "grad_norm": 1.3355987071990967, "learning_rate": 1.3463883692272328e-05, "loss": 7.0368, "step": 7030 }, { "epoch": 0.01348500128816241, "grad_norm": 1.5163650512695312, "learning_rate": 1.3483038456381408e-05, "loss": 7.0543, "step": 7040 }, { "epoch": 0.01350415611953764, "grad_norm": 1.5304055213928223, "learning_rate": 1.3502193220490488e-05, "loss": 7.0905, "step": 7050 }, { "epoch": 0.013523310950912871, "grad_norm": 1.5520268678665161, "learning_rate": 1.3521347984599567e-05, 
"loss": 7.1185, "step": 7060 }, { "epoch": 0.013542465782288102, "grad_norm": 1.4951072931289673, "learning_rate": 1.3540502748708647e-05, "loss": 7.1861, "step": 7070 }, { "epoch": 0.013561620613663334, "grad_norm": 1.4207329750061035, "learning_rate": 1.355965751281773e-05, "loss": 6.9464, "step": 7080 }, { "epoch": 0.013580775445038563, "grad_norm": 1.4736515283584595, "learning_rate": 1.357881227692681e-05, "loss": 7.0209, "step": 7090 }, { "epoch": 0.013599930276413794, "grad_norm": 1.4010246992111206, "learning_rate": 1.3597967041035889e-05, "loss": 7.0986, "step": 7100 }, { "epoch": 0.013619085107789025, "grad_norm": 1.5325231552124023, "learning_rate": 1.3617121805144969e-05, "loss": 7.0788, "step": 7110 }, { "epoch": 0.013638239939164255, "grad_norm": 1.3187881708145142, "learning_rate": 1.3636276569254048e-05, "loss": 6.9501, "step": 7120 }, { "epoch": 0.013657394770539486, "grad_norm": 1.3883031606674194, "learning_rate": 1.365543133336313e-05, "loss": 7.0241, "step": 7130 }, { "epoch": 0.013676549601914717, "grad_norm": 1.342551350593567, "learning_rate": 1.367458609747221e-05, "loss": 7.0466, "step": 7140 }, { "epoch": 0.013695704433289947, "grad_norm": 1.266283631324768, "learning_rate": 1.3693740861581289e-05, "loss": 6.9666, "step": 7150 }, { "epoch": 0.013714859264665178, "grad_norm": 1.1881585121154785, "learning_rate": 1.3712895625690368e-05, "loss": 7.0819, "step": 7160 }, { "epoch": 0.01373401409604041, "grad_norm": 1.4991761445999146, "learning_rate": 1.3732050389799448e-05, "loss": 6.9711, "step": 7170 }, { "epoch": 0.01375316892741564, "grad_norm": 1.5111137628555298, "learning_rate": 1.3751205153908528e-05, "loss": 6.9696, "step": 7180 }, { "epoch": 0.01377232375879087, "grad_norm": 1.3870344161987305, "learning_rate": 1.3770359918017607e-05, "loss": 6.9291, "step": 7190 }, { "epoch": 0.013791478590166101, "grad_norm": 1.4039572477340698, "learning_rate": 1.3789514682126689e-05, "loss": 6.9612, "step": 7200 }, { "epoch": 
0.013810633421541332, "grad_norm": 1.526779055595398, "learning_rate": 1.3808669446235768e-05, "loss": 6.97, "step": 7210 }, { "epoch": 0.013829788252916562, "grad_norm": 1.5299268960952759, "learning_rate": 1.3827824210344848e-05, "loss": 7.0294, "step": 7220 }, { "epoch": 0.013848943084291793, "grad_norm": 1.2472022771835327, "learning_rate": 1.3846978974453927e-05, "loss": 7.0604, "step": 7230 }, { "epoch": 0.013868097915667024, "grad_norm": 1.573847770690918, "learning_rate": 1.3866133738563007e-05, "loss": 7.0249, "step": 7240 }, { "epoch": 0.013887252747042255, "grad_norm": 1.434487223625183, "learning_rate": 1.388528850267209e-05, "loss": 6.9178, "step": 7250 }, { "epoch": 0.013906407578417485, "grad_norm": 1.369840383529663, "learning_rate": 1.390444326678117e-05, "loss": 7.0624, "step": 7260 }, { "epoch": 0.013925562409792716, "grad_norm": 1.2987500429153442, "learning_rate": 1.392359803089025e-05, "loss": 7.1098, "step": 7270 }, { "epoch": 0.013944717241167947, "grad_norm": 1.2922298908233643, "learning_rate": 1.3942752794999329e-05, "loss": 6.9371, "step": 7280 }, { "epoch": 0.013963872072543177, "grad_norm": 1.5693293809890747, "learning_rate": 1.3961907559108408e-05, "loss": 6.9526, "step": 7290 }, { "epoch": 0.013983026903918408, "grad_norm": 1.3919734954833984, "learning_rate": 1.3981062323217488e-05, "loss": 6.9509, "step": 7300 }, { "epoch": 0.01400218173529364, "grad_norm": 1.4405567646026611, "learning_rate": 1.400021708732657e-05, "loss": 6.9868, "step": 7310 }, { "epoch": 0.014021336566668869, "grad_norm": 1.457655668258667, "learning_rate": 1.4019371851435649e-05, "loss": 7.0364, "step": 7320 }, { "epoch": 0.0140404913980441, "grad_norm": 1.410909652709961, "learning_rate": 1.4038526615544729e-05, "loss": 6.9121, "step": 7330 }, { "epoch": 0.014059646229419331, "grad_norm": 1.5069962739944458, "learning_rate": 1.4057681379653808e-05, "loss": 7.1544, "step": 7340 }, { "epoch": 0.014078801060794562, "grad_norm": 1.392451524734497, 
"learning_rate": 1.4076836143762888e-05, "loss": 7.0007, "step": 7350 }, { "epoch": 0.014097955892169792, "grad_norm": 1.2809514999389648, "learning_rate": 1.4095990907871967e-05, "loss": 7.0742, "step": 7360 }, { "epoch": 0.014117110723545023, "grad_norm": 1.5579949617385864, "learning_rate": 1.4115145671981047e-05, "loss": 6.9618, "step": 7370 }, { "epoch": 0.014136265554920254, "grad_norm": 1.2356388568878174, "learning_rate": 1.4134300436090128e-05, "loss": 7.0123, "step": 7380 }, { "epoch": 0.014155420386295484, "grad_norm": 1.4863810539245605, "learning_rate": 1.4153455200199208e-05, "loss": 6.9259, "step": 7390 }, { "epoch": 0.014174575217670715, "grad_norm": 1.582063913345337, "learning_rate": 1.4172609964308288e-05, "loss": 6.9144, "step": 7400 }, { "epoch": 0.014193730049045946, "grad_norm": 1.32565438747406, "learning_rate": 1.4191764728417367e-05, "loss": 7.0, "step": 7410 }, { "epoch": 0.014212884880421177, "grad_norm": 1.4345077276229858, "learning_rate": 1.4210919492526448e-05, "loss": 6.9814, "step": 7420 }, { "epoch": 0.014232039711796407, "grad_norm": 1.641484022140503, "learning_rate": 1.423007425663553e-05, "loss": 6.8899, "step": 7430 }, { "epoch": 0.014251194543171638, "grad_norm": 1.4140636920928955, "learning_rate": 1.424922902074461e-05, "loss": 7.0208, "step": 7440 }, { "epoch": 0.014270349374546869, "grad_norm": 1.467553734779358, "learning_rate": 1.4268383784853689e-05, "loss": 6.876, "step": 7450 }, { "epoch": 0.014289504205922099, "grad_norm": 1.5154932737350464, "learning_rate": 1.4287538548962769e-05, "loss": 6.9723, "step": 7460 }, { "epoch": 0.01430865903729733, "grad_norm": 1.3633487224578857, "learning_rate": 1.4306693313071848e-05, "loss": 6.8939, "step": 7470 }, { "epoch": 0.014327813868672561, "grad_norm": 1.4098021984100342, "learning_rate": 1.4325848077180928e-05, "loss": 7.0119, "step": 7480 }, { "epoch": 0.01434696870004779, "grad_norm": 1.576023817062378, "learning_rate": 1.434500284129001e-05, "loss": 6.8806, "step": 
7490 }, { "epoch": 0.014366123531423022, "grad_norm": 1.5905028581619263, "learning_rate": 1.4364157605399089e-05, "loss": 6.8935, "step": 7500 }, { "epoch": 0.014385278362798253, "grad_norm": 1.6185882091522217, "learning_rate": 1.4383312369508168e-05, "loss": 6.9437, "step": 7510 }, { "epoch": 0.014404433194173484, "grad_norm": 1.480391502380371, "learning_rate": 1.4402467133617248e-05, "loss": 6.919, "step": 7520 }, { "epoch": 0.014423588025548714, "grad_norm": 1.338910460472107, "learning_rate": 1.4421621897726328e-05, "loss": 6.9903, "step": 7530 }, { "epoch": 0.014442742856923945, "grad_norm": 1.5728352069854736, "learning_rate": 1.4440776661835407e-05, "loss": 7.0436, "step": 7540 }, { "epoch": 0.014461897688299176, "grad_norm": 1.4862483739852905, "learning_rate": 1.4459931425944487e-05, "loss": 6.9497, "step": 7550 }, { "epoch": 0.014481052519674405, "grad_norm": 1.2361242771148682, "learning_rate": 1.4479086190053568e-05, "loss": 6.8863, "step": 7560 }, { "epoch": 0.014500207351049637, "grad_norm": 1.4292306900024414, "learning_rate": 1.4498240954162648e-05, "loss": 7.0198, "step": 7570 }, { "epoch": 0.014519362182424868, "grad_norm": 1.327257752418518, "learning_rate": 1.4517395718271727e-05, "loss": 6.9253, "step": 7580 }, { "epoch": 0.014538517013800099, "grad_norm": 1.3535081148147583, "learning_rate": 1.4536550482380809e-05, "loss": 7.0154, "step": 7590 }, { "epoch": 0.014557671845175328, "grad_norm": 1.3258945941925049, "learning_rate": 1.4555705246489888e-05, "loss": 7.0307, "step": 7600 }, { "epoch": 0.01457682667655056, "grad_norm": 1.3901755809783936, "learning_rate": 1.457486001059897e-05, "loss": 6.8711, "step": 7610 }, { "epoch": 0.014595981507925791, "grad_norm": 1.548964023590088, "learning_rate": 1.459401477470805e-05, "loss": 6.9313, "step": 7620 }, { "epoch": 0.01461513633930102, "grad_norm": 1.5207897424697876, "learning_rate": 1.4613169538817129e-05, "loss": 6.9355, "step": 7630 }, { "epoch": 0.014634291170676252, "grad_norm": 
1.4371168613433838, "learning_rate": 1.4632324302926208e-05, "loss": 6.9064, "step": 7640 }, { "epoch": 0.014653446002051483, "grad_norm": 1.412644624710083, "learning_rate": 1.4651479067035288e-05, "loss": 6.881, "step": 7650 }, { "epoch": 0.014672600833426714, "grad_norm": 1.5779515504837036, "learning_rate": 1.4670633831144368e-05, "loss": 6.9937, "step": 7660 }, { "epoch": 0.014691755664801943, "grad_norm": 1.1552032232284546, "learning_rate": 1.4689788595253449e-05, "loss": 6.9913, "step": 7670 }, { "epoch": 0.014710910496177175, "grad_norm": 1.5876814126968384, "learning_rate": 1.4708943359362529e-05, "loss": 6.9364, "step": 7680 }, { "epoch": 0.014730065327552406, "grad_norm": 1.4754682779312134, "learning_rate": 1.4728098123471608e-05, "loss": 6.895, "step": 7690 }, { "epoch": 0.014749220158927635, "grad_norm": 1.2300512790679932, "learning_rate": 1.4747252887580688e-05, "loss": 6.9058, "step": 7700 }, { "epoch": 0.014768374990302867, "grad_norm": 1.3804477453231812, "learning_rate": 1.4766407651689767e-05, "loss": 6.9201, "step": 7710 }, { "epoch": 0.014787529821678098, "grad_norm": 1.578547716140747, "learning_rate": 1.4785562415798847e-05, "loss": 6.9758, "step": 7720 }, { "epoch": 0.014806684653053327, "grad_norm": 1.4232908487319946, "learning_rate": 1.4804717179907927e-05, "loss": 6.9725, "step": 7730 }, { "epoch": 0.014825839484428558, "grad_norm": 1.6805802583694458, "learning_rate": 1.4823871944017008e-05, "loss": 6.8861, "step": 7740 }, { "epoch": 0.01484499431580379, "grad_norm": 1.4161819219589233, "learning_rate": 1.4843026708126088e-05, "loss": 6.9445, "step": 7750 }, { "epoch": 0.01486414914717902, "grad_norm": 1.4555299282073975, "learning_rate": 1.4862181472235169e-05, "loss": 6.8655, "step": 7760 }, { "epoch": 0.01488330397855425, "grad_norm": 1.465808391571045, "learning_rate": 1.4881336236344248e-05, "loss": 6.946, "step": 7770 }, { "epoch": 0.014902458809929481, "grad_norm": 1.4918655157089233, "learning_rate": 1.4900491000453328e-05, 
"loss": 6.8193, "step": 7780 }, { "epoch": 0.014921613641304713, "grad_norm": 1.6244404315948486, "learning_rate": 1.491964576456241e-05, "loss": 6.8982, "step": 7790 }, { "epoch": 0.014940768472679942, "grad_norm": 1.3965954780578613, "learning_rate": 1.4938800528671489e-05, "loss": 6.8281, "step": 7800 }, { "epoch": 0.014959923304055173, "grad_norm": 1.4422506093978882, "learning_rate": 1.4957955292780569e-05, "loss": 6.8567, "step": 7810 }, { "epoch": 0.014979078135430405, "grad_norm": 1.878503680229187, "learning_rate": 1.4977110056889648e-05, "loss": 6.967, "step": 7820 }, { "epoch": 0.014998232966805636, "grad_norm": 1.3974978923797607, "learning_rate": 1.4996264820998728e-05, "loss": 6.9193, "step": 7830 }, { "epoch": 0.015017387798180865, "grad_norm": 1.4368040561676025, "learning_rate": 1.5015419585107807e-05, "loss": 6.8382, "step": 7840 }, { "epoch": 0.015036542629556096, "grad_norm": 1.292326807975769, "learning_rate": 1.5034574349216889e-05, "loss": 6.8334, "step": 7850 }, { "epoch": 0.015055697460931328, "grad_norm": 1.4514111280441284, "learning_rate": 1.5053729113325968e-05, "loss": 6.8516, "step": 7860 }, { "epoch": 0.015074852292306557, "grad_norm": 1.6531791687011719, "learning_rate": 1.5072883877435048e-05, "loss": 6.8071, "step": 7870 }, { "epoch": 0.015094007123681788, "grad_norm": 1.6571725606918335, "learning_rate": 1.5092038641544128e-05, "loss": 6.9133, "step": 7880 }, { "epoch": 0.01511316195505702, "grad_norm": 1.4031537771224976, "learning_rate": 1.5111193405653207e-05, "loss": 6.9205, "step": 7890 }, { "epoch": 0.015132316786432249, "grad_norm": 1.3611589670181274, "learning_rate": 1.5130348169762287e-05, "loss": 6.8456, "step": 7900 }, { "epoch": 0.01515147161780748, "grad_norm": 1.6364729404449463, "learning_rate": 1.5149502933871366e-05, "loss": 6.9857, "step": 7910 }, { "epoch": 0.015170626449182711, "grad_norm": 1.6541557312011719, "learning_rate": 1.5168657697980448e-05, "loss": 6.893, "step": 7920 }, { "epoch": 
0.015189781280557943, "grad_norm": 1.3292745351791382, "learning_rate": 1.5187812462089529e-05, "loss": 6.904, "step": 7930 }, { "epoch": 0.015208936111933172, "grad_norm": 1.6273618936538696, "learning_rate": 1.5206967226198609e-05, "loss": 6.9021, "step": 7940 }, { "epoch": 0.015228090943308403, "grad_norm": 1.3524209260940552, "learning_rate": 1.5226121990307688e-05, "loss": 6.8692, "step": 7950 }, { "epoch": 0.015247245774683634, "grad_norm": 1.3934863805770874, "learning_rate": 1.5245276754416768e-05, "loss": 6.907, "step": 7960 }, { "epoch": 0.015266400606058864, "grad_norm": 1.254092812538147, "learning_rate": 1.526443151852585e-05, "loss": 6.8997, "step": 7970 }, { "epoch": 0.015285555437434095, "grad_norm": 1.3862252235412598, "learning_rate": 1.5283586282634927e-05, "loss": 6.8041, "step": 7980 }, { "epoch": 0.015304710268809326, "grad_norm": 1.5035855770111084, "learning_rate": 1.530274104674401e-05, "loss": 6.8431, "step": 7990 }, { "epoch": 0.015323865100184558, "grad_norm": 1.550031065940857, "learning_rate": 1.532189581085309e-05, "loss": 6.7724, "step": 8000 }, { "epoch": 0.015343019931559787, "grad_norm": 1.4005414247512817, "learning_rate": 1.5341050574962168e-05, "loss": 6.947, "step": 8010 }, { "epoch": 0.015362174762935018, "grad_norm": 1.4765024185180664, "learning_rate": 1.536020533907125e-05, "loss": 6.83, "step": 8020 }, { "epoch": 0.01538132959431025, "grad_norm": 1.4665371179580688, "learning_rate": 1.5379360103180327e-05, "loss": 6.9782, "step": 8030 }, { "epoch": 0.015400484425685479, "grad_norm": 1.4976885318756104, "learning_rate": 1.5398514867289408e-05, "loss": 6.7157, "step": 8040 }, { "epoch": 0.01541963925706071, "grad_norm": 1.270721197128296, "learning_rate": 1.5417669631398486e-05, "loss": 6.913, "step": 8050 }, { "epoch": 0.015438794088435941, "grad_norm": 1.1081750392913818, "learning_rate": 1.5436824395507567e-05, "loss": 6.9846, "step": 8060 }, { "epoch": 0.01545794891981117, "grad_norm": 1.2899068593978882, 
"learning_rate": 1.545597915961665e-05, "loss": 6.918, "step": 8070 }, { "epoch": 0.015477103751186402, "grad_norm": 1.4366354942321777, "learning_rate": 1.5475133923725727e-05, "loss": 6.829, "step": 8080 }, { "epoch": 0.015496258582561633, "grad_norm": 1.3975414037704468, "learning_rate": 1.5494288687834808e-05, "loss": 6.8433, "step": 8090 }, { "epoch": 0.015515413413936864, "grad_norm": 1.4542040824890137, "learning_rate": 1.551344345194389e-05, "loss": 6.8488, "step": 8100 }, { "epoch": 0.015534568245312094, "grad_norm": 1.5058010816574097, "learning_rate": 1.553259821605297e-05, "loss": 6.7968, "step": 8110 }, { "epoch": 0.015553723076687325, "grad_norm": 1.650072455406189, "learning_rate": 1.555175298016205e-05, "loss": 6.8834, "step": 8120 }, { "epoch": 0.015572877908062556, "grad_norm": 1.5454399585723877, "learning_rate": 1.557090774427113e-05, "loss": 6.8963, "step": 8130 }, { "epoch": 0.015592032739437786, "grad_norm": 1.397944688796997, "learning_rate": 1.5590062508380208e-05, "loss": 6.8763, "step": 8140 }, { "epoch": 0.015611187570813017, "grad_norm": 1.4429118633270264, "learning_rate": 1.560921727248929e-05, "loss": 6.7876, "step": 8150 }, { "epoch": 0.015630342402188246, "grad_norm": 1.406246542930603, "learning_rate": 1.5628372036598367e-05, "loss": 6.9422, "step": 8160 }, { "epoch": 0.01564949723356348, "grad_norm": 1.2875165939331055, "learning_rate": 1.5647526800707448e-05, "loss": 6.8388, "step": 8170 }, { "epoch": 0.01566865206493871, "grad_norm": 1.3891210556030273, "learning_rate": 1.566668156481653e-05, "loss": 6.8343, "step": 8180 }, { "epoch": 0.01568780689631394, "grad_norm": 1.3361148834228516, "learning_rate": 1.5685836328925607e-05, "loss": 6.9427, "step": 8190 }, { "epoch": 0.01570696172768917, "grad_norm": 1.305341124534607, "learning_rate": 1.570499109303469e-05, "loss": 6.8084, "step": 8200 }, { "epoch": 0.0157261165590644, "grad_norm": 1.6422538757324219, "learning_rate": 1.5724145857143767e-05, "loss": 6.8679, "step": 8210 }, 
{ "epoch": 0.015745271390439634, "grad_norm": 1.3839759826660156, "learning_rate": 1.5743300621252848e-05, "loss": 6.858, "step": 8220 }, { "epoch": 0.015764426221814863, "grad_norm": 1.6874349117279053, "learning_rate": 1.5762455385361926e-05, "loss": 6.7808, "step": 8230 }, { "epoch": 0.015783581053190093, "grad_norm": 1.7613139152526855, "learning_rate": 1.5781610149471007e-05, "loss": 6.9005, "step": 8240 }, { "epoch": 0.015802735884565326, "grad_norm": 2.1231462955474854, "learning_rate": 1.580076491358009e-05, "loss": 6.8456, "step": 8250 }, { "epoch": 0.015821890715940555, "grad_norm": 1.6517682075500488, "learning_rate": 1.5819919677689166e-05, "loss": 6.7427, "step": 8260 }, { "epoch": 0.015841045547315784, "grad_norm": 1.5726068019866943, "learning_rate": 1.5839074441798248e-05, "loss": 6.8478, "step": 8270 }, { "epoch": 0.015860200378691017, "grad_norm": 1.3430166244506836, "learning_rate": 1.585822920590733e-05, "loss": 6.8775, "step": 8280 }, { "epoch": 0.015879355210066247, "grad_norm": 1.407331943511963, "learning_rate": 1.587738397001641e-05, "loss": 6.856, "step": 8290 }, { "epoch": 0.015898510041441476, "grad_norm": 1.3196405172348022, "learning_rate": 1.5896538734125488e-05, "loss": 6.762, "step": 8300 }, { "epoch": 0.01591766487281671, "grad_norm": 1.2688899040222168, "learning_rate": 1.591569349823457e-05, "loss": 6.9191, "step": 8310 }, { "epoch": 0.01593681970419194, "grad_norm": 1.3656967878341675, "learning_rate": 1.5934848262343647e-05, "loss": 6.7952, "step": 8320 }, { "epoch": 0.015955974535567168, "grad_norm": 1.707470417022705, "learning_rate": 1.595400302645273e-05, "loss": 6.8362, "step": 8330 }, { "epoch": 0.0159751293669424, "grad_norm": 1.4369134902954102, "learning_rate": 1.5973157790561807e-05, "loss": 6.7666, "step": 8340 }, { "epoch": 0.01599428419831763, "grad_norm": 1.3370652198791504, "learning_rate": 1.5992312554670888e-05, "loss": 6.9195, "step": 8350 }, { "epoch": 0.01601343902969286, "grad_norm": 1.596701979637146, 
"learning_rate": 1.601146731877997e-05, "loss": 6.8068, "step": 8360 }, { "epoch": 0.016032593861068093, "grad_norm": 1.6702816486358643, "learning_rate": 1.6030622082889047e-05, "loss": 6.7734, "step": 8370 }, { "epoch": 0.016051748692443323, "grad_norm": 1.1472667455673218, "learning_rate": 1.604977684699813e-05, "loss": 6.9457, "step": 8380 }, { "epoch": 0.016070903523818555, "grad_norm": 1.6517398357391357, "learning_rate": 1.6068931611107206e-05, "loss": 6.8585, "step": 8390 }, { "epoch": 0.016090058355193785, "grad_norm": 1.1568183898925781, "learning_rate": 1.6088086375216288e-05, "loss": 6.8288, "step": 8400 }, { "epoch": 0.016109213186569014, "grad_norm": 1.7125110626220703, "learning_rate": 1.6107241139325366e-05, "loss": 6.8064, "step": 8410 }, { "epoch": 0.016128368017944247, "grad_norm": 1.371862769126892, "learning_rate": 1.6126395903434447e-05, "loss": 6.877, "step": 8420 }, { "epoch": 0.016147522849319477, "grad_norm": 1.4662631750106812, "learning_rate": 1.6145550667543528e-05, "loss": 6.8801, "step": 8430 }, { "epoch": 0.016166677680694706, "grad_norm": 1.682071328163147, "learning_rate": 1.616470543165261e-05, "loss": 6.8497, "step": 8440 }, { "epoch": 0.01618583251206994, "grad_norm": 1.3993191719055176, "learning_rate": 1.6183860195761687e-05, "loss": 6.8533, "step": 8450 }, { "epoch": 0.01620498734344517, "grad_norm": 1.297680377960205, "learning_rate": 1.620301495987077e-05, "loss": 6.7539, "step": 8460 }, { "epoch": 0.016224142174820398, "grad_norm": 1.3744157552719116, "learning_rate": 1.622216972397985e-05, "loss": 6.881, "step": 8470 }, { "epoch": 0.01624329700619563, "grad_norm": 1.425794243812561, "learning_rate": 1.6241324488088928e-05, "loss": 6.7794, "step": 8480 }, { "epoch": 0.01626245183757086, "grad_norm": 1.300492763519287, "learning_rate": 1.626047925219801e-05, "loss": 6.8115, "step": 8490 }, { "epoch": 0.01628160666894609, "grad_norm": 1.706079363822937, "learning_rate": 1.6279634016307087e-05, "loss": 6.7424, "step": 8500 }, 
{ "epoch": 0.016300761500321323, "grad_norm": 1.4790494441986084, "learning_rate": 1.629878878041617e-05, "loss": 6.8851, "step": 8510 }, { "epoch": 0.016319916331696552, "grad_norm": 1.3914819955825806, "learning_rate": 1.6317943544525246e-05, "loss": 6.862, "step": 8520 }, { "epoch": 0.016339071163071785, "grad_norm": 1.5918484926223755, "learning_rate": 1.6337098308634328e-05, "loss": 6.8753, "step": 8530 }, { "epoch": 0.016358225994447015, "grad_norm": 1.4617615938186646, "learning_rate": 1.635625307274341e-05, "loss": 6.8582, "step": 8540 }, { "epoch": 0.016377380825822244, "grad_norm": 1.6049302816390991, "learning_rate": 1.6375407836852487e-05, "loss": 6.7031, "step": 8550 }, { "epoch": 0.016396535657197477, "grad_norm": 1.8244593143463135, "learning_rate": 1.6394562600961568e-05, "loss": 6.8349, "step": 8560 }, { "epoch": 0.016415690488572707, "grad_norm": 1.5242778062820435, "learning_rate": 1.6413717365070646e-05, "loss": 6.7594, "step": 8570 }, { "epoch": 0.016434845319947936, "grad_norm": 1.5086243152618408, "learning_rate": 1.6432872129179728e-05, "loss": 6.7753, "step": 8580 }, { "epoch": 0.01645400015132317, "grad_norm": 2.0745832920074463, "learning_rate": 1.6452026893288805e-05, "loss": 6.8605, "step": 8590 }, { "epoch": 0.0164731549826984, "grad_norm": 1.4907879829406738, "learning_rate": 1.6471181657397887e-05, "loss": 6.8573, "step": 8600 }, { "epoch": 0.016492309814073628, "grad_norm": 2.076223611831665, "learning_rate": 1.6490336421506968e-05, "loss": 6.7646, "step": 8610 }, { "epoch": 0.01651146464544886, "grad_norm": 1.3471840620040894, "learning_rate": 1.650949118561605e-05, "loss": 6.7792, "step": 8620 }, { "epoch": 0.01653061947682409, "grad_norm": 1.5100154876708984, "learning_rate": 1.6528645949725127e-05, "loss": 6.8391, "step": 8630 }, { "epoch": 0.01654977430819932, "grad_norm": 1.9894942045211792, "learning_rate": 1.654780071383421e-05, "loss": 6.6882, "step": 8640 }, { "epoch": 0.016568929139574553, "grad_norm": 1.73322594165802, 
"learning_rate": 1.656695547794329e-05, "loss": 6.8207, "step": 8650 }, { "epoch": 0.016588083970949782, "grad_norm": 1.4846559762954712, "learning_rate": 1.6586110242052368e-05, "loss": 6.8591, "step": 8660 }, { "epoch": 0.016607238802325012, "grad_norm": 1.4204062223434448, "learning_rate": 1.660526500616145e-05, "loss": 6.8365, "step": 8670 }, { "epoch": 0.016626393633700245, "grad_norm": 1.6675363779067993, "learning_rate": 1.6624419770270527e-05, "loss": 6.9052, "step": 8680 }, { "epoch": 0.016645548465075474, "grad_norm": 1.5925028324127197, "learning_rate": 1.664357453437961e-05, "loss": 6.857, "step": 8690 }, { "epoch": 0.016664703296450707, "grad_norm": 1.8146841526031494, "learning_rate": 1.6662729298488686e-05, "loss": 6.8177, "step": 8700 }, { "epoch": 0.016683858127825937, "grad_norm": 1.5402036905288696, "learning_rate": 1.667996858618686e-05, "loss": 6.7342, "step": 8710 }, { "epoch": 0.016703012959201166, "grad_norm": 1.3460150957107544, "learning_rate": 1.669912335029594e-05, "loss": 6.8291, "step": 8720 }, { "epoch": 0.0167221677905764, "grad_norm": 1.2969653606414795, "learning_rate": 1.671827811440502e-05, "loss": 6.8889, "step": 8730 }, { "epoch": 0.01674132262195163, "grad_norm": 1.4528985023498535, "learning_rate": 1.67374328785141e-05, "loss": 6.7873, "step": 8740 }, { "epoch": 0.016760477453326858, "grad_norm": 1.801450252532959, "learning_rate": 1.6756587642623178e-05, "loss": 6.783, "step": 8750 }, { "epoch": 0.01677963228470209, "grad_norm": 1.6895339488983154, "learning_rate": 1.677574240673226e-05, "loss": 6.7342, "step": 8760 }, { "epoch": 0.01679878711607732, "grad_norm": 1.683119535446167, "learning_rate": 1.679489717084134e-05, "loss": 6.8143, "step": 8770 }, { "epoch": 0.01681794194745255, "grad_norm": 1.5010899305343628, "learning_rate": 1.6814051934950422e-05, "loss": 6.7464, "step": 8780 }, { "epoch": 0.016837096778827783, "grad_norm": 1.3620615005493164, "learning_rate": 1.68332066990595e-05, "loss": 6.6705, "step": 8790 }, { 
"epoch": 0.016856251610203012, "grad_norm": 1.5514830350875854, "learning_rate": 1.685236146316858e-05, "loss": 6.6365, "step": 8800 }, { "epoch": 0.016875406441578242, "grad_norm": 1.197733759880066, "learning_rate": 1.687151622727766e-05, "loss": 6.8756, "step": 8810 }, { "epoch": 0.016894561272953475, "grad_norm": 1.5717819929122925, "learning_rate": 1.689067099138674e-05, "loss": 6.6801, "step": 8820 }, { "epoch": 0.016913716104328704, "grad_norm": 1.5630807876586914, "learning_rate": 1.6909825755495818e-05, "loss": 6.6918, "step": 8830 }, { "epoch": 0.016932870935703934, "grad_norm": 1.7554272413253784, "learning_rate": 1.69289805196049e-05, "loss": 6.6872, "step": 8840 }, { "epoch": 0.016952025767079167, "grad_norm": 1.4804017543792725, "learning_rate": 1.694813528371398e-05, "loss": 6.7497, "step": 8850 }, { "epoch": 0.016971180598454396, "grad_norm": 1.5014747381210327, "learning_rate": 1.696729004782306e-05, "loss": 6.7962, "step": 8860 }, { "epoch": 0.01699033542982963, "grad_norm": 1.5861601829528809, "learning_rate": 1.698644481193214e-05, "loss": 6.7201, "step": 8870 }, { "epoch": 0.01700949026120486, "grad_norm": 1.5173704624176025, "learning_rate": 1.7005599576041218e-05, "loss": 6.7594, "step": 8880 }, { "epoch": 0.017028645092580088, "grad_norm": 1.546862244606018, "learning_rate": 1.70247543401503e-05, "loss": 6.7578, "step": 8890 }, { "epoch": 0.01704779992395532, "grad_norm": 1.6836639642715454, "learning_rate": 1.704390910425938e-05, "loss": 6.7871, "step": 8900 }, { "epoch": 0.01706695475533055, "grad_norm": 1.4892204999923706, "learning_rate": 1.706306386836846e-05, "loss": 6.7827, "step": 8910 }, { "epoch": 0.01708610958670578, "grad_norm": 1.714837908744812, "learning_rate": 1.708221863247754e-05, "loss": 6.6676, "step": 8920 }, { "epoch": 0.017105264418081013, "grad_norm": 1.7168089151382446, "learning_rate": 1.7101373396586618e-05, "loss": 6.6829, "step": 8930 }, { "epoch": 0.017124419249456242, "grad_norm": 1.64242422580719, 
"learning_rate": 1.71205281606957e-05, "loss": 6.8317, "step": 8940 }, { "epoch": 0.01714357408083147, "grad_norm": 1.5903769731521606, "learning_rate": 1.713968292480478e-05, "loss": 6.7537, "step": 8950 }, { "epoch": 0.017162728912206705, "grad_norm": 1.3771452903747559, "learning_rate": 1.715883768891386e-05, "loss": 6.8075, "step": 8960 }, { "epoch": 0.017181883743581934, "grad_norm": 1.322149395942688, "learning_rate": 1.717799245302294e-05, "loss": 6.8471, "step": 8970 }, { "epoch": 0.017201038574957164, "grad_norm": 1.5501477718353271, "learning_rate": 1.719714721713202e-05, "loss": 6.7656, "step": 8980 }, { "epoch": 0.017220193406332397, "grad_norm": 1.6030365228652954, "learning_rate": 1.72163019812411e-05, "loss": 6.7645, "step": 8990 }, { "epoch": 0.017239348237707626, "grad_norm": 1.5936988592147827, "learning_rate": 1.723545674535018e-05, "loss": 6.7185, "step": 9000 }, { "epoch": 0.017258503069082855, "grad_norm": 1.611383080482483, "learning_rate": 1.7254611509459258e-05, "loss": 6.7421, "step": 9010 }, { "epoch": 0.01727765790045809, "grad_norm": 1.6058357954025269, "learning_rate": 1.727376627356834e-05, "loss": 6.6674, "step": 9020 }, { "epoch": 0.017296812731833318, "grad_norm": 1.8383845090866089, "learning_rate": 1.729292103767742e-05, "loss": 6.7619, "step": 9030 }, { "epoch": 0.01731596756320855, "grad_norm": 1.7764360904693604, "learning_rate": 1.73120758017865e-05, "loss": 6.7249, "step": 9040 }, { "epoch": 0.01733512239458378, "grad_norm": 1.69424307346344, "learning_rate": 1.733123056589558e-05, "loss": 6.738, "step": 9050 }, { "epoch": 0.01735427722595901, "grad_norm": 1.3952553272247314, "learning_rate": 1.7350385330004658e-05, "loss": 6.6711, "step": 9060 }, { "epoch": 0.017373432057334243, "grad_norm": 1.4935435056686401, "learning_rate": 1.736954009411374e-05, "loss": 6.6638, "step": 9070 }, { "epoch": 0.017392586888709472, "grad_norm": 1.7054884433746338, "learning_rate": 1.738869485822282e-05, "loss": 6.6873, "step": 9080 }, { 
"epoch": 0.0174117417200847, "grad_norm": 1.5917373895645142, "learning_rate": 1.7407849622331898e-05, "loss": 6.7873, "step": 9090 }, { "epoch": 0.017430896551459935, "grad_norm": 1.573307752609253, "learning_rate": 1.742700438644098e-05, "loss": 6.5986, "step": 9100 }, { "epoch": 0.017450051382835164, "grad_norm": 1.5926398038864136, "learning_rate": 1.744615915055006e-05, "loss": 6.7775, "step": 9110 }, { "epoch": 0.017469206214210393, "grad_norm": 1.5743650197982788, "learning_rate": 1.746531391465914e-05, "loss": 6.6732, "step": 9120 }, { "epoch": 0.017488361045585626, "grad_norm": 1.7861862182617188, "learning_rate": 1.748446867876822e-05, "loss": 6.7309, "step": 9130 }, { "epoch": 0.017507515876960856, "grad_norm": 1.6222121715545654, "learning_rate": 1.75036234428773e-05, "loss": 6.7705, "step": 9140 }, { "epoch": 0.017526670708336085, "grad_norm": 1.3721282482147217, "learning_rate": 1.752277820698638e-05, "loss": 6.6929, "step": 9150 }, { "epoch": 0.01754582553971132, "grad_norm": 1.622371792793274, "learning_rate": 1.754193297109546e-05, "loss": 6.6999, "step": 9160 }, { "epoch": 0.017564980371086548, "grad_norm": 1.4883249998092651, "learning_rate": 1.756108773520454e-05, "loss": 6.6885, "step": 9170 }, { "epoch": 0.017584135202461777, "grad_norm": 1.5171090364456177, "learning_rate": 1.758024249931362e-05, "loss": 6.6876, "step": 9180 }, { "epoch": 0.01760329003383701, "grad_norm": 1.5900779962539673, "learning_rate": 1.7599397263422698e-05, "loss": 6.703, "step": 9190 }, { "epoch": 0.01762244486521224, "grad_norm": 1.7490766048431396, "learning_rate": 1.761855202753178e-05, "loss": 6.6565, "step": 9200 }, { "epoch": 0.017641599696587473, "grad_norm": 1.6079915761947632, "learning_rate": 1.763770679164086e-05, "loss": 6.673, "step": 9210 }, { "epoch": 0.017660754527962702, "grad_norm": 2.089930534362793, "learning_rate": 1.7656861555749938e-05, "loss": 6.7057, "step": 9220 }, { "epoch": 0.01767990935933793, "grad_norm": 1.5525565147399902, 
"learning_rate": 1.767601631985902e-05, "loss": 6.5905, "step": 9230 }, { "epoch": 0.017699064190713164, "grad_norm": 1.4706906080245972, "learning_rate": 1.7695171083968097e-05, "loss": 6.7098, "step": 9240 }, { "epoch": 0.017718219022088394, "grad_norm": 1.9297091960906982, "learning_rate": 1.771432584807718e-05, "loss": 6.7023, "step": 9250 }, { "epoch": 0.017737373853463623, "grad_norm": 1.9557620286941528, "learning_rate": 1.773348061218626e-05, "loss": 6.6782, "step": 9260 }, { "epoch": 0.017756528684838856, "grad_norm": 1.3754831552505493, "learning_rate": 1.7752635376295338e-05, "loss": 6.6755, "step": 9270 }, { "epoch": 0.017775683516214086, "grad_norm": 1.7869787216186523, "learning_rate": 1.777179014040442e-05, "loss": 6.7065, "step": 9280 }, { "epoch": 0.017794838347589315, "grad_norm": 1.481744408607483, "learning_rate": 1.77909449045135e-05, "loss": 6.7382, "step": 9290 }, { "epoch": 0.017813993178964548, "grad_norm": 1.8006576299667358, "learning_rate": 1.781009966862258e-05, "loss": 6.6613, "step": 9300 }, { "epoch": 0.017833148010339778, "grad_norm": 1.501020073890686, "learning_rate": 1.782925443273166e-05, "loss": 6.754, "step": 9310 }, { "epoch": 0.017852302841715007, "grad_norm": 1.684753179550171, "learning_rate": 1.784840919684074e-05, "loss": 6.7169, "step": 9320 }, { "epoch": 0.01787145767309024, "grad_norm": 1.748565912246704, "learning_rate": 1.786756396094982e-05, "loss": 6.7012, "step": 9330 }, { "epoch": 0.01789061250446547, "grad_norm": 1.4200477600097656, "learning_rate": 1.78867187250589e-05, "loss": 6.7107, "step": 9340 }, { "epoch": 0.0179097673358407, "grad_norm": 1.594382405281067, "learning_rate": 1.790587348916798e-05, "loss": 6.6197, "step": 9350 }, { "epoch": 0.017928922167215932, "grad_norm": 1.4465962648391724, "learning_rate": 1.792502825327706e-05, "loss": 6.6845, "step": 9360 }, { "epoch": 0.01794807699859116, "grad_norm": 1.8290971517562866, "learning_rate": 1.794418301738614e-05, "loss": 6.5512, "step": 9370 }, { 
"epoch": 0.017967231829966394, "grad_norm": 1.6820993423461914, "learning_rate": 1.796333778149522e-05, "loss": 6.7634, "step": 9380 }, { "epoch": 0.017986386661341624, "grad_norm": 1.3178914785385132, "learning_rate": 1.79824925456043e-05, "loss": 6.7643, "step": 9390 }, { "epoch": 0.018005541492716853, "grad_norm": 1.5965347290039062, "learning_rate": 1.8001647309713378e-05, "loss": 6.6923, "step": 9400 }, { "epoch": 0.018024696324092086, "grad_norm": 1.3732357025146484, "learning_rate": 1.802080207382246e-05, "loss": 6.6654, "step": 9410 }, { "epoch": 0.018043851155467316, "grad_norm": 1.5317432880401611, "learning_rate": 1.8039956837931537e-05, "loss": 6.7159, "step": 9420 }, { "epoch": 0.018063005986842545, "grad_norm": 1.6788381338119507, "learning_rate": 1.805911160204062e-05, "loss": 6.6535, "step": 9430 }, { "epoch": 0.018082160818217778, "grad_norm": 1.3346718549728394, "learning_rate": 1.80782663661497e-05, "loss": 6.6854, "step": 9440 }, { "epoch": 0.018101315649593008, "grad_norm": 1.46698796749115, "learning_rate": 1.809742113025878e-05, "loss": 6.6562, "step": 9450 }, { "epoch": 0.018120470480968237, "grad_norm": 1.8655989170074463, "learning_rate": 1.811657589436786e-05, "loss": 6.559, "step": 9460 }, { "epoch": 0.01813962531234347, "grad_norm": 1.5411816835403442, "learning_rate": 1.813573065847694e-05, "loss": 6.6492, "step": 9470 }, { "epoch": 0.0181587801437187, "grad_norm": 1.6621730327606201, "learning_rate": 1.815488542258602e-05, "loss": 6.6653, "step": 9480 }, { "epoch": 0.01817793497509393, "grad_norm": 1.5066285133361816, "learning_rate": 1.81740401866951e-05, "loss": 6.6726, "step": 9490 }, { "epoch": 0.018197089806469162, "grad_norm": 1.9892487525939941, "learning_rate": 1.819319495080418e-05, "loss": 6.5759, "step": 9500 }, { "epoch": 0.01821624463784439, "grad_norm": 1.4319474697113037, "learning_rate": 1.821234971491326e-05, "loss": 6.6825, "step": 9510 }, { "epoch": 0.018235399469219624, "grad_norm": 1.6457016468048096, 
"learning_rate": 1.823150447902234e-05, "loss": 6.6015, "step": 9520 }, { "epoch": 0.018254554300594854, "grad_norm": 2.16097354888916, "learning_rate": 1.8250659243131418e-05, "loss": 6.594, "step": 9530 }, { "epoch": 0.018273709131970083, "grad_norm": 1.5903035402297974, "learning_rate": 1.82698140072405e-05, "loss": 6.5507, "step": 9540 }, { "epoch": 0.018292863963345316, "grad_norm": 1.5666700601577759, "learning_rate": 1.828896877134958e-05, "loss": 6.6732, "step": 9550 }, { "epoch": 0.018312018794720546, "grad_norm": 1.6218091249465942, "learning_rate": 1.830812353545866e-05, "loss": 6.6895, "step": 9560 }, { "epoch": 0.018331173626095775, "grad_norm": 2.0417492389678955, "learning_rate": 1.832727829956774e-05, "loss": 6.6269, "step": 9570 }, { "epoch": 0.018350328457471008, "grad_norm": 1.7581851482391357, "learning_rate": 1.8346433063676818e-05, "loss": 6.6513, "step": 9580 }, { "epoch": 0.018369483288846238, "grad_norm": 2.3180525302886963, "learning_rate": 1.83655878277859e-05, "loss": 6.7109, "step": 9590 }, { "epoch": 0.018388638120221467, "grad_norm": 1.531840205192566, "learning_rate": 1.8384742591894977e-05, "loss": 6.6276, "step": 9600 }, { "epoch": 0.0184077929515967, "grad_norm": 1.5209648609161377, "learning_rate": 1.840389735600406e-05, "loss": 6.5447, "step": 9610 }, { "epoch": 0.01842694778297193, "grad_norm": 1.482812762260437, "learning_rate": 1.842305212011314e-05, "loss": 6.6367, "step": 9620 }, { "epoch": 0.01844610261434716, "grad_norm": 1.6363201141357422, "learning_rate": 1.844220688422222e-05, "loss": 6.6934, "step": 9630 }, { "epoch": 0.018465257445722392, "grad_norm": 1.6831753253936768, "learning_rate": 1.84613616483313e-05, "loss": 6.6184, "step": 9640 }, { "epoch": 0.01848441227709762, "grad_norm": 1.4054652452468872, "learning_rate": 1.848051641244038e-05, "loss": 6.6296, "step": 9650 }, { "epoch": 0.01850356710847285, "grad_norm": 1.4718677997589111, "learning_rate": 1.8499671176549458e-05, "loss": 6.5776, "step": 9660 }, { 
"epoch": 0.018522721939848084, "grad_norm": 1.4058605432510376, "learning_rate": 1.851882594065854e-05, "loss": 6.6617, "step": 9670 }, { "epoch": 0.018541876771223313, "grad_norm": 1.4505248069763184, "learning_rate": 1.853798070476762e-05, "loss": 6.5818, "step": 9680 }, { "epoch": 0.018561031602598546, "grad_norm": 1.3321605920791626, "learning_rate": 1.85571354688767e-05, "loss": 6.6939, "step": 9690 }, { "epoch": 0.018580186433973776, "grad_norm": 1.8517169952392578, "learning_rate": 1.857629023298578e-05, "loss": 6.6164, "step": 9700 }, { "epoch": 0.018599341265349005, "grad_norm": 1.6646482944488525, "learning_rate": 1.8595444997094858e-05, "loss": 6.5616, "step": 9710 }, { "epoch": 0.018618496096724238, "grad_norm": 1.6570708751678467, "learning_rate": 1.861459976120394e-05, "loss": 6.6491, "step": 9720 }, { "epoch": 0.018637650928099467, "grad_norm": 1.7577630281448364, "learning_rate": 1.863375452531302e-05, "loss": 6.6513, "step": 9730 }, { "epoch": 0.018656805759474697, "grad_norm": 1.686432957649231, "learning_rate": 1.86529092894221e-05, "loss": 6.6494, "step": 9740 }, { "epoch": 0.01867596059084993, "grad_norm": 1.8314613103866577, "learning_rate": 1.867206405353118e-05, "loss": 6.6033, "step": 9750 }, { "epoch": 0.01869511542222516, "grad_norm": 1.6446055173873901, "learning_rate": 1.8691218817640258e-05, "loss": 6.6293, "step": 9760 }, { "epoch": 0.01871427025360039, "grad_norm": 1.4693223237991333, "learning_rate": 1.871037358174934e-05, "loss": 6.6481, "step": 9770 }, { "epoch": 0.018733425084975622, "grad_norm": 1.484470248222351, "learning_rate": 1.8729528345858417e-05, "loss": 6.5493, "step": 9780 }, { "epoch": 0.01875257991635085, "grad_norm": 1.5744935274124146, "learning_rate": 1.87486831099675e-05, "loss": 6.6175, "step": 9790 }, { "epoch": 0.01877173474772608, "grad_norm": 1.5704219341278076, "learning_rate": 1.876783787407658e-05, "loss": 6.5347, "step": 9800 }, { "epoch": 0.018790889579101314, "grad_norm": 1.7389434576034546, 
"learning_rate": 1.8786992638185657e-05, "loss": 6.5834, "step": 9810 }, { "epoch": 0.018810044410476543, "grad_norm": 1.6250368356704712, "learning_rate": 1.880614740229474e-05, "loss": 6.7024, "step": 9820 }, { "epoch": 0.018829199241851773, "grad_norm": 1.5277962684631348, "learning_rate": 1.8825302166403817e-05, "loss": 6.6258, "step": 9830 }, { "epoch": 0.018848354073227006, "grad_norm": 1.5019268989562988, "learning_rate": 1.8844456930512898e-05, "loss": 6.6648, "step": 9840 }, { "epoch": 0.018867508904602235, "grad_norm": 1.5886468887329102, "learning_rate": 1.8863611694621976e-05, "loss": 6.5888, "step": 9850 }, { "epoch": 0.018886663735977468, "grad_norm": 1.947816014289856, "learning_rate": 1.888276645873106e-05, "loss": 6.5524, "step": 9860 }, { "epoch": 0.018905818567352697, "grad_norm": 1.6337705850601196, "learning_rate": 1.890192122284014e-05, "loss": 6.6289, "step": 9870 }, { "epoch": 0.018924973398727927, "grad_norm": 1.6900488138198853, "learning_rate": 1.892107598694922e-05, "loss": 6.6197, "step": 9880 }, { "epoch": 0.01894412823010316, "grad_norm": 1.8131729364395142, "learning_rate": 1.89402307510583e-05, "loss": 6.6279, "step": 9890 }, { "epoch": 0.01896328306147839, "grad_norm": 1.4306223392486572, "learning_rate": 1.895938551516738e-05, "loss": 6.6479, "step": 9900 }, { "epoch": 0.01898243789285362, "grad_norm": 1.5793429613113403, "learning_rate": 1.897854027927646e-05, "loss": 6.7295, "step": 9910 }, { "epoch": 0.01900159272422885, "grad_norm": 1.7953273057937622, "learning_rate": 1.8997695043385538e-05, "loss": 6.4844, "step": 9920 }, { "epoch": 0.01902074755560408, "grad_norm": 1.4539650678634644, "learning_rate": 1.901684980749462e-05, "loss": 6.6866, "step": 9930 }, { "epoch": 0.01903990238697931, "grad_norm": 2.21219539642334, "learning_rate": 1.9036004571603697e-05, "loss": 6.6254, "step": 9940 }, { "epoch": 0.019059057218354544, "grad_norm": 1.8473824262619019, "learning_rate": 1.905515933571278e-05, "loss": 6.4908, "step": 9950 }, 
{ "epoch": 0.019078212049729773, "grad_norm": 1.6467056274414062, "learning_rate": 1.9074314099821857e-05, "loss": 6.4875, "step": 9960 }, { "epoch": 0.019097366881105002, "grad_norm": 2.489543914794922, "learning_rate": 1.909346886393094e-05, "loss": 6.6007, "step": 9970 }, { "epoch": 0.019116521712480235, "grad_norm": 1.7911261320114136, "learning_rate": 1.911262362804002e-05, "loss": 6.5588, "step": 9980 }, { "epoch": 0.019135676543855465, "grad_norm": 1.554165244102478, "learning_rate": 1.91317783921491e-05, "loss": 6.5845, "step": 9990 }, { "epoch": 0.019154831375230694, "grad_norm": 1.6501286029815674, "learning_rate": 1.915093315625818e-05, "loss": 6.7354, "step": 10000 }, { "epoch": 0.019173986206605927, "grad_norm": 1.4637367725372314, "learning_rate": 1.917008792036726e-05, "loss": 6.5445, "step": 10010 }, { "epoch": 0.019193141037981157, "grad_norm": 1.612842321395874, "learning_rate": 1.9189242684476338e-05, "loss": 6.535, "step": 10020 }, { "epoch": 0.01921229586935639, "grad_norm": 1.633095622062683, "learning_rate": 1.920839744858542e-05, "loss": 6.5419, "step": 10030 }, { "epoch": 0.01923145070073162, "grad_norm": 1.7135337591171265, "learning_rate": 1.9227552212694497e-05, "loss": 6.6247, "step": 10040 }, { "epoch": 0.01925060553210685, "grad_norm": 1.4456994533538818, "learning_rate": 1.9246706976803578e-05, "loss": 6.6381, "step": 10050 }, { "epoch": 0.01926976036348208, "grad_norm": 1.5889265537261963, "learning_rate": 1.926586174091266e-05, "loss": 6.5104, "step": 10060 }, { "epoch": 0.01928891519485731, "grad_norm": 1.729001760482788, "learning_rate": 1.9285016505021737e-05, "loss": 6.5042, "step": 10070 }, { "epoch": 0.01930807002623254, "grad_norm": 1.4121853113174438, "learning_rate": 1.9304171269130822e-05, "loss": 6.6275, "step": 10080 }, { "epoch": 0.019327224857607773, "grad_norm": 1.7814034223556519, "learning_rate": 1.93233260332399e-05, "loss": 6.4936, "step": 10090 }, { "epoch": 0.019346379688983003, "grad_norm": 1.6384745836257935, 
"learning_rate": 1.934248079734898e-05, "loss": 6.64, "step": 10100 }, { "epoch": 0.019365534520358232, "grad_norm": 1.4563788175582886, "learning_rate": 1.936163556145806e-05, "loss": 6.5992, "step": 10110 }, { "epoch": 0.019384689351733465, "grad_norm": 1.445471167564392, "learning_rate": 1.938079032556714e-05, "loss": 6.676, "step": 10120 }, { "epoch": 0.019403844183108695, "grad_norm": 1.7177507877349854, "learning_rate": 1.939994508967622e-05, "loss": 6.6041, "step": 10130 }, { "epoch": 0.019422999014483924, "grad_norm": 1.884621024131775, "learning_rate": 1.94190998537853e-05, "loss": 6.6402, "step": 10140 }, { "epoch": 0.019442153845859157, "grad_norm": 1.5693126916885376, "learning_rate": 1.9438254617894378e-05, "loss": 6.6158, "step": 10150 }, { "epoch": 0.019461308677234387, "grad_norm": 1.817687749862671, "learning_rate": 1.945740938200346e-05, "loss": 6.5224, "step": 10160 }, { "epoch": 0.019480463508609616, "grad_norm": 1.7543795108795166, "learning_rate": 1.9476564146112537e-05, "loss": 6.6885, "step": 10170 }, { "epoch": 0.01949961833998485, "grad_norm": 1.5250122547149658, "learning_rate": 1.9495718910221618e-05, "loss": 6.4877, "step": 10180 }, { "epoch": 0.01951877317136008, "grad_norm": 1.5306978225708008, "learning_rate": 1.9514873674330696e-05, "loss": 6.4954, "step": 10190 }, { "epoch": 0.01953792800273531, "grad_norm": 1.7788914442062378, "learning_rate": 1.9534028438439778e-05, "loss": 6.7009, "step": 10200 }, { "epoch": 0.01955708283411054, "grad_norm": 1.583964467048645, "learning_rate": 1.9553183202548855e-05, "loss": 6.6145, "step": 10210 }, { "epoch": 0.01957623766548577, "grad_norm": 1.4825247526168823, "learning_rate": 1.957233796665794e-05, "loss": 6.5917, "step": 10220 }, { "epoch": 0.019595392496861003, "grad_norm": 1.4621261358261108, "learning_rate": 1.959149273076702e-05, "loss": 6.5866, "step": 10230 }, { "epoch": 0.019614547328236233, "grad_norm": 1.6984165906906128, "learning_rate": 1.96106474948761e-05, "loss": 6.5397, 
"step": 10240 }, { "epoch": 0.019633702159611462, "grad_norm": 1.6250255107879639, "learning_rate": 1.962980225898518e-05, "loss": 6.4622, "step": 10250 }, { "epoch": 0.019652856990986695, "grad_norm": 1.6083265542984009, "learning_rate": 1.964895702309426e-05, "loss": 6.6075, "step": 10260 }, { "epoch": 0.019672011822361925, "grad_norm": 1.6887290477752686, "learning_rate": 1.966811178720334e-05, "loss": 6.5766, "step": 10270 }, { "epoch": 0.019691166653737154, "grad_norm": 1.6142712831497192, "learning_rate": 1.9687266551312418e-05, "loss": 6.5309, "step": 10280 }, { "epoch": 0.019710321485112387, "grad_norm": 1.5701175928115845, "learning_rate": 1.97064213154215e-05, "loss": 6.4747, "step": 10290 }, { "epoch": 0.019729476316487617, "grad_norm": 1.4451370239257812, "learning_rate": 1.9725576079530577e-05, "loss": 6.5501, "step": 10300 }, { "epoch": 0.019748631147862846, "grad_norm": 1.805773377418518, "learning_rate": 1.974473084363966e-05, "loss": 6.5588, "step": 10310 }, { "epoch": 0.01976778597923808, "grad_norm": 1.558671236038208, "learning_rate": 1.9763885607748736e-05, "loss": 6.5465, "step": 10320 }, { "epoch": 0.01978694081061331, "grad_norm": 1.6228116750717163, "learning_rate": 1.978304037185782e-05, "loss": 6.5284, "step": 10330 }, { "epoch": 0.019806095641988538, "grad_norm": 1.5837953090667725, "learning_rate": 1.98021951359669e-05, "loss": 6.5038, "step": 10340 }, { "epoch": 0.01982525047336377, "grad_norm": 1.4137009382247925, "learning_rate": 1.982134990007598e-05, "loss": 6.5896, "step": 10350 }, { "epoch": 0.019844405304739, "grad_norm": 1.6187065839767456, "learning_rate": 1.9840504664185058e-05, "loss": 6.5239, "step": 10360 }, { "epoch": 0.019863560136114233, "grad_norm": 1.6819394826889038, "learning_rate": 1.985965942829414e-05, "loss": 6.4767, "step": 10370 }, { "epoch": 0.019882714967489463, "grad_norm": 1.7314453125, "learning_rate": 1.9878814192403217e-05, "loss": 6.4503, "step": 10380 }, { "epoch": 0.019901869798864692, "grad_norm": 
1.394924521446228, "learning_rate": 1.98979689565123e-05, "loss": 6.473, "step": 10390 }, { "epoch": 0.019921024630239925, "grad_norm": 1.285129427909851, "learning_rate": 1.991712372062138e-05, "loss": 6.482, "step": 10400 }, { "epoch": 0.019940179461615155, "grad_norm": 1.6043741703033447, "learning_rate": 1.9936278484730458e-05, "loss": 6.5101, "step": 10410 }, { "epoch": 0.019959334292990384, "grad_norm": 1.8057100772857666, "learning_rate": 1.995543324883954e-05, "loss": 6.4881, "step": 10420 }, { "epoch": 0.019978489124365617, "grad_norm": 1.5528473854064941, "learning_rate": 1.9974588012948617e-05, "loss": 6.6229, "step": 10430 }, { "epoch": 0.019997643955740847, "grad_norm": 1.4058277606964111, "learning_rate": 1.9993742777057702e-05, "loss": 6.5047, "step": 10440 }, { "epoch": 0.020016798787116076, "grad_norm": 1.9119415283203125, "learning_rate": 2.001289754116678e-05, "loss": 6.5561, "step": 10450 }, { "epoch": 0.02003595361849131, "grad_norm": 1.5782214403152466, "learning_rate": 2.003205230527586e-05, "loss": 6.6489, "step": 10460 }, { "epoch": 0.02005510844986654, "grad_norm": 1.7601591348648071, "learning_rate": 2.005120706938494e-05, "loss": 6.551, "step": 10470 }, { "epoch": 0.020074263281241768, "grad_norm": 1.7021034955978394, "learning_rate": 2.007036183349402e-05, "loss": 6.5935, "step": 10480 }, { "epoch": 0.020093418112617, "grad_norm": 1.4933888912200928, "learning_rate": 2.0089516597603098e-05, "loss": 6.4657, "step": 10490 }, { "epoch": 0.02011257294399223, "grad_norm": 1.9248747825622559, "learning_rate": 2.010867136171218e-05, "loss": 6.6087, "step": 10500 }, { "epoch": 0.02013172777536746, "grad_norm": 1.5865472555160522, "learning_rate": 2.0127826125821257e-05, "loss": 6.5243, "step": 10510 }, { "epoch": 0.020150882606742693, "grad_norm": 1.6137351989746094, "learning_rate": 2.014698088993034e-05, "loss": 6.4394, "step": 10520 }, { "epoch": 0.020170037438117922, "grad_norm": 1.5384950637817383, "learning_rate": 2.0166135654039417e-05, 
"loss": 6.5347, "step": 10530 }, { "epoch": 0.020189192269493155, "grad_norm": 1.9964104890823364, "learning_rate": 2.0185290418148498e-05, "loss": 6.3973, "step": 10540 }, { "epoch": 0.020208347100868385, "grad_norm": 1.9728927612304688, "learning_rate": 2.0204445182257576e-05, "loss": 6.4182, "step": 10550 }, { "epoch": 0.020227501932243614, "grad_norm": 1.7715507745742798, "learning_rate": 2.0223599946366657e-05, "loss": 6.4855, "step": 10560 }, { "epoch": 0.020246656763618847, "grad_norm": 1.56437087059021, "learning_rate": 2.0242754710475742e-05, "loss": 6.5545, "step": 10570 }, { "epoch": 0.020265811594994076, "grad_norm": 1.7544361352920532, "learning_rate": 2.026190947458482e-05, "loss": 6.4818, "step": 10580 }, { "epoch": 0.020284966426369306, "grad_norm": 1.5129733085632324, "learning_rate": 2.02810642386939e-05, "loss": 6.5838, "step": 10590 }, { "epoch": 0.02030412125774454, "grad_norm": 1.6927231550216675, "learning_rate": 2.030021900280298e-05, "loss": 6.5959, "step": 10600 }, { "epoch": 0.02032327608911977, "grad_norm": 1.478371024131775, "learning_rate": 2.031937376691206e-05, "loss": 6.5876, "step": 10610 }, { "epoch": 0.020342430920494998, "grad_norm": 1.3900259733200073, "learning_rate": 2.0338528531021138e-05, "loss": 6.4846, "step": 10620 }, { "epoch": 0.02036158575187023, "grad_norm": 2.1552281379699707, "learning_rate": 2.035768329513022e-05, "loss": 6.5831, "step": 10630 }, { "epoch": 0.02038074058324546, "grad_norm": 1.5742690563201904, "learning_rate": 2.0376838059239297e-05, "loss": 6.4732, "step": 10640 }, { "epoch": 0.02039989541462069, "grad_norm": 1.7609798908233643, "learning_rate": 2.039599282334838e-05, "loss": 6.4716, "step": 10650 }, { "epoch": 0.020419050245995923, "grad_norm": 1.6127510070800781, "learning_rate": 2.0415147587457457e-05, "loss": 6.5424, "step": 10660 }, { "epoch": 0.020438205077371152, "grad_norm": 1.5400416851043701, "learning_rate": 2.0434302351566538e-05, "loss": 6.4954, "step": 10670 }, { "epoch": 
0.020457359908746385, "grad_norm": 2.0290329456329346, "learning_rate": 2.0453457115675616e-05, "loss": 6.599, "step": 10680 }, { "epoch": 0.020476514740121615, "grad_norm": 1.9170002937316895, "learning_rate": 2.04726118797847e-05, "loss": 6.6536, "step": 10690 }, { "epoch": 0.020495669571496844, "grad_norm": 1.649163007736206, "learning_rate": 2.049176664389378e-05, "loss": 6.453, "step": 10700 }, { "epoch": 0.020514824402872077, "grad_norm": 1.4085837602615356, "learning_rate": 2.051092140800286e-05, "loss": 6.5909, "step": 10710 }, { "epoch": 0.020533979234247306, "grad_norm": 1.5657908916473389, "learning_rate": 2.0530076172111938e-05, "loss": 6.5196, "step": 10720 }, { "epoch": 0.020553134065622536, "grad_norm": 1.4002389907836914, "learning_rate": 2.054923093622102e-05, "loss": 6.5492, "step": 10730 }, { "epoch": 0.02057228889699777, "grad_norm": 1.5458778142929077, "learning_rate": 2.05683857003301e-05, "loss": 6.5319, "step": 10740 }, { "epoch": 0.020591443728373, "grad_norm": 1.7612899541854858, "learning_rate": 2.0587540464439178e-05, "loss": 6.4681, "step": 10750 }, { "epoch": 0.020610598559748228, "grad_norm": 1.6059777736663818, "learning_rate": 2.060669522854826e-05, "loss": 6.5847, "step": 10760 }, { "epoch": 0.02062975339112346, "grad_norm": 1.8575351238250732, "learning_rate": 2.0625849992657337e-05, "loss": 6.4387, "step": 10770 }, { "epoch": 0.02064890822249869, "grad_norm": 1.5552740097045898, "learning_rate": 2.064500475676642e-05, "loss": 6.2701, "step": 10780 }, { "epoch": 0.02066806305387392, "grad_norm": 1.6506644487380981, "learning_rate": 2.0664159520875497e-05, "loss": 6.4554, "step": 10790 }, { "epoch": 0.020687217885249153, "grad_norm": 1.6370021104812622, "learning_rate": 2.068331428498458e-05, "loss": 6.5184, "step": 10800 }, { "epoch": 0.020706372716624382, "grad_norm": 1.605951189994812, "learning_rate": 2.070246904909366e-05, "loss": 6.5693, "step": 10810 }, { "epoch": 0.02072552754799961, "grad_norm": 2.3389179706573486, 
"learning_rate": 2.072162381320274e-05, "loss": 6.328, "step": 10820 }, { "epoch": 0.020744682379374844, "grad_norm": 1.8153976202011108, "learning_rate": 2.074077857731182e-05, "loss": 6.4656, "step": 10830 }, { "epoch": 0.020763837210750074, "grad_norm": 2.0385942459106445, "learning_rate": 2.07599333414209e-05, "loss": 6.3526, "step": 10840 }, { "epoch": 0.020782992042125307, "grad_norm": 1.4398572444915771, "learning_rate": 2.0779088105529978e-05, "loss": 6.4899, "step": 10850 }, { "epoch": 0.020802146873500536, "grad_norm": 1.6253643035888672, "learning_rate": 2.079824286963906e-05, "loss": 6.3923, "step": 10860 }, { "epoch": 0.020821301704875766, "grad_norm": 1.6418347358703613, "learning_rate": 2.0817397633748137e-05, "loss": 6.4361, "step": 10870 }, { "epoch": 0.020840456536251, "grad_norm": 1.6653944253921509, "learning_rate": 2.0836552397857218e-05, "loss": 6.5067, "step": 10880 }, { "epoch": 0.020859611367626228, "grad_norm": 1.4874014854431152, "learning_rate": 2.0855707161966296e-05, "loss": 6.4894, "step": 10890 }, { "epoch": 0.020878766199001458, "grad_norm": 1.6426348686218262, "learning_rate": 2.0874861926075377e-05, "loss": 6.4729, "step": 10900 }, { "epoch": 0.02089792103037669, "grad_norm": 1.7465567588806152, "learning_rate": 2.0894016690184462e-05, "loss": 6.559, "step": 10910 }, { "epoch": 0.02091707586175192, "grad_norm": 1.4626708030700684, "learning_rate": 2.0913171454293537e-05, "loss": 6.5269, "step": 10920 }, { "epoch": 0.02093623069312715, "grad_norm": 1.6624921560287476, "learning_rate": 2.093232621840262e-05, "loss": 6.549, "step": 10930 }, { "epoch": 0.020955385524502382, "grad_norm": 1.6874706745147705, "learning_rate": 2.09514809825117e-05, "loss": 6.4844, "step": 10940 }, { "epoch": 0.020974540355877612, "grad_norm": 1.7444298267364502, "learning_rate": 2.097063574662078e-05, "loss": 6.4665, "step": 10950 }, { "epoch": 0.02099369518725284, "grad_norm": 1.4230173826217651, "learning_rate": 2.098979051072986e-05, "loss": 6.3947, 
"step": 10960 }, { "epoch": 0.021012850018628074, "grad_norm": 2.239532232284546, "learning_rate": 2.100894527483894e-05, "loss": 6.348, "step": 10970 }, { "epoch": 0.021032004850003304, "grad_norm": 1.7134523391723633, "learning_rate": 2.1028100038948018e-05, "loss": 6.3402, "step": 10980 }, { "epoch": 0.021051159681378533, "grad_norm": 1.5463407039642334, "learning_rate": 2.10472548030571e-05, "loss": 6.5402, "step": 10990 }, { "epoch": 0.021070314512753766, "grad_norm": 1.6772223711013794, "learning_rate": 2.1066409567166177e-05, "loss": 6.4844, "step": 11000 }, { "epoch": 0.021089469344128996, "grad_norm": 1.5241953134536743, "learning_rate": 2.1085564331275258e-05, "loss": 6.4767, "step": 11010 }, { "epoch": 0.02110862417550423, "grad_norm": 1.6443395614624023, "learning_rate": 2.1104719095384336e-05, "loss": 6.5245, "step": 11020 }, { "epoch": 0.021127779006879458, "grad_norm": 1.7493256330490112, "learning_rate": 2.1123873859493417e-05, "loss": 6.4053, "step": 11030 }, { "epoch": 0.021146933838254688, "grad_norm": 1.6224993467330933, "learning_rate": 2.1143028623602495e-05, "loss": 6.4542, "step": 11040 }, { "epoch": 0.02116608866962992, "grad_norm": 1.5525051355361938, "learning_rate": 2.116218338771158e-05, "loss": 6.4361, "step": 11050 }, { "epoch": 0.02118524350100515, "grad_norm": 1.6546965837478638, "learning_rate": 2.1181338151820658e-05, "loss": 6.4847, "step": 11060 }, { "epoch": 0.02120439833238038, "grad_norm": 1.7200173139572144, "learning_rate": 2.120049291592974e-05, "loss": 6.4561, "step": 11070 }, { "epoch": 0.021223553163755612, "grad_norm": 1.7484817504882812, "learning_rate": 2.121964768003882e-05, "loss": 6.5163, "step": 11080 }, { "epoch": 0.021242707995130842, "grad_norm": 1.8313699960708618, "learning_rate": 2.12388024441479e-05, "loss": 6.4459, "step": 11090 }, { "epoch": 0.02126186282650607, "grad_norm": 1.9084455966949463, "learning_rate": 2.125795720825698e-05, "loss": 6.4309, "step": 11100 }, { "epoch": 0.021281017657881304, 
"grad_norm": 1.5925785303115845, "learning_rate": 2.1277111972366058e-05, "loss": 6.5354, "step": 11110 }, { "epoch": 0.021300172489256534, "grad_norm": 1.8073430061340332, "learning_rate": 2.129626673647514e-05, "loss": 6.423, "step": 11120 }, { "epoch": 0.021319327320631763, "grad_norm": 1.6412922143936157, "learning_rate": 2.1315421500584217e-05, "loss": 6.4926, "step": 11130 }, { "epoch": 0.021338482152006996, "grad_norm": 1.7680460214614868, "learning_rate": 2.1334576264693298e-05, "loss": 6.379, "step": 11140 }, { "epoch": 0.021357636983382226, "grad_norm": 2.026029586791992, "learning_rate": 2.1353731028802376e-05, "loss": 6.4852, "step": 11150 }, { "epoch": 0.021376791814757455, "grad_norm": 1.6917155981063843, "learning_rate": 2.137097031650055e-05, "loss": 6.4588, "step": 11160 }, { "epoch": 0.021395946646132688, "grad_norm": 1.8096848726272583, "learning_rate": 2.139012508060963e-05, "loss": 6.3146, "step": 11170 }, { "epoch": 0.021415101477507918, "grad_norm": 1.5221431255340576, "learning_rate": 2.140927984471871e-05, "loss": 6.4772, "step": 11180 }, { "epoch": 0.02143425630888315, "grad_norm": 1.802631139755249, "learning_rate": 2.142843460882779e-05, "loss": 6.4363, "step": 11190 }, { "epoch": 0.02145341114025838, "grad_norm": 1.7504355907440186, "learning_rate": 2.1447589372936868e-05, "loss": 6.5042, "step": 11200 }, { "epoch": 0.02147256597163361, "grad_norm": 1.5879414081573486, "learning_rate": 2.146674413704595e-05, "loss": 6.4922, "step": 11210 }, { "epoch": 0.021491720803008842, "grad_norm": 1.7812081575393677, "learning_rate": 2.1485898901155027e-05, "loss": 6.2953, "step": 11220 }, { "epoch": 0.021510875634384072, "grad_norm": 1.9023163318634033, "learning_rate": 2.1505053665264112e-05, "loss": 6.4624, "step": 11230 }, { "epoch": 0.0215300304657593, "grad_norm": 1.6169971227645874, "learning_rate": 2.1524208429373193e-05, "loss": 6.3838, "step": 11240 }, { "epoch": 0.021549185297134534, "grad_norm": 1.7639259099960327, "learning_rate": 
2.154336319348227e-05, "loss": 6.3974, "step": 11250 }, { "epoch": 0.021568340128509764, "grad_norm": 1.7815358638763428, "learning_rate": 2.1562517957591352e-05, "loss": 6.4771, "step": 11260 }, { "epoch": 0.021587494959884993, "grad_norm": 1.746949315071106, "learning_rate": 2.158167272170043e-05, "loss": 6.3883, "step": 11270 }, { "epoch": 0.021606649791260226, "grad_norm": 1.716609001159668, "learning_rate": 2.160082748580951e-05, "loss": 6.3993, "step": 11280 }, { "epoch": 0.021625804622635456, "grad_norm": 1.5318593978881836, "learning_rate": 2.161998224991859e-05, "loss": 6.4846, "step": 11290 }, { "epoch": 0.021644959454010685, "grad_norm": 1.7179787158966064, "learning_rate": 2.163913701402767e-05, "loss": 6.381, "step": 11300 }, { "epoch": 0.021664114285385918, "grad_norm": 1.862231731414795, "learning_rate": 2.165829177813675e-05, "loss": 6.4403, "step": 11310 }, { "epoch": 0.021683269116761147, "grad_norm": 1.6539835929870605, "learning_rate": 2.167744654224583e-05, "loss": 6.4057, "step": 11320 }, { "epoch": 0.021702423948136377, "grad_norm": 1.899898648262024, "learning_rate": 2.1696601306354908e-05, "loss": 6.2659, "step": 11330 }, { "epoch": 0.02172157877951161, "grad_norm": 1.9517146348953247, "learning_rate": 2.1715756070463993e-05, "loss": 6.4445, "step": 11340 }, { "epoch": 0.02174073361088684, "grad_norm": 1.6669275760650635, "learning_rate": 2.173491083457307e-05, "loss": 6.3376, "step": 11350 }, { "epoch": 0.021759888442262072, "grad_norm": 1.7422072887420654, "learning_rate": 2.1754065598682152e-05, "loss": 6.324, "step": 11360 }, { "epoch": 0.0217790432736373, "grad_norm": 1.757256269454956, "learning_rate": 2.177322036279123e-05, "loss": 6.354, "step": 11370 }, { "epoch": 0.02179819810501253, "grad_norm": 1.5646611452102661, "learning_rate": 2.179237512690031e-05, "loss": 6.3354, "step": 11380 }, { "epoch": 0.021817352936387764, "grad_norm": 1.9121489524841309, "learning_rate": 2.181152989100939e-05, "loss": 6.3303, "step": 11390 }, { 
"epoch": 0.021836507767762994, "grad_norm": 1.5382215976715088, "learning_rate": 2.183068465511847e-05, "loss": 6.4907, "step": 11400 }, { "epoch": 0.021855662599138223, "grad_norm": 1.5800894498825073, "learning_rate": 2.184983941922755e-05, "loss": 6.4612, "step": 11410 }, { "epoch": 0.021874817430513456, "grad_norm": 1.8167097568511963, "learning_rate": 2.186899418333663e-05, "loss": 6.4203, "step": 11420 }, { "epoch": 0.021893972261888685, "grad_norm": 1.8758612871170044, "learning_rate": 2.188814894744571e-05, "loss": 6.4275, "step": 11430 }, { "epoch": 0.021913127093263915, "grad_norm": 1.5273736715316772, "learning_rate": 2.190730371155479e-05, "loss": 6.423, "step": 11440 }, { "epoch": 0.021932281924639148, "grad_norm": 1.3553948402404785, "learning_rate": 2.1926458475663873e-05, "loss": 6.5109, "step": 11450 }, { "epoch": 0.021951436756014377, "grad_norm": 1.5950515270233154, "learning_rate": 2.1945613239772948e-05, "loss": 6.4145, "step": 11460 }, { "epoch": 0.021970591587389607, "grad_norm": 1.7058335542678833, "learning_rate": 2.1964768003882033e-05, "loss": 6.3313, "step": 11470 }, { "epoch": 0.02198974641876484, "grad_norm": 1.7981687784194946, "learning_rate": 2.198392276799111e-05, "loss": 6.4252, "step": 11480 }, { "epoch": 0.02200890125014007, "grad_norm": 1.4487355947494507, "learning_rate": 2.2003077532100192e-05, "loss": 6.4652, "step": 11490 }, { "epoch": 0.0220280560815153, "grad_norm": 1.8177121877670288, "learning_rate": 2.202223229620927e-05, "loss": 6.2748, "step": 11500 }, { "epoch": 0.02204721091289053, "grad_norm": 1.5335906744003296, "learning_rate": 2.204138706031835e-05, "loss": 6.3029, "step": 11510 }, { "epoch": 0.02206636574426576, "grad_norm": 1.706433892250061, "learning_rate": 2.206054182442743e-05, "loss": 6.2856, "step": 11520 }, { "epoch": 0.022085520575640994, "grad_norm": 1.662317156791687, "learning_rate": 2.207969658853651e-05, "loss": 6.4796, "step": 11530 }, { "epoch": 0.022104675407016224, "grad_norm": 
2.035954236984253, "learning_rate": 2.2098851352645588e-05, "loss": 6.3904, "step": 11540 }, { "epoch": 0.022123830238391453, "grad_norm": 1.7419564723968506, "learning_rate": 2.211800611675467e-05, "loss": 6.2685, "step": 11550 }, { "epoch": 0.022142985069766686, "grad_norm": 1.616652011871338, "learning_rate": 2.2137160880863747e-05, "loss": 6.5424, "step": 11560 }, { "epoch": 0.022162139901141915, "grad_norm": 1.8870606422424316, "learning_rate": 2.215631564497283e-05, "loss": 6.4875, "step": 11570 }, { "epoch": 0.022181294732517145, "grad_norm": 1.742916226387024, "learning_rate": 2.2175470409081913e-05, "loss": 6.353, "step": 11580 }, { "epoch": 0.022200449563892378, "grad_norm": 1.682126760482788, "learning_rate": 2.219462517319099e-05, "loss": 6.3827, "step": 11590 }, { "epoch": 0.022219604395267607, "grad_norm": 1.6037653684616089, "learning_rate": 2.2213779937300073e-05, "loss": 6.459, "step": 11600 }, { "epoch": 0.022238759226642837, "grad_norm": 1.3127361536026, "learning_rate": 2.223293470140915e-05, "loss": 6.438, "step": 11610 }, { "epoch": 0.02225791405801807, "grad_norm": 1.8422911167144775, "learning_rate": 2.2252089465518232e-05, "loss": 6.2953, "step": 11620 }, { "epoch": 0.0222770688893933, "grad_norm": 1.646162509918213, "learning_rate": 2.227124422962731e-05, "loss": 6.4875, "step": 11630 }, { "epoch": 0.02229622372076853, "grad_norm": 1.7860134840011597, "learning_rate": 2.229039899373639e-05, "loss": 6.3554, "step": 11640 }, { "epoch": 0.02231537855214376, "grad_norm": 1.5167253017425537, "learning_rate": 2.230955375784547e-05, "loss": 6.2795, "step": 11650 }, { "epoch": 0.02233453338351899, "grad_norm": 1.7912014722824097, "learning_rate": 2.232870852195455e-05, "loss": 6.4142, "step": 11660 }, { "epoch": 0.02235368821489422, "grad_norm": 1.6352335214614868, "learning_rate": 2.2347863286063628e-05, "loss": 6.3882, "step": 11670 }, { "epoch": 0.022372843046269453, "grad_norm": 1.4604380130767822, "learning_rate": 2.236701805017271e-05, 
"loss": 6.4684, "step": 11680 }, { "epoch": 0.022391997877644683, "grad_norm": 2.096296787261963, "learning_rate": 2.2386172814281787e-05, "loss": 6.4175, "step": 11690 }, { "epoch": 0.022411152709019916, "grad_norm": 1.6026040315628052, "learning_rate": 2.2405327578390872e-05, "loss": 6.4656, "step": 11700 }, { "epoch": 0.022430307540395145, "grad_norm": 1.704741358757019, "learning_rate": 2.242448234249995e-05, "loss": 6.3289, "step": 11710 }, { "epoch": 0.022449462371770375, "grad_norm": 1.5717103481292725, "learning_rate": 2.244363710660903e-05, "loss": 6.35, "step": 11720 }, { "epoch": 0.022468617203145608, "grad_norm": 1.7323806285858154, "learning_rate": 2.246279187071811e-05, "loss": 6.3263, "step": 11730 }, { "epoch": 0.022487772034520837, "grad_norm": 1.6435539722442627, "learning_rate": 2.248194663482719e-05, "loss": 6.4011, "step": 11740 }, { "epoch": 0.022506926865896067, "grad_norm": 1.7363852262496948, "learning_rate": 2.2501101398936272e-05, "loss": 6.3471, "step": 11750 }, { "epoch": 0.0225260816972713, "grad_norm": 1.8517439365386963, "learning_rate": 2.252025616304535e-05, "loss": 6.4213, "step": 11760 }, { "epoch": 0.02254523652864653, "grad_norm": 1.7997726202011108, "learning_rate": 2.253941092715443e-05, "loss": 6.3065, "step": 11770 }, { "epoch": 0.02256439136002176, "grad_norm": 1.6725506782531738, "learning_rate": 2.255856569126351e-05, "loss": 6.4597, "step": 11780 }, { "epoch": 0.02258354619139699, "grad_norm": 1.4759446382522583, "learning_rate": 2.257772045537259e-05, "loss": 6.3596, "step": 11790 }, { "epoch": 0.02260270102277222, "grad_norm": 1.7676483392715454, "learning_rate": 2.2596875219481668e-05, "loss": 6.3213, "step": 11800 }, { "epoch": 0.02262185585414745, "grad_norm": 1.5500397682189941, "learning_rate": 2.2616029983590753e-05, "loss": 6.2968, "step": 11810 }, { "epoch": 0.022641010685522683, "grad_norm": 2.2054100036621094, "learning_rate": 2.2635184747699827e-05, "loss": 6.3316, "step": 11820 }, { "epoch": 
0.022660165516897913, "grad_norm": 1.7534270286560059, "learning_rate": 2.2654339511808912e-05, "loss": 6.2879, "step": 11830 }, { "epoch": 0.022679320348273146, "grad_norm": 1.5489696264266968, "learning_rate": 2.267349427591799e-05, "loss": 6.3625, "step": 11840 }, { "epoch": 0.022698475179648375, "grad_norm": 1.6373870372772217, "learning_rate": 2.269264904002707e-05, "loss": 6.3768, "step": 11850 }, { "epoch": 0.022717630011023605, "grad_norm": 1.6084007024765015, "learning_rate": 2.271180380413615e-05, "loss": 6.2692, "step": 11860 }, { "epoch": 0.022736784842398838, "grad_norm": 1.7292832136154175, "learning_rate": 2.273095856824523e-05, "loss": 6.4337, "step": 11870 }, { "epoch": 0.022755939673774067, "grad_norm": 2.03765606880188, "learning_rate": 2.275011333235431e-05, "loss": 6.3381, "step": 11880 }, { "epoch": 0.022775094505149297, "grad_norm": 1.5487457513809204, "learning_rate": 2.276926809646339e-05, "loss": 6.3772, "step": 11890 }, { "epoch": 0.02279424933652453, "grad_norm": 1.6993800401687622, "learning_rate": 2.2788422860572468e-05, "loss": 6.4412, "step": 11900 }, { "epoch": 0.02281340416789976, "grad_norm": 1.7200024127960205, "learning_rate": 2.280757762468155e-05, "loss": 6.359, "step": 11910 }, { "epoch": 0.02283255899927499, "grad_norm": 1.8763833045959473, "learning_rate": 2.282673238879063e-05, "loss": 6.4236, "step": 11920 }, { "epoch": 0.02285171383065022, "grad_norm": 1.7511099576950073, "learning_rate": 2.2845887152899708e-05, "loss": 6.3485, "step": 11930 }, { "epoch": 0.02287086866202545, "grad_norm": 1.7852083444595337, "learning_rate": 2.2865041917008793e-05, "loss": 6.3991, "step": 11940 }, { "epoch": 0.02289002349340068, "grad_norm": 1.9568992853164673, "learning_rate": 2.288419668111787e-05, "loss": 6.4119, "step": 11950 }, { "epoch": 0.022909178324775913, "grad_norm": 1.8282604217529297, "learning_rate": 2.2903351445226952e-05, "loss": 6.2652, "step": 11960 }, { "epoch": 0.022928333156151143, "grad_norm": 1.90345299243927, 
"learning_rate": 2.292250620933603e-05, "loss": 6.2648, "step": 11970 }, { "epoch": 0.022947487987526372, "grad_norm": 2.052565574645996, "learning_rate": 2.294166097344511e-05, "loss": 6.4015, "step": 11980 }, { "epoch": 0.022966642818901605, "grad_norm": 1.8957093954086304, "learning_rate": 2.296081573755419e-05, "loss": 6.3635, "step": 11990 }, { "epoch": 0.022985797650276835, "grad_norm": 1.544189691543579, "learning_rate": 2.297997050166327e-05, "loss": 6.4334, "step": 12000 }, { "epoch": 0.023004952481652068, "grad_norm": 1.6495165824890137, "learning_rate": 2.299912526577235e-05, "loss": 6.3452, "step": 12010 }, { "epoch": 0.023024107313027297, "grad_norm": 1.9098985195159912, "learning_rate": 2.301828002988143e-05, "loss": 6.2993, "step": 12020 }, { "epoch": 0.023043262144402527, "grad_norm": 1.4770184755325317, "learning_rate": 2.3037434793990508e-05, "loss": 6.3272, "step": 12030 }, { "epoch": 0.02306241697577776, "grad_norm": 1.700310468673706, "learning_rate": 2.305658955809959e-05, "loss": 6.3646, "step": 12040 }, { "epoch": 0.02308157180715299, "grad_norm": 1.6745761632919312, "learning_rate": 2.3075744322208667e-05, "loss": 6.391, "step": 12050 }, { "epoch": 0.02310072663852822, "grad_norm": 1.8157908916473389, "learning_rate": 2.3094899086317752e-05, "loss": 6.3017, "step": 12060 }, { "epoch": 0.02311988146990345, "grad_norm": 1.758819818496704, "learning_rate": 2.311405385042683e-05, "loss": 6.3722, "step": 12070 }, { "epoch": 0.02313903630127868, "grad_norm": 2.0959954261779785, "learning_rate": 2.313320861453591e-05, "loss": 6.2475, "step": 12080 }, { "epoch": 0.02315819113265391, "grad_norm": 1.8937616348266602, "learning_rate": 2.3152363378644992e-05, "loss": 6.2754, "step": 12090 }, { "epoch": 0.023177345964029143, "grad_norm": 1.5926623344421387, "learning_rate": 2.317151814275407e-05, "loss": 6.3171, "step": 12100 }, { "epoch": 0.023196500795404373, "grad_norm": 1.5051072835922241, "learning_rate": 2.319067290686315e-05, "loss": 6.4366, 
"step": 12110 }, { "epoch": 0.023215655626779602, "grad_norm": 1.6980488300323486, "learning_rate": 2.320982767097223e-05, "loss": 6.2983, "step": 12120 }, { "epoch": 0.023234810458154835, "grad_norm": 1.889420986175537, "learning_rate": 2.322898243508131e-05, "loss": 6.342, "step": 12130 }, { "epoch": 0.023253965289530065, "grad_norm": 1.632094144821167, "learning_rate": 2.324813719919039e-05, "loss": 6.4155, "step": 12140 }, { "epoch": 0.023273120120905294, "grad_norm": 1.8383002281188965, "learning_rate": 2.326729196329947e-05, "loss": 6.3347, "step": 12150 }, { "epoch": 0.023292274952280527, "grad_norm": 1.5652227401733398, "learning_rate": 2.3286446727408548e-05, "loss": 6.4177, "step": 12160 }, { "epoch": 0.023311429783655756, "grad_norm": 1.7491573095321655, "learning_rate": 2.3305601491517633e-05, "loss": 6.2914, "step": 12170 }, { "epoch": 0.02333058461503099, "grad_norm": 1.6834882497787476, "learning_rate": 2.3324756255626707e-05, "loss": 6.2237, "step": 12180 }, { "epoch": 0.02334973944640622, "grad_norm": 1.7505298852920532, "learning_rate": 2.3343911019735792e-05, "loss": 6.4069, "step": 12190 }, { "epoch": 0.02336889427778145, "grad_norm": 1.7907098531723022, "learning_rate": 2.336306578384487e-05, "loss": 6.3439, "step": 12200 }, { "epoch": 0.02338804910915668, "grad_norm": 1.7743970155715942, "learning_rate": 2.338222054795395e-05, "loss": 6.2983, "step": 12210 }, { "epoch": 0.02340720394053191, "grad_norm": 1.8014278411865234, "learning_rate": 2.340137531206303e-05, "loss": 6.371, "step": 12220 }, { "epoch": 0.02342635877190714, "grad_norm": 2.0967187881469727, "learning_rate": 2.342053007617211e-05, "loss": 6.3352, "step": 12230 }, { "epoch": 0.023445513603282373, "grad_norm": 1.7350356578826904, "learning_rate": 2.3439684840281188e-05, "loss": 6.2877, "step": 12240 }, { "epoch": 0.023464668434657603, "grad_norm": 1.7417073249816895, "learning_rate": 2.345883960439027e-05, "loss": 6.4445, "step": 12250 }, { "epoch": 0.023483823266032832, 
"grad_norm": 1.6169174909591675, "learning_rate": 2.347799436849935e-05, "loss": 6.3707, "step": 12260 }, { "epoch": 0.023502978097408065, "grad_norm": 1.7676315307617188, "learning_rate": 2.349714913260843e-05, "loss": 6.2929, "step": 12270 }, { "epoch": 0.023522132928783294, "grad_norm": 1.9016016721725464, "learning_rate": 2.351630389671751e-05, "loss": 6.3376, "step": 12280 }, { "epoch": 0.023541287760158524, "grad_norm": 1.4224374294281006, "learning_rate": 2.3535458660826588e-05, "loss": 6.3765, "step": 12290 }, { "epoch": 0.023560442591533757, "grad_norm": 1.885993480682373, "learning_rate": 2.3554613424935673e-05, "loss": 6.2638, "step": 12300 }, { "epoch": 0.023579597422908986, "grad_norm": 1.478058934211731, "learning_rate": 2.357376818904475e-05, "loss": 6.292, "step": 12310 }, { "epoch": 0.023598752254284216, "grad_norm": 1.5600755214691162, "learning_rate": 2.3592922953153832e-05, "loss": 6.2195, "step": 12320 }, { "epoch": 0.02361790708565945, "grad_norm": 1.7687921524047852, "learning_rate": 2.361207771726291e-05, "loss": 6.3629, "step": 12330 }, { "epoch": 0.023637061917034678, "grad_norm": 1.5857830047607422, "learning_rate": 2.363123248137199e-05, "loss": 6.218, "step": 12340 }, { "epoch": 0.02365621674840991, "grad_norm": 1.5317177772521973, "learning_rate": 2.365038724548107e-05, "loss": 6.3405, "step": 12350 }, { "epoch": 0.02367537157978514, "grad_norm": 2.1873645782470703, "learning_rate": 2.366954200959015e-05, "loss": 6.2668, "step": 12360 }, { "epoch": 0.02369452641116037, "grad_norm": 1.866497278213501, "learning_rate": 2.3688696773699228e-05, "loss": 6.3256, "step": 12370 }, { "epoch": 0.023713681242535603, "grad_norm": 1.7899610996246338, "learning_rate": 2.370785153780831e-05, "loss": 6.1502, "step": 12380 }, { "epoch": 0.023732836073910833, "grad_norm": 1.3628723621368408, "learning_rate": 2.3727006301917387e-05, "loss": 6.3151, "step": 12390 }, { "epoch": 0.023751990905286062, "grad_norm": 1.630735993385315, "learning_rate": 
2.374616106602647e-05, "loss": 6.3087, "step": 12400 }, { "epoch": 0.023771145736661295, "grad_norm": 1.9851495027542114, "learning_rate": 2.3765315830135547e-05, "loss": 6.4448, "step": 12410 }, { "epoch": 0.023790300568036524, "grad_norm": 1.795396327972412, "learning_rate": 2.378447059424463e-05, "loss": 6.3111, "step": 12420 }, { "epoch": 0.023809455399411754, "grad_norm": 1.624435305595398, "learning_rate": 2.3803625358353713e-05, "loss": 6.3805, "step": 12430 }, { "epoch": 0.023828610230786987, "grad_norm": 2.1324832439422607, "learning_rate": 2.382278012246279e-05, "loss": 6.1894, "step": 12440 }, { "epoch": 0.023847765062162216, "grad_norm": 1.8196152448654175, "learning_rate": 2.3841934886571872e-05, "loss": 6.3316, "step": 12450 }, { "epoch": 0.023866919893537446, "grad_norm": 1.6970689296722412, "learning_rate": 2.386108965068095e-05, "loss": 6.3529, "step": 12460 }, { "epoch": 0.02388607472491268, "grad_norm": 1.8227983713150024, "learning_rate": 2.388024441479003e-05, "loss": 6.2224, "step": 12470 }, { "epoch": 0.023905229556287908, "grad_norm": 1.554147720336914, "learning_rate": 2.389939917889911e-05, "loss": 6.4, "step": 12480 }, { "epoch": 0.023924384387663138, "grad_norm": 1.363258957862854, "learning_rate": 2.391855394300819e-05, "loss": 6.4716, "step": 12490 }, { "epoch": 0.02394353921903837, "grad_norm": 1.8838871717453003, "learning_rate": 2.3937708707117268e-05, "loss": 6.2504, "step": 12500 }, { "epoch": 0.0239626940504136, "grad_norm": 1.8207131624221802, "learning_rate": 2.395686347122635e-05, "loss": 6.2025, "step": 12510 }, { "epoch": 0.023981848881788833, "grad_norm": 1.6717185974121094, "learning_rate": 2.3976018235335427e-05, "loss": 6.3505, "step": 12520 }, { "epoch": 0.024001003713164062, "grad_norm": 2.2397615909576416, "learning_rate": 2.3995172999444512e-05, "loss": 6.2972, "step": 12530 }, { "epoch": 0.024020158544539292, "grad_norm": 1.5988942384719849, "learning_rate": 2.4014327763553587e-05, "loss": 6.3034, "step": 12540 }, { 
"epoch": 0.024039313375914525, "grad_norm": 1.6280444860458374, "learning_rate": 2.403348252766267e-05, "loss": 6.249, "step": 12550 }, { "epoch": 0.024058468207289754, "grad_norm": 1.8021997213363647, "learning_rate": 2.405263729177175e-05, "loss": 6.222, "step": 12560 }, { "epoch": 0.024077623038664984, "grad_norm": 1.571223258972168, "learning_rate": 2.407179205588083e-05, "loss": 6.3494, "step": 12570 }, { "epoch": 0.024096777870040217, "grad_norm": 1.5751124620437622, "learning_rate": 2.409094681998991e-05, "loss": 6.3838, "step": 12580 }, { "epoch": 0.024115932701415446, "grad_norm": 1.7965781688690186, "learning_rate": 2.411010158409899e-05, "loss": 6.2343, "step": 12590 }, { "epoch": 0.024135087532790676, "grad_norm": 1.9820020198822021, "learning_rate": 2.412925634820807e-05, "loss": 6.2349, "step": 12600 }, { "epoch": 0.02415424236416591, "grad_norm": 1.666314721107483, "learning_rate": 2.414841111231715e-05, "loss": 6.2408, "step": 12610 }, { "epoch": 0.024173397195541138, "grad_norm": 1.7502667903900146, "learning_rate": 2.416756587642623e-05, "loss": 6.3179, "step": 12620 }, { "epoch": 0.024192552026916368, "grad_norm": 1.7790875434875488, "learning_rate": 2.4186720640535308e-05, "loss": 6.2046, "step": 12630 }, { "epoch": 0.0242117068582916, "grad_norm": 1.5557540655136108, "learning_rate": 2.420587540464439e-05, "loss": 6.3382, "step": 12640 }, { "epoch": 0.02423086168966683, "grad_norm": 1.6046587228775024, "learning_rate": 2.4225030168753467e-05, "loss": 6.2363, "step": 12650 }, { "epoch": 0.02425001652104206, "grad_norm": 1.6414800882339478, "learning_rate": 2.4244184932862552e-05, "loss": 6.2732, "step": 12660 }, { "epoch": 0.024269171352417292, "grad_norm": 1.6383501291275024, "learning_rate": 2.426333969697163e-05, "loss": 6.2573, "step": 12670 }, { "epoch": 0.024288326183792522, "grad_norm": 1.5570728778839111, "learning_rate": 2.428249446108071e-05, "loss": 6.3295, "step": 12680 }, { "epoch": 0.024307481015167755, "grad_norm": 
1.6910890340805054, "learning_rate": 2.430164922518979e-05, "loss": 6.293, "step": 12690 }, { "epoch": 0.024326635846542984, "grad_norm": 1.7884804010391235, "learning_rate": 2.432080398929887e-05, "loss": 6.2421, "step": 12700 }, { "epoch": 0.024345790677918214, "grad_norm": 1.4724515676498413, "learning_rate": 2.433995875340795e-05, "loss": 6.2582, "step": 12710 }, { "epoch": 0.024364945509293447, "grad_norm": 1.9449043273925781, "learning_rate": 2.435911351751703e-05, "loss": 6.2104, "step": 12720 }, { "epoch": 0.024384100340668676, "grad_norm": 1.732021689414978, "learning_rate": 2.4378268281626108e-05, "loss": 6.1478, "step": 12730 }, { "epoch": 0.024403255172043906, "grad_norm": 1.612549901008606, "learning_rate": 2.439742304573519e-05, "loss": 6.2971, "step": 12740 }, { "epoch": 0.02442241000341914, "grad_norm": 1.5844380855560303, "learning_rate": 2.4416577809844267e-05, "loss": 6.3434, "step": 12750 }, { "epoch": 0.024441564834794368, "grad_norm": 1.714060664176941, "learning_rate": 2.4435732573953348e-05, "loss": 6.2293, "step": 12760 }, { "epoch": 0.024460719666169597, "grad_norm": 1.6868025064468384, "learning_rate": 2.4454887338062433e-05, "loss": 6.21, "step": 12770 }, { "epoch": 0.02447987449754483, "grad_norm": 1.7212387323379517, "learning_rate": 2.447404210217151e-05, "loss": 6.2629, "step": 12780 }, { "epoch": 0.02449902932892006, "grad_norm": 1.933918833732605, "learning_rate": 2.4493196866280592e-05, "loss": 6.2918, "step": 12790 }, { "epoch": 0.02451818416029529, "grad_norm": 2.0174858570098877, "learning_rate": 2.451235163038967e-05, "loss": 6.269, "step": 12800 }, { "epoch": 0.024537338991670522, "grad_norm": 1.804815411567688, "learning_rate": 2.453150639449875e-05, "loss": 6.1799, "step": 12810 }, { "epoch": 0.024556493823045752, "grad_norm": 2.3771286010742188, "learning_rate": 2.455066115860783e-05, "loss": 6.369, "step": 12820 }, { "epoch": 0.024575648654420985, "grad_norm": 1.6206376552581787, "learning_rate": 2.456981592271691e-05, 
"loss": 6.3303, "step": 12830 }, { "epoch": 0.024594803485796214, "grad_norm": 1.648774266242981, "learning_rate": 2.458897068682599e-05, "loss": 6.3274, "step": 12840 }, { "epoch": 0.024613958317171444, "grad_norm": 1.7284884452819824, "learning_rate": 2.460812545093507e-05, "loss": 6.2708, "step": 12850 }, { "epoch": 0.024633113148546677, "grad_norm": 1.6062570810317993, "learning_rate": 2.4627280215044148e-05, "loss": 6.2976, "step": 12860 }, { "epoch": 0.024652267979921906, "grad_norm": 1.9800915718078613, "learning_rate": 2.464643497915323e-05, "loss": 6.3341, "step": 12870 }, { "epoch": 0.024671422811297136, "grad_norm": 1.6685805320739746, "learning_rate": 2.4665589743262307e-05, "loss": 6.293, "step": 12880 }, { "epoch": 0.02469057764267237, "grad_norm": 1.7178478240966797, "learning_rate": 2.4684744507371392e-05, "loss": 6.3577, "step": 12890 }, { "epoch": 0.024709732474047598, "grad_norm": 1.4229052066802979, "learning_rate": 2.4703899271480466e-05, "loss": 6.3738, "step": 12900 }, { "epoch": 0.024728887305422827, "grad_norm": 1.5834821462631226, "learning_rate": 2.472305403558955e-05, "loss": 6.2616, "step": 12910 }, { "epoch": 0.02474804213679806, "grad_norm": 2.0544495582580566, "learning_rate": 2.474220879969863e-05, "loss": 6.252, "step": 12920 }, { "epoch": 0.02476719696817329, "grad_norm": 1.4403250217437744, "learning_rate": 2.476136356380771e-05, "loss": 6.314, "step": 12930 }, { "epoch": 0.02478635179954852, "grad_norm": 1.586239218711853, "learning_rate": 2.478051832791679e-05, "loss": 6.2302, "step": 12940 }, { "epoch": 0.024805506630923752, "grad_norm": 2.0686960220336914, "learning_rate": 2.479967309202587e-05, "loss": 6.1009, "step": 12950 }, { "epoch": 0.02482466146229898, "grad_norm": 1.8058030605316162, "learning_rate": 2.481882785613495e-05, "loss": 6.1371, "step": 12960 }, { "epoch": 0.02484381629367421, "grad_norm": 1.6011918783187866, "learning_rate": 2.483798262024403e-05, "loss": 6.2078, "step": 12970 }, { "epoch": 
0.024862971125049444, "grad_norm": 1.6550885438919067, "learning_rate": 2.485713738435311e-05, "loss": 6.2945, "step": 12980 }, { "epoch": 0.024882125956424674, "grad_norm": 1.7438265085220337, "learning_rate": 2.4874376672051283e-05, "loss": 6.1901, "step": 12990 }, { "epoch": 0.024901280787799907, "grad_norm": 1.7245286703109741, "learning_rate": 2.489353143616036e-05, "loss": 6.2891, "step": 13000 }, { "epoch": 0.024920435619175136, "grad_norm": 1.6606239080429077, "learning_rate": 2.4912686200269442e-05, "loss": 6.2935, "step": 13010 }, { "epoch": 0.024939590450550365, "grad_norm": 1.6251122951507568, "learning_rate": 2.493184096437852e-05, "loss": 6.2708, "step": 13020 }, { "epoch": 0.0249587452819256, "grad_norm": 1.9012553691864014, "learning_rate": 2.49509957284876e-05, "loss": 6.2312, "step": 13030 }, { "epoch": 0.024977900113300828, "grad_norm": 1.8463987112045288, "learning_rate": 2.497015049259668e-05, "loss": 6.2052, "step": 13040 }, { "epoch": 0.024997054944676057, "grad_norm": 1.736588478088379, "learning_rate": 2.498930525670576e-05, "loss": 6.2156, "step": 13050 }, { "epoch": 0.02501620977605129, "grad_norm": 1.9656388759613037, "learning_rate": 2.500846002081484e-05, "loss": 6.2628, "step": 13060 }, { "epoch": 0.02503536460742652, "grad_norm": 1.5639429092407227, "learning_rate": 2.5027614784923923e-05, "loss": 6.301, "step": 13070 }, { "epoch": 0.02505451943880175, "grad_norm": 1.817817211151123, "learning_rate": 2.5046769549032998e-05, "loss": 6.2367, "step": 13080 }, { "epoch": 0.025073674270176982, "grad_norm": 1.803736925125122, "learning_rate": 2.5065924313142083e-05, "loss": 6.1699, "step": 13090 }, { "epoch": 0.02509282910155221, "grad_norm": 1.6149784326553345, "learning_rate": 2.5085079077251164e-05, "loss": 6.2161, "step": 13100 }, { "epoch": 0.02511198393292744, "grad_norm": 1.735405445098877, "learning_rate": 2.5104233841360242e-05, "loss": 6.2051, "step": 13110 }, { "epoch": 0.025131138764302674, "grad_norm": 1.7959634065628052, 
"learning_rate": 2.5123388605469323e-05, "loss": 6.1451, "step": 13120 }, { "epoch": 0.025150293595677903, "grad_norm": 1.5084222555160522, "learning_rate": 2.51425433695784e-05, "loss": 6.2483, "step": 13130 }, { "epoch": 0.025169448427053133, "grad_norm": 1.6972427368164062, "learning_rate": 2.5161698133687482e-05, "loss": 6.3436, "step": 13140 }, { "epoch": 0.025188603258428366, "grad_norm": 1.5083811283111572, "learning_rate": 2.518085289779656e-05, "loss": 6.2413, "step": 13150 }, { "epoch": 0.025207758089803595, "grad_norm": 1.5151419639587402, "learning_rate": 2.520000766190564e-05, "loss": 6.2566, "step": 13160 }, { "epoch": 0.02522691292117883, "grad_norm": 1.5205459594726562, "learning_rate": 2.521916242601472e-05, "loss": 6.2026, "step": 13170 }, { "epoch": 0.025246067752554058, "grad_norm": 1.9121886491775513, "learning_rate": 2.52383171901238e-05, "loss": 6.2672, "step": 13180 }, { "epoch": 0.025265222583929287, "grad_norm": 1.8880094289779663, "learning_rate": 2.525747195423288e-05, "loss": 6.1315, "step": 13190 }, { "epoch": 0.02528437741530452, "grad_norm": 2.5776774883270264, "learning_rate": 2.5276626718341963e-05, "loss": 6.2832, "step": 13200 }, { "epoch": 0.02530353224667975, "grad_norm": 1.7944399118423462, "learning_rate": 2.529578148245104e-05, "loss": 6.2264, "step": 13210 }, { "epoch": 0.02532268707805498, "grad_norm": 1.765110731124878, "learning_rate": 2.5314936246560123e-05, "loss": 6.1449, "step": 13220 }, { "epoch": 0.025341841909430212, "grad_norm": 1.888489842414856, "learning_rate": 2.53340910106692e-05, "loss": 6.2237, "step": 13230 }, { "epoch": 0.02536099674080544, "grad_norm": 1.7346301078796387, "learning_rate": 2.5353245774778282e-05, "loss": 6.3839, "step": 13240 }, { "epoch": 0.02538015157218067, "grad_norm": 1.504463791847229, "learning_rate": 2.537240053888736e-05, "loss": 6.2098, "step": 13250 }, { "epoch": 0.025399306403555904, "grad_norm": 2.132675886154175, "learning_rate": 2.539155530299644e-05, "loss": 6.1599, 
"step": 13260 }, { "epoch": 0.025418461234931133, "grad_norm": 1.7122234106063843, "learning_rate": 2.5410710067105522e-05, "loss": 6.3002, "step": 13270 }, { "epoch": 0.025437616066306363, "grad_norm": 1.641703486442566, "learning_rate": 2.54298648312146e-05, "loss": 6.2764, "step": 13280 }, { "epoch": 0.025456770897681596, "grad_norm": 1.6047027111053467, "learning_rate": 2.544901959532368e-05, "loss": 6.2169, "step": 13290 }, { "epoch": 0.025475925729056825, "grad_norm": 1.6126278638839722, "learning_rate": 2.546817435943276e-05, "loss": 6.2273, "step": 13300 }, { "epoch": 0.025495080560432055, "grad_norm": 1.5836026668548584, "learning_rate": 2.5487329123541844e-05, "loss": 6.2132, "step": 13310 }, { "epoch": 0.025514235391807288, "grad_norm": 1.8224878311157227, "learning_rate": 2.5506483887650922e-05, "loss": 6.228, "step": 13320 }, { "epoch": 0.025533390223182517, "grad_norm": 1.8392162322998047, "learning_rate": 2.5525638651760003e-05, "loss": 6.2363, "step": 13330 }, { "epoch": 0.02555254505455775, "grad_norm": 1.5554753541946411, "learning_rate": 2.554479341586908e-05, "loss": 6.2191, "step": 13340 }, { "epoch": 0.02557169988593298, "grad_norm": 1.7068493366241455, "learning_rate": 2.5563948179978163e-05, "loss": 6.1293, "step": 13350 }, { "epoch": 0.02559085471730821, "grad_norm": 1.7974426746368408, "learning_rate": 2.558310294408724e-05, "loss": 6.1218, "step": 13360 }, { "epoch": 0.025610009548683442, "grad_norm": 1.9217758178710938, "learning_rate": 2.5602257708196322e-05, "loss": 6.1885, "step": 13370 }, { "epoch": 0.02562916438005867, "grad_norm": 1.6659284830093384, "learning_rate": 2.56214124723054e-05, "loss": 6.1306, "step": 13380 }, { "epoch": 0.0256483192114339, "grad_norm": 1.8207738399505615, "learning_rate": 2.564056723641448e-05, "loss": 6.1961, "step": 13390 }, { "epoch": 0.025667474042809134, "grad_norm": 1.9434138536453247, "learning_rate": 2.565972200052356e-05, "loss": 6.1956, "step": 13400 }, { "epoch": 0.025686628874184363, 
"grad_norm": 1.8117862939834595, "learning_rate": 2.567887676463264e-05, "loss": 6.1006, "step": 13410 }, { "epoch": 0.025705783705559593, "grad_norm": 2.056490898132324, "learning_rate": 2.5698031528741718e-05, "loss": 6.1008, "step": 13420 }, { "epoch": 0.025724938536934826, "grad_norm": 1.7230660915374756, "learning_rate": 2.5717186292850803e-05, "loss": 6.2572, "step": 13430 }, { "epoch": 0.025744093368310055, "grad_norm": 1.661786437034607, "learning_rate": 2.5736341056959884e-05, "loss": 6.1447, "step": 13440 }, { "epoch": 0.025763248199685285, "grad_norm": 1.9061404466629028, "learning_rate": 2.5755495821068962e-05, "loss": 6.3318, "step": 13450 }, { "epoch": 0.025782403031060518, "grad_norm": 1.6613322496414185, "learning_rate": 2.5774650585178043e-05, "loss": 6.0188, "step": 13460 }, { "epoch": 0.025801557862435747, "grad_norm": 1.778003454208374, "learning_rate": 2.579380534928712e-05, "loss": 6.14, "step": 13470 }, { "epoch": 0.025820712693810977, "grad_norm": 1.755669116973877, "learning_rate": 2.5812960113396203e-05, "loss": 6.0513, "step": 13480 }, { "epoch": 0.02583986752518621, "grad_norm": 2.1142263412475586, "learning_rate": 2.583211487750528e-05, "loss": 6.2093, "step": 13490 }, { "epoch": 0.02585902235656144, "grad_norm": 1.6789112091064453, "learning_rate": 2.5851269641614362e-05, "loss": 6.1712, "step": 13500 }, { "epoch": 0.025878177187936672, "grad_norm": 1.7199293375015259, "learning_rate": 2.587042440572344e-05, "loss": 6.1984, "step": 13510 }, { "epoch": 0.0258973320193119, "grad_norm": 1.711504578590393, "learning_rate": 2.588957916983252e-05, "loss": 6.217, "step": 13520 }, { "epoch": 0.02591648685068713, "grad_norm": 1.656407356262207, "learning_rate": 2.59087339339416e-05, "loss": 6.1692, "step": 13530 }, { "epoch": 0.025935641682062364, "grad_norm": 1.5785645246505737, "learning_rate": 2.592788869805068e-05, "loss": 6.1051, "step": 13540 }, { "epoch": 0.025954796513437593, "grad_norm": 1.9136085510253906, "learning_rate": 
2.5947043462159758e-05, "loss": 6.1306, "step": 13550 }, { "epoch": 0.025973951344812823, "grad_norm": 1.6905499696731567, "learning_rate": 2.5966198226268843e-05, "loss": 6.2218, "step": 13560 }, { "epoch": 0.025993106176188056, "grad_norm": 1.7221226692199707, "learning_rate": 2.598535299037792e-05, "loss": 6.0934, "step": 13570 }, { "epoch": 0.026012261007563285, "grad_norm": 1.749031901359558, "learning_rate": 2.6004507754487002e-05, "loss": 6.2633, "step": 13580 }, { "epoch": 0.026031415838938515, "grad_norm": 1.9605540037155151, "learning_rate": 2.602366251859608e-05, "loss": 6.1326, "step": 13590 }, { "epoch": 0.026050570670313748, "grad_norm": 1.7069898843765259, "learning_rate": 2.604281728270516e-05, "loss": 6.3131, "step": 13600 }, { "epoch": 0.026069725501688977, "grad_norm": 1.662554383277893, "learning_rate": 2.6061972046814243e-05, "loss": 6.2505, "step": 13610 }, { "epoch": 0.026088880333064206, "grad_norm": 2.113816499710083, "learning_rate": 2.608112681092332e-05, "loss": 6.1096, "step": 13620 }, { "epoch": 0.02610803516443944, "grad_norm": 1.687127709388733, "learning_rate": 2.6100281575032402e-05, "loss": 6.0369, "step": 13630 }, { "epoch": 0.02612718999581467, "grad_norm": 1.701443076133728, "learning_rate": 2.611943633914148e-05, "loss": 6.2875, "step": 13640 }, { "epoch": 0.0261463448271899, "grad_norm": 1.5978463888168335, "learning_rate": 2.613859110325056e-05, "loss": 6.1079, "step": 13650 }, { "epoch": 0.02616549965856513, "grad_norm": 1.5105425119400024, "learning_rate": 2.615774586735964e-05, "loss": 6.1399, "step": 13660 }, { "epoch": 0.02618465448994036, "grad_norm": 1.6644026041030884, "learning_rate": 2.6176900631468724e-05, "loss": 6.2011, "step": 13670 }, { "epoch": 0.026203809321315594, "grad_norm": 1.9970054626464844, "learning_rate": 2.6196055395577802e-05, "loss": 6.2181, "step": 13680 }, { "epoch": 0.026222964152690823, "grad_norm": 1.600908637046814, "learning_rate": 2.6215210159686883e-05, "loss": 6.2043, "step": 13690 }, { 
"epoch": 0.026242118984066053, "grad_norm": 1.7814316749572754, "learning_rate": 2.623436492379596e-05, "loss": 6.144, "step": 13700 }, { "epoch": 0.026261273815441286, "grad_norm": 1.7655621767044067, "learning_rate": 2.6253519687905042e-05, "loss": 6.0881, "step": 13710 }, { "epoch": 0.026280428646816515, "grad_norm": 2.0020830631256104, "learning_rate": 2.627267445201412e-05, "loss": 6.07, "step": 13720 }, { "epoch": 0.026299583478191745, "grad_norm": 1.9896035194396973, "learning_rate": 2.62918292161232e-05, "loss": 6.1501, "step": 13730 }, { "epoch": 0.026318738309566977, "grad_norm": 1.740111231803894, "learning_rate": 2.631098398023228e-05, "loss": 6.0587, "step": 13740 }, { "epoch": 0.026337893140942207, "grad_norm": 1.616782307624817, "learning_rate": 2.633013874434136e-05, "loss": 6.2117, "step": 13750 }, { "epoch": 0.026357047972317436, "grad_norm": 1.7182674407958984, "learning_rate": 2.634929350845044e-05, "loss": 6.0556, "step": 13760 }, { "epoch": 0.02637620280369267, "grad_norm": 1.7895504236221313, "learning_rate": 2.636844827255952e-05, "loss": 6.1889, "step": 13770 }, { "epoch": 0.0263953576350679, "grad_norm": 1.7208446264266968, "learning_rate": 2.6387603036668605e-05, "loss": 6.1971, "step": 13780 }, { "epoch": 0.02641451246644313, "grad_norm": 1.6902748346328735, "learning_rate": 2.6406757800777683e-05, "loss": 6.1453, "step": 13790 }, { "epoch": 0.02643366729781836, "grad_norm": 1.6458094120025635, "learning_rate": 2.6425912564886764e-05, "loss": 6.1351, "step": 13800 }, { "epoch": 0.02645282212919359, "grad_norm": 1.9013779163360596, "learning_rate": 2.6445067328995842e-05, "loss": 6.0563, "step": 13810 }, { "epoch": 0.02647197696056882, "grad_norm": 1.6850100755691528, "learning_rate": 2.6464222093104923e-05, "loss": 6.1966, "step": 13820 }, { "epoch": 0.026491131791944053, "grad_norm": 1.9475624561309814, "learning_rate": 2.6483376857214e-05, "loss": 6.2146, "step": 13830 }, { "epoch": 0.026510286623319283, "grad_norm": 1.722733497619629, 
"learning_rate": 2.6502531621323082e-05, "loss": 6.0933, "step": 13840 }, { "epoch": 0.026529441454694516, "grad_norm": 1.5105639696121216, "learning_rate": 2.652168638543216e-05, "loss": 6.1017, "step": 13850 }, { "epoch": 0.026548596286069745, "grad_norm": 1.6836968660354614, "learning_rate": 2.654084114954124e-05, "loss": 6.1546, "step": 13860 }, { "epoch": 0.026567751117444974, "grad_norm": 1.4375312328338623, "learning_rate": 2.655999591365032e-05, "loss": 6.1875, "step": 13870 }, { "epoch": 0.026586905948820207, "grad_norm": 1.7565215826034546, "learning_rate": 2.65791506777594e-05, "loss": 6.104, "step": 13880 }, { "epoch": 0.026606060780195437, "grad_norm": 1.5923494100570679, "learning_rate": 2.659830544186848e-05, "loss": 6.1474, "step": 13890 }, { "epoch": 0.026625215611570666, "grad_norm": 1.6832373142242432, "learning_rate": 2.661746020597756e-05, "loss": 6.1856, "step": 13900 }, { "epoch": 0.0266443704429459, "grad_norm": 1.5343492031097412, "learning_rate": 2.6636614970086638e-05, "loss": 6.1759, "step": 13910 }, { "epoch": 0.02666352527432113, "grad_norm": 1.7887978553771973, "learning_rate": 2.6655769734195723e-05, "loss": 6.1491, "step": 13920 }, { "epoch": 0.026682680105696358, "grad_norm": 1.8319851160049438, "learning_rate": 2.66749244983048e-05, "loss": 6.246, "step": 13930 }, { "epoch": 0.02670183493707159, "grad_norm": 1.7032910585403442, "learning_rate": 2.6694079262413882e-05, "loss": 6.1686, "step": 13940 }, { "epoch": 0.02672098976844682, "grad_norm": 1.9818918704986572, "learning_rate": 2.6713234026522963e-05, "loss": 6.0847, "step": 13950 }, { "epoch": 0.02674014459982205, "grad_norm": 1.581139326095581, "learning_rate": 2.673238879063204e-05, "loss": 6.1374, "step": 13960 }, { "epoch": 0.026759299431197283, "grad_norm": 1.8800472021102905, "learning_rate": 2.6751543554741122e-05, "loss": 6.0832, "step": 13970 }, { "epoch": 0.026778454262572512, "grad_norm": 1.6699070930480957, "learning_rate": 2.67706983188502e-05, "loss": 6.2179, 
"step": 13980 }, { "epoch": 0.026797609093947745, "grad_norm": 1.6352778673171997, "learning_rate": 2.678985308295928e-05, "loss": 6.127, "step": 13990 }, { "epoch": 0.026816763925322975, "grad_norm": 1.9704477787017822, "learning_rate": 2.680900784706836e-05, "loss": 6.1901, "step": 14000 }, { "epoch": 0.026835918756698204, "grad_norm": 1.8721427917480469, "learning_rate": 2.682816261117744e-05, "loss": 6.1226, "step": 14010 }, { "epoch": 0.026855073588073437, "grad_norm": 1.5678311586380005, "learning_rate": 2.684731737528652e-05, "loss": 6.092, "step": 14020 }, { "epoch": 0.026874228419448667, "grad_norm": 1.9837591648101807, "learning_rate": 2.6866472139395603e-05, "loss": 6.176, "step": 14030 }, { "epoch": 0.026893383250823896, "grad_norm": 2.20599365234375, "learning_rate": 2.688562690350468e-05, "loss": 6.1221, "step": 14040 }, { "epoch": 0.02691253808219913, "grad_norm": 1.7255932092666626, "learning_rate": 2.6904781667613763e-05, "loss": 6.296, "step": 14050 }, { "epoch": 0.02693169291357436, "grad_norm": 1.6729307174682617, "learning_rate": 2.692393643172284e-05, "loss": 6.0721, "step": 14060 }, { "epoch": 0.026950847744949588, "grad_norm": 1.9126454591751099, "learning_rate": 2.6943091195831922e-05, "loss": 6.0384, "step": 14070 }, { "epoch": 0.02697000257632482, "grad_norm": 1.8556264638900757, "learning_rate": 2.6962245959941e-05, "loss": 6.1573, "step": 14080 }, { "epoch": 0.02698915740770005, "grad_norm": 1.6210819482803345, "learning_rate": 2.698140072405008e-05, "loss": 6.0956, "step": 14090 }, { "epoch": 0.02700831223907528, "grad_norm": 1.8685495853424072, "learning_rate": 2.700055548815916e-05, "loss": 6.0027, "step": 14100 }, { "epoch": 0.027027467070450513, "grad_norm": 1.5704573392868042, "learning_rate": 2.701971025226824e-05, "loss": 6.1485, "step": 14110 }, { "epoch": 0.027046621901825742, "grad_norm": 1.6420254707336426, "learning_rate": 2.703886501637732e-05, "loss": 6.0087, "step": 14120 }, { "epoch": 0.027065776733200972, "grad_norm": 
1.6082450151443481, "learning_rate": 2.70580197804864e-05, "loss": 6.2288, "step": 14130 }, { "epoch": 0.027084931564576205, "grad_norm": 1.6477327346801758, "learning_rate": 2.7077174544595484e-05, "loss": 6.0582, "step": 14140 }, { "epoch": 0.027104086395951434, "grad_norm": 1.9292246103286743, "learning_rate": 2.7096329308704562e-05, "loss": 6.1298, "step": 14150 }, { "epoch": 0.027123241227326667, "grad_norm": 1.9469560384750366, "learning_rate": 2.7115484072813643e-05, "loss": 6.208, "step": 14160 }, { "epoch": 0.027142396058701897, "grad_norm": 1.896006464958191, "learning_rate": 2.713463883692272e-05, "loss": 6.0909, "step": 14170 }, { "epoch": 0.027161550890077126, "grad_norm": 1.69797945022583, "learning_rate": 2.7153793601031803e-05, "loss": 6.1023, "step": 14180 }, { "epoch": 0.02718070572145236, "grad_norm": 1.693359613418579, "learning_rate": 2.717294836514088e-05, "loss": 6.0825, "step": 14190 }, { "epoch": 0.02719986055282759, "grad_norm": 1.7814431190490723, "learning_rate": 2.7192103129249962e-05, "loss": 6.2079, "step": 14200 }, { "epoch": 0.027219015384202818, "grad_norm": 1.8770074844360352, "learning_rate": 2.721125789335904e-05, "loss": 6.0042, "step": 14210 }, { "epoch": 0.02723817021557805, "grad_norm": 2.0219039916992188, "learning_rate": 2.723041265746812e-05, "loss": 6.0261, "step": 14220 }, { "epoch": 0.02725732504695328, "grad_norm": 1.575867772102356, "learning_rate": 2.72495674215772e-05, "loss": 6.0565, "step": 14230 }, { "epoch": 0.02727647987832851, "grad_norm": 2.011826992034912, "learning_rate": 2.726872218568628e-05, "loss": 6.1578, "step": 14240 }, { "epoch": 0.027295634709703743, "grad_norm": 1.9958572387695312, "learning_rate": 2.7287876949795358e-05, "loss": 6.1896, "step": 14250 }, { "epoch": 0.027314789541078972, "grad_norm": 1.5580512285232544, "learning_rate": 2.730703171390444e-05, "loss": 6.1826, "step": 14260 }, { "epoch": 0.027333944372454202, "grad_norm": 1.7106115818023682, "learning_rate": 2.7326186478013517e-05, 
"loss": 6.1833, "step": 14270 }, { "epoch": 0.027353099203829435, "grad_norm": 1.7232980728149414, "learning_rate": 2.7345341242122602e-05, "loss": 6.2091, "step": 14280 }, { "epoch": 0.027372254035204664, "grad_norm": 1.6175113916397095, "learning_rate": 2.7364496006231683e-05, "loss": 6.1017, "step": 14290 }, { "epoch": 0.027391408866579894, "grad_norm": 1.8284010887145996, "learning_rate": 2.738365077034076e-05, "loss": 6.0659, "step": 14300 }, { "epoch": 0.027410563697955127, "grad_norm": 1.7277617454528809, "learning_rate": 2.7402805534449843e-05, "loss": 6.0134, "step": 14310 }, { "epoch": 0.027429718529330356, "grad_norm": 1.7047559022903442, "learning_rate": 2.742196029855892e-05, "loss": 6.0064, "step": 14320 }, { "epoch": 0.02744887336070559, "grad_norm": 1.734575867652893, "learning_rate": 2.7441115062668002e-05, "loss": 6.0587, "step": 14330 }, { "epoch": 0.02746802819208082, "grad_norm": 1.6561253070831299, "learning_rate": 2.746026982677708e-05, "loss": 6.1656, "step": 14340 }, { "epoch": 0.027487183023456048, "grad_norm": 1.709997296333313, "learning_rate": 2.747942459088616e-05, "loss": 6.0296, "step": 14350 }, { "epoch": 0.02750633785483128, "grad_norm": 1.7404764890670776, "learning_rate": 2.749857935499524e-05, "loss": 6.0341, "step": 14360 }, { "epoch": 0.02752549268620651, "grad_norm": 2.4085376262664795, "learning_rate": 2.751773411910432e-05, "loss": 6.0674, "step": 14370 }, { "epoch": 0.02754464751758174, "grad_norm": 1.609059453010559, "learning_rate": 2.7536888883213398e-05, "loss": 6.0913, "step": 14380 }, { "epoch": 0.027563802348956973, "grad_norm": 1.6792577505111694, "learning_rate": 2.7556043647322483e-05, "loss": 6.0572, "step": 14390 }, { "epoch": 0.027582957180332202, "grad_norm": 1.5432286262512207, "learning_rate": 2.757519841143156e-05, "loss": 6.2107, "step": 14400 }, { "epoch": 0.02760211201170743, "grad_norm": 1.668953776359558, "learning_rate": 2.7594353175540642e-05, "loss": 6.0336, "step": 14410 }, { "epoch": 
0.027621266843082665, "grad_norm": 1.7042629718780518, "learning_rate": 2.761350793964972e-05, "loss": 5.9797, "step": 14420 }, { "epoch": 0.027640421674457894, "grad_norm": 1.9677635431289673, "learning_rate": 2.76326627037588e-05, "loss": 6.1418, "step": 14430 }, { "epoch": 0.027659576505833124, "grad_norm": 2.183922529220581, "learning_rate": 2.765181746786788e-05, "loss": 6.0452, "step": 14440 }, { "epoch": 0.027678731337208357, "grad_norm": 1.6353955268859863, "learning_rate": 2.767097223197696e-05, "loss": 6.1631, "step": 14450 }, { "epoch": 0.027697886168583586, "grad_norm": 1.8094035387039185, "learning_rate": 2.7690126996086042e-05, "loss": 6.1198, "step": 14460 }, { "epoch": 0.027717040999958815, "grad_norm": 2.049185037612915, "learning_rate": 2.770928176019512e-05, "loss": 6.0117, "step": 14470 }, { "epoch": 0.02773619583133405, "grad_norm": 1.6768958568572998, "learning_rate": 2.77284365243042e-05, "loss": 5.9956, "step": 14480 }, { "epoch": 0.027755350662709278, "grad_norm": 1.8258692026138306, "learning_rate": 2.774759128841328e-05, "loss": 6.0938, "step": 14490 }, { "epoch": 0.02777450549408451, "grad_norm": 1.718754529953003, "learning_rate": 2.7766746052522364e-05, "loss": 6.1647, "step": 14500 }, { "epoch": 0.02779366032545974, "grad_norm": 1.697094440460205, "learning_rate": 2.778590081663144e-05, "loss": 6.0831, "step": 14510 }, { "epoch": 0.02781281515683497, "grad_norm": 1.723659634590149, "learning_rate": 2.7805055580740523e-05, "loss": 6.1315, "step": 14520 }, { "epoch": 0.027831969988210203, "grad_norm": 1.6662628650665283, "learning_rate": 2.78242103448496e-05, "loss": 6.0468, "step": 14530 }, { "epoch": 0.027851124819585432, "grad_norm": 1.6853712797164917, "learning_rate": 2.7843365108958682e-05, "loss": 6.1308, "step": 14540 }, { "epoch": 0.02787027965096066, "grad_norm": 1.5558981895446777, "learning_rate": 2.786251987306776e-05, "loss": 6.1595, "step": 14550 }, { "epoch": 0.027889434482335895, "grad_norm": 1.8938472270965576, 
"learning_rate": 2.788167463717684e-05, "loss": 6.1799, "step": 14560 }, { "epoch": 0.027908589313711124, "grad_norm": 2.0084338188171387, "learning_rate": 2.790082940128592e-05, "loss": 5.9861, "step": 14570 }, { "epoch": 0.027927744145086354, "grad_norm": 2.0519936084747314, "learning_rate": 2.7919984165395e-05, "loss": 6.0708, "step": 14580 }, { "epoch": 0.027946898976461586, "grad_norm": 1.7247552871704102, "learning_rate": 2.793913892950408e-05, "loss": 6.0691, "step": 14590 }, { "epoch": 0.027966053807836816, "grad_norm": 2.1993696689605713, "learning_rate": 2.795829369361316e-05, "loss": 6.2106, "step": 14600 }, { "epoch": 0.027985208639212045, "grad_norm": 1.7388323545455933, "learning_rate": 2.7977448457722238e-05, "loss": 6.0975, "step": 14610 }, { "epoch": 0.02800436347058728, "grad_norm": 1.972683310508728, "learning_rate": 2.799660322183132e-05, "loss": 6.0711, "step": 14620 }, { "epoch": 0.028023518301962508, "grad_norm": 1.7421444654464722, "learning_rate": 2.8015757985940404e-05, "loss": 6.085, "step": 14630 }, { "epoch": 0.028042673133337737, "grad_norm": 1.9297510385513306, "learning_rate": 2.8034912750049482e-05, "loss": 6.0506, "step": 14640 }, { "epoch": 0.02806182796471297, "grad_norm": 1.923265814781189, "learning_rate": 2.8054067514158563e-05, "loss": 6.0232, "step": 14650 }, { "epoch": 0.0280809827960882, "grad_norm": 1.6637510061264038, "learning_rate": 2.807322227826764e-05, "loss": 6.1568, "step": 14660 }, { "epoch": 0.028100137627463433, "grad_norm": 1.8678916692733765, "learning_rate": 2.8092377042376722e-05, "loss": 5.9972, "step": 14670 }, { "epoch": 0.028119292458838662, "grad_norm": 1.6899785995483398, "learning_rate": 2.81115318064858e-05, "loss": 5.9507, "step": 14680 }, { "epoch": 0.02813844729021389, "grad_norm": 1.7432644367218018, "learning_rate": 2.813068657059488e-05, "loss": 6.1568, "step": 14690 }, { "epoch": 0.028157602121589125, "grad_norm": 1.6517587900161743, "learning_rate": 2.814984133470396e-05, "loss": 5.9994, 
"step": 14700 }, { "epoch": 0.028176756952964354, "grad_norm": 1.921260952949524, "learning_rate": 2.816899609881304e-05, "loss": 6.1124, "step": 14710 }, { "epoch": 0.028195911784339583, "grad_norm": 1.9107500314712524, "learning_rate": 2.818815086292212e-05, "loss": 6.0526, "step": 14720 }, { "epoch": 0.028215066615714816, "grad_norm": 1.7468292713165283, "learning_rate": 2.82073056270312e-05, "loss": 6.2893, "step": 14730 }, { "epoch": 0.028234221447090046, "grad_norm": 2.036850690841675, "learning_rate": 2.8226460391140278e-05, "loss": 6.1784, "step": 14740 }, { "epoch": 0.028253376278465275, "grad_norm": 1.5528420209884644, "learning_rate": 2.8245615155249363e-05, "loss": 6.0266, "step": 14750 }, { "epoch": 0.02827253110984051, "grad_norm": 1.716058373451233, "learning_rate": 2.826476991935844e-05, "loss": 6.0405, "step": 14760 }, { "epoch": 0.028291685941215738, "grad_norm": 1.5003353357315063, "learning_rate": 2.8283924683467522e-05, "loss": 6.1597, "step": 14770 }, { "epoch": 0.028310840772590967, "grad_norm": 1.7357197999954224, "learning_rate": 2.83030794475766e-05, "loss": 6.0387, "step": 14780 }, { "epoch": 0.0283299956039662, "grad_norm": 1.7703202962875366, "learning_rate": 2.832223421168568e-05, "loss": 5.9663, "step": 14790 }, { "epoch": 0.02834915043534143, "grad_norm": 1.7451423406600952, "learning_rate": 2.8341388975794762e-05, "loss": 5.9778, "step": 14800 }, { "epoch": 0.02836830526671666, "grad_norm": 1.7748026847839355, "learning_rate": 2.836054373990384e-05, "loss": 6.0326, "step": 14810 }, { "epoch": 0.028387460098091892, "grad_norm": 1.643879771232605, "learning_rate": 2.837969850401292e-05, "loss": 6.1125, "step": 14820 }, { "epoch": 0.02840661492946712, "grad_norm": 1.7057485580444336, "learning_rate": 2.8398853268122e-05, "loss": 6.0479, "step": 14830 }, { "epoch": 0.028425769760842354, "grad_norm": 1.5991564989089966, "learning_rate": 2.841800803223108e-05, "loss": 6.0316, "step": 14840 }, { "epoch": 0.028444924592217584, "grad_norm": 
1.5143605470657349, "learning_rate": 2.843716279634016e-05, "loss": 6.0216, "step": 14850 }, { "epoch": 0.028464079423592813, "grad_norm": 1.6663146018981934, "learning_rate": 2.8456317560449243e-05, "loss": 6.1037, "step": 14860 }, { "epoch": 0.028483234254968046, "grad_norm": 1.7024614810943604, "learning_rate": 2.847547232455832e-05, "loss": 6.1089, "step": 14870 }, { "epoch": 0.028502389086343276, "grad_norm": 1.6531018018722534, "learning_rate": 2.8494627088667403e-05, "loss": 6.1226, "step": 14880 }, { "epoch": 0.028521543917718505, "grad_norm": 1.9436455965042114, "learning_rate": 2.851378185277648e-05, "loss": 6.1253, "step": 14890 }, { "epoch": 0.028540698749093738, "grad_norm": 1.7251237630844116, "learning_rate": 2.8532936616885562e-05, "loss": 5.9797, "step": 14900 }, { "epoch": 0.028559853580468968, "grad_norm": 1.524519920349121, "learning_rate": 2.855209138099464e-05, "loss": 6.0555, "step": 14910 }, { "epoch": 0.028579008411844197, "grad_norm": 1.6187894344329834, "learning_rate": 2.857124614510372e-05, "loss": 6.1068, "step": 14920 }, { "epoch": 0.02859816324321943, "grad_norm": 1.8025329113006592, "learning_rate": 2.85904009092128e-05, "loss": 6.0733, "step": 14930 }, { "epoch": 0.02861731807459466, "grad_norm": 1.6213347911834717, "learning_rate": 2.860955567332188e-05, "loss": 6.0554, "step": 14940 }, { "epoch": 0.02863647290596989, "grad_norm": 1.6782475709915161, "learning_rate": 2.8628710437430958e-05, "loss": 6.0444, "step": 14950 }, { "epoch": 0.028655627737345122, "grad_norm": 1.659429669380188, "learning_rate": 2.864786520154004e-05, "loss": 6.0875, "step": 14960 }, { "epoch": 0.02867478256872035, "grad_norm": 1.704087257385254, "learning_rate": 2.8667019965649124e-05, "loss": 6.1287, "step": 14970 }, { "epoch": 0.02869393740009558, "grad_norm": 1.6511635780334473, "learning_rate": 2.86861747297582e-05, "loss": 6.0206, "step": 14980 }, { "epoch": 0.028713092231470814, "grad_norm": 1.749652624130249, "learning_rate": 
2.8705329493867283e-05, "loss": 6.0235, "step": 14990 }, { "epoch": 0.028732247062846043, "grad_norm": 1.6377780437469482, "learning_rate": 2.872448425797636e-05, "loss": 6.0787, "step": 15000 }, { "epoch": 0.028751401894221276, "grad_norm": 1.5093029737472534, "learning_rate": 2.8743639022085443e-05, "loss": 6.1709, "step": 15010 }, { "epoch": 0.028770556725596506, "grad_norm": 1.792614221572876, "learning_rate": 2.876279378619452e-05, "loss": 5.9693, "step": 15020 }, { "epoch": 0.028789711556971735, "grad_norm": 1.5817400217056274, "learning_rate": 2.8781948550303602e-05, "loss": 6.0926, "step": 15030 }, { "epoch": 0.028808866388346968, "grad_norm": 1.7817188501358032, "learning_rate": 2.880110331441268e-05, "loss": 5.9419, "step": 15040 }, { "epoch": 0.028828021219722198, "grad_norm": 1.7179596424102783, "learning_rate": 2.882025807852176e-05, "loss": 6.0625, "step": 15050 }, { "epoch": 0.028847176051097427, "grad_norm": 1.6068880558013916, "learning_rate": 2.883941284263084e-05, "loss": 6.0123, "step": 15060 }, { "epoch": 0.02886633088247266, "grad_norm": 1.8089252710342407, "learning_rate": 2.885856760673992e-05, "loss": 6.0327, "step": 15070 }, { "epoch": 0.02888548571384789, "grad_norm": 1.714423656463623, "learning_rate": 2.8877722370848998e-05, "loss": 5.9537, "step": 15080 }, { "epoch": 0.02890464054522312, "grad_norm": 1.7448253631591797, "learning_rate": 2.889687713495808e-05, "loss": 5.983, "step": 15090 }, { "epoch": 0.028923795376598352, "grad_norm": 1.7977620363235474, "learning_rate": 2.8916031899067157e-05, "loss": 6.081, "step": 15100 }, { "epoch": 0.02894295020797358, "grad_norm": 1.942357063293457, "learning_rate": 2.8935186663176242e-05, "loss": 6.0309, "step": 15110 }, { "epoch": 0.02896210503934881, "grad_norm": 1.9687442779541016, "learning_rate": 2.895434142728532e-05, "loss": 5.9651, "step": 15120 }, { "epoch": 0.028981259870724044, "grad_norm": 1.7121391296386719, "learning_rate": 2.89734961913944e-05, "loss": 6.0432, "step": 15130 }, { 
"epoch": 0.029000414702099273, "grad_norm": 1.907947063446045, "learning_rate": 2.8992650955503483e-05, "loss": 6.0438, "step": 15140 }, { "epoch": 0.029019569533474506, "grad_norm": 1.5400067567825317, "learning_rate": 2.901180571961256e-05, "loss": 5.9661, "step": 15150 }, { "epoch": 0.029038724364849736, "grad_norm": 1.6486817598342896, "learning_rate": 2.9030960483721642e-05, "loss": 6.0449, "step": 15160 }, { "epoch": 0.029057879196224965, "grad_norm": 1.8687330484390259, "learning_rate": 2.905011524783072e-05, "loss": 5.892, "step": 15170 }, { "epoch": 0.029077034027600198, "grad_norm": 1.8343738317489624, "learning_rate": 2.90692700119398e-05, "loss": 6.0485, "step": 15180 }, { "epoch": 0.029096188858975428, "grad_norm": 1.6479111909866333, "learning_rate": 2.908842477604888e-05, "loss": 5.9663, "step": 15190 }, { "epoch": 0.029115343690350657, "grad_norm": 1.6531740427017212, "learning_rate": 2.910757954015796e-05, "loss": 6.0892, "step": 15200 }, { "epoch": 0.02913449852172589, "grad_norm": 1.5584006309509277, "learning_rate": 2.9126734304267038e-05, "loss": 5.9738, "step": 15210 }, { "epoch": 0.02915365335310112, "grad_norm": 1.8397339582443237, "learning_rate": 2.9145889068376123e-05, "loss": 6.0784, "step": 15220 }, { "epoch": 0.02917280818447635, "grad_norm": 1.8211039304733276, "learning_rate": 2.91650438324852e-05, "loss": 6.0254, "step": 15230 }, { "epoch": 0.029191963015851582, "grad_norm": 1.9998446702957153, "learning_rate": 2.9184198596594282e-05, "loss": 5.9869, "step": 15240 }, { "epoch": 0.02921111784722681, "grad_norm": 1.6540186405181885, "learning_rate": 2.920335336070336e-05, "loss": 6.0127, "step": 15250 }, { "epoch": 0.02923027267860204, "grad_norm": 1.6311653852462769, "learning_rate": 2.922250812481244e-05, "loss": 5.9855, "step": 15260 }, { "epoch": 0.029249427509977274, "grad_norm": 1.677997350692749, "learning_rate": 2.924166288892152e-05, "loss": 6.0447, "step": 15270 }, { "epoch": 0.029268582341352503, "grad_norm": 
2.1243581771850586, "learning_rate": 2.92608176530306e-05, "loss": 5.9916, "step": 15280 }, { "epoch": 0.029287737172727733, "grad_norm": 1.995590090751648, "learning_rate": 2.927997241713968e-05, "loss": 5.9929, "step": 15290 }, { "epoch": 0.029306892004102966, "grad_norm": 1.5793806314468384, "learning_rate": 2.929912718124876e-05, "loss": 5.8663, "step": 15300 }, { "epoch": 0.029326046835478195, "grad_norm": 1.7089924812316895, "learning_rate": 2.931828194535784e-05, "loss": 6.0751, "step": 15310 }, { "epoch": 0.029345201666853428, "grad_norm": 1.7138699293136597, "learning_rate": 2.933743670946692e-05, "loss": 5.9455, "step": 15320 }, { "epoch": 0.029364356498228657, "grad_norm": 2.1152141094207764, "learning_rate": 2.9356591473576004e-05, "loss": 5.9378, "step": 15330 }, { "epoch": 0.029383511329603887, "grad_norm": 1.7044180631637573, "learning_rate": 2.9375746237685078e-05, "loss": 5.9224, "step": 15340 }, { "epoch": 0.02940266616097912, "grad_norm": 1.6764627695083618, "learning_rate": 2.9394901001794163e-05, "loss": 5.9732, "step": 15350 }, { "epoch": 0.02942182099235435, "grad_norm": 1.6768380403518677, "learning_rate": 2.941405576590324e-05, "loss": 6.1342, "step": 15360 }, { "epoch": 0.02944097582372958, "grad_norm": 1.8468143939971924, "learning_rate": 2.9433210530012322e-05, "loss": 5.9453, "step": 15370 }, { "epoch": 0.02946013065510481, "grad_norm": 1.8766584396362305, "learning_rate": 2.94523652941214e-05, "loss": 5.9845, "step": 15380 }, { "epoch": 0.02947928548648004, "grad_norm": 1.942747950553894, "learning_rate": 2.947152005823048e-05, "loss": 6.1325, "step": 15390 }, { "epoch": 0.02949844031785527, "grad_norm": 1.7695059776306152, "learning_rate": 2.949067482233956e-05, "loss": 6.0432, "step": 15400 }, { "epoch": 0.029517595149230504, "grad_norm": 1.737426519393921, "learning_rate": 2.950982958644864e-05, "loss": 5.9886, "step": 15410 }, { "epoch": 0.029536749980605733, "grad_norm": 1.8551764488220215, "learning_rate": 2.952898435055772e-05, 
"loss": 6.119, "step": 15420 }, { "epoch": 0.029555904811980963, "grad_norm": 1.8323434591293335, "learning_rate": 2.95481391146668e-05, "loss": 6.1513, "step": 15430 }, { "epoch": 0.029575059643356195, "grad_norm": 2.0308279991149902, "learning_rate": 2.9567293878775878e-05, "loss": 5.9989, "step": 15440 }, { "epoch": 0.029594214474731425, "grad_norm": 1.8118752241134644, "learning_rate": 2.958644864288496e-05, "loss": 6.1029, "step": 15450 }, { "epoch": 0.029613369306106654, "grad_norm": 1.660812258720398, "learning_rate": 2.9605603406994037e-05, "loss": 6.0859, "step": 15460 }, { "epoch": 0.029632524137481887, "grad_norm": 1.7657880783081055, "learning_rate": 2.962475817110312e-05, "loss": 5.8772, "step": 15470 }, { "epoch": 0.029651678968857117, "grad_norm": 1.6812330484390259, "learning_rate": 2.9643912935212203e-05, "loss": 5.9999, "step": 15480 }, { "epoch": 0.02967083380023235, "grad_norm": 1.974456548690796, "learning_rate": 2.966306769932128e-05, "loss": 5.981, "step": 15490 }, { "epoch": 0.02968998863160758, "grad_norm": 1.8111876249313354, "learning_rate": 2.9682222463430362e-05, "loss": 6.0185, "step": 15500 }, { "epoch": 0.02970914346298281, "grad_norm": 1.8499664068222046, "learning_rate": 2.970137722753944e-05, "loss": 6.1027, "step": 15510 }, { "epoch": 0.02972829829435804, "grad_norm": 2.0885238647460938, "learning_rate": 2.972053199164852e-05, "loss": 6.1179, "step": 15520 }, { "epoch": 0.02974745312573327, "grad_norm": 2.043504476547241, "learning_rate": 2.97396867557576e-05, "loss": 5.9031, "step": 15530 }, { "epoch": 0.0297666079571085, "grad_norm": 1.7832046747207642, "learning_rate": 2.975884151986668e-05, "loss": 6.026, "step": 15540 }, { "epoch": 0.029785762788483734, "grad_norm": 1.8093680143356323, "learning_rate": 2.977799628397576e-05, "loss": 5.9317, "step": 15550 }, { "epoch": 0.029804917619858963, "grad_norm": 1.5213533639907837, "learning_rate": 2.979715104808484e-05, "loss": 6.0438, "step": 15560 }, { "epoch": 
0.029824072451234192, "grad_norm": 1.690330982208252, "learning_rate": 2.9816305812193918e-05, "loss": 6.0515, "step": 15570 }, { "epoch": 0.029843227282609425, "grad_norm": 2.0169124603271484, "learning_rate": 2.9835460576303003e-05, "loss": 5.8824, "step": 15580 }, { "epoch": 0.029862382113984655, "grad_norm": 2.8522329330444336, "learning_rate": 2.985461534041208e-05, "loss": 5.9207, "step": 15590 }, { "epoch": 0.029881536945359884, "grad_norm": 1.551543116569519, "learning_rate": 2.9873770104521162e-05, "loss": 6.1113, "step": 15600 }, { "epoch": 0.029900691776735117, "grad_norm": 1.6246792078018188, "learning_rate": 2.989292486863024e-05, "loss": 6.1099, "step": 15610 }, { "epoch": 0.029919846608110347, "grad_norm": 1.713736653327942, "learning_rate": 2.991207963273932e-05, "loss": 5.9682, "step": 15620 }, { "epoch": 0.029939001439485576, "grad_norm": 1.8990949392318726, "learning_rate": 2.99312343968484e-05, "loss": 5.9365, "step": 15630 }, { "epoch": 0.02995815627086081, "grad_norm": 1.763440728187561, "learning_rate": 2.995038916095748e-05, "loss": 5.9953, "step": 15640 }, { "epoch": 0.02997731110223604, "grad_norm": 1.8051362037658691, "learning_rate": 2.9969543925066558e-05, "loss": 5.9638, "step": 15650 }, { "epoch": 0.02999646593361127, "grad_norm": 1.8160884380340576, "learning_rate": 2.998869868917564e-05, "loss": 5.9921, "step": 15660 }, { "epoch": 0.0300156207649865, "grad_norm": 1.802298665046692, "learning_rate": 3.000785345328472e-05, "loss": 5.9726, "step": 15670 }, { "epoch": 0.03003477559636173, "grad_norm": 1.914729118347168, "learning_rate": 3.00270082173938e-05, "loss": 5.8873, "step": 15680 }, { "epoch": 0.030053930427736963, "grad_norm": 1.9334965944290161, "learning_rate": 3.0046162981502883e-05, "loss": 6.0709, "step": 15690 }, { "epoch": 0.030073085259112193, "grad_norm": 1.8656643629074097, "learning_rate": 3.006531774561196e-05, "loss": 5.9626, "step": 15700 }, { "epoch": 0.030092240090487422, "grad_norm": 1.7038079500198364, 
"learning_rate": 3.0084472509721043e-05, "loss": 6.0325, "step": 15710 }, { "epoch": 0.030111394921862655, "grad_norm": 1.9315516948699951, "learning_rate": 3.010362727383012e-05, "loss": 5.7947, "step": 15720 }, { "epoch": 0.030130549753237885, "grad_norm": 1.754196286201477, "learning_rate": 3.0122782037939202e-05, "loss": 6.0322, "step": 15730 }, { "epoch": 0.030149704584613114, "grad_norm": 1.744740605354309, "learning_rate": 3.014193680204828e-05, "loss": 5.9429, "step": 15740 }, { "epoch": 0.030168859415988347, "grad_norm": 1.7589020729064941, "learning_rate": 3.016109156615736e-05, "loss": 5.9154, "step": 15750 }, { "epoch": 0.030188014247363577, "grad_norm": 1.9767746925354004, "learning_rate": 3.018024633026644e-05, "loss": 6.0112, "step": 15760 }, { "epoch": 0.030207169078738806, "grad_norm": 1.6053000688552856, "learning_rate": 3.019940109437552e-05, "loss": 6.0756, "step": 15770 }, { "epoch": 0.03022632391011404, "grad_norm": 1.8396952152252197, "learning_rate": 3.0218555858484598e-05, "loss": 6.0003, "step": 15780 }, { "epoch": 0.03024547874148927, "grad_norm": 2.0180485248565674, "learning_rate": 3.023771062259368e-05, "loss": 5.8524, "step": 15790 }, { "epoch": 0.030264633572864498, "grad_norm": 1.9039373397827148, "learning_rate": 3.0256865386702757e-05, "loss": 5.8193, "step": 15800 }, { "epoch": 0.03028378840423973, "grad_norm": 1.7165803909301758, "learning_rate": 3.027602015081184e-05, "loss": 5.9452, "step": 15810 }, { "epoch": 0.03030294323561496, "grad_norm": 1.7311983108520508, "learning_rate": 3.0295174914920917e-05, "loss": 5.9938, "step": 15820 }, { "epoch": 0.030322098066990193, "grad_norm": 1.6041464805603027, "learning_rate": 3.031432967903e-05, "loss": 5.8728, "step": 15830 }, { "epoch": 0.030341252898365423, "grad_norm": 1.9263648986816406, "learning_rate": 3.0333484443139083e-05, "loss": 5.867, "step": 15840 }, { "epoch": 0.030360407729740652, "grad_norm": 1.7245995998382568, "learning_rate": 3.035263920724816e-05, "loss": 5.9347, 
"step": 15850 }, { "epoch": 0.030379562561115885, "grad_norm": 1.7454010248184204, "learning_rate": 3.0371793971357242e-05, "loss": 5.9313, "step": 15860 }, { "epoch": 0.030398717392491115, "grad_norm": 1.859555959701538, "learning_rate": 3.039094873546632e-05, "loss": 5.8872, "step": 15870 }, { "epoch": 0.030417872223866344, "grad_norm": 1.6711100339889526, "learning_rate": 3.04101034995754e-05, "loss": 5.955, "step": 15880 }, { "epoch": 0.030437027055241577, "grad_norm": 1.8140504360198975, "learning_rate": 3.042925826368448e-05, "loss": 5.8277, "step": 15890 }, { "epoch": 0.030456181886616807, "grad_norm": 2.0735881328582764, "learning_rate": 3.044841302779356e-05, "loss": 5.9799, "step": 15900 }, { "epoch": 0.030475336717992036, "grad_norm": 1.8330869674682617, "learning_rate": 3.0467567791902638e-05, "loss": 5.9657, "step": 15910 }, { "epoch": 0.03049449154936727, "grad_norm": 1.8339619636535645, "learning_rate": 3.048672255601172e-05, "loss": 6.0221, "step": 15920 }, { "epoch": 0.0305136463807425, "grad_norm": 1.8585617542266846, "learning_rate": 3.0505877320120797e-05, "loss": 5.9033, "step": 15930 }, { "epoch": 0.030532801212117728, "grad_norm": 2.024402379989624, "learning_rate": 3.052503208422988e-05, "loss": 5.8723, "step": 15940 }, { "epoch": 0.03055195604349296, "grad_norm": 1.5745633840560913, "learning_rate": 3.054418684833896e-05, "loss": 5.8671, "step": 15950 }, { "epoch": 0.03057111087486819, "grad_norm": 1.9694093465805054, "learning_rate": 3.056334161244804e-05, "loss": 5.9286, "step": 15960 }, { "epoch": 0.03059026570624342, "grad_norm": 1.6504818201065063, "learning_rate": 3.058249637655712e-05, "loss": 5.8642, "step": 15970 }, { "epoch": 0.030609420537618653, "grad_norm": 1.735667109489441, "learning_rate": 3.06016511406662e-05, "loss": 5.8747, "step": 15980 }, { "epoch": 0.030628575368993882, "grad_norm": 1.9766865968704224, "learning_rate": 3.0620805904775275e-05, "loss": 5.9216, "step": 15990 }, { "epoch": 0.030647730200369115, 
"grad_norm": 1.5621917247772217, "learning_rate": 3.063996066888436e-05, "loss": 6.0165, "step": 16000 }, { "epoch": 0.030666885031744345, "grad_norm": 1.6762760877609253, "learning_rate": 3.0659115432993444e-05, "loss": 6.0121, "step": 16010 }, { "epoch": 0.030686039863119574, "grad_norm": 2.169489622116089, "learning_rate": 3.067827019710252e-05, "loss": 5.9931, "step": 16020 }, { "epoch": 0.030705194694494807, "grad_norm": 1.7322429418563843, "learning_rate": 3.06974249612116e-05, "loss": 5.9964, "step": 16030 }, { "epoch": 0.030724349525870037, "grad_norm": 1.856603980064392, "learning_rate": 3.071657972532068e-05, "loss": 5.9361, "step": 16040 }, { "epoch": 0.030743504357245266, "grad_norm": 1.8494060039520264, "learning_rate": 3.073573448942976e-05, "loss": 5.9544, "step": 16050 }, { "epoch": 0.0307626591886205, "grad_norm": 1.8332104682922363, "learning_rate": 3.075488925353884e-05, "loss": 5.9576, "step": 16060 }, { "epoch": 0.03078181401999573, "grad_norm": 1.7723139524459839, "learning_rate": 3.077404401764792e-05, "loss": 6.0512, "step": 16070 }, { "epoch": 0.030800968851370958, "grad_norm": 1.7514846324920654, "learning_rate": 3.0793198781757e-05, "loss": 5.9891, "step": 16080 }, { "epoch": 0.03082012368274619, "grad_norm": 1.5116844177246094, "learning_rate": 3.081235354586608e-05, "loss": 6.1159, "step": 16090 }, { "epoch": 0.03083927851412142, "grad_norm": 1.8033374547958374, "learning_rate": 3.0831508309975156e-05, "loss": 5.9922, "step": 16100 }, { "epoch": 0.03085843334549665, "grad_norm": 1.721637487411499, "learning_rate": 3.0850663074084244e-05, "loss": 5.8974, "step": 16110 }, { "epoch": 0.030877588176871883, "grad_norm": 1.92933988571167, "learning_rate": 3.086981783819332e-05, "loss": 5.8043, "step": 16120 }, { "epoch": 0.030896743008247112, "grad_norm": 1.9078320264816284, "learning_rate": 3.08889726023024e-05, "loss": 5.8605, "step": 16130 }, { "epoch": 0.03091589783962234, "grad_norm": 1.8270639181137085, "learning_rate": 
3.090812736641148e-05, "loss": 5.9478, "step": 16140 }, { "epoch": 0.030935052670997575, "grad_norm": 1.7275874614715576, "learning_rate": 3.092728213052056e-05, "loss": 5.8543, "step": 16150 }, { "epoch": 0.030954207502372804, "grad_norm": 2.027878761291504, "learning_rate": 3.094643689462964e-05, "loss": 6.0203, "step": 16160 }, { "epoch": 0.030973362333748037, "grad_norm": 1.8825052976608276, "learning_rate": 3.096559165873872e-05, "loss": 5.8364, "step": 16170 }, { "epoch": 0.030992517165123266, "grad_norm": 1.8257198333740234, "learning_rate": 3.09847464228478e-05, "loss": 5.8852, "step": 16180 }, { "epoch": 0.031011671996498496, "grad_norm": 1.9837861061096191, "learning_rate": 3.100390118695688e-05, "loss": 6.0895, "step": 16190 }, { "epoch": 0.03103082682787373, "grad_norm": 2.0333316326141357, "learning_rate": 3.102305595106596e-05, "loss": 5.9227, "step": 16200 }, { "epoch": 0.03104998165924896, "grad_norm": 1.745646595954895, "learning_rate": 3.104221071517504e-05, "loss": 5.8869, "step": 16210 }, { "epoch": 0.031069136490624188, "grad_norm": 1.7495408058166504, "learning_rate": 3.106136547928412e-05, "loss": 6.0056, "step": 16220 }, { "epoch": 0.03108829132199942, "grad_norm": 1.8629342317581177, "learning_rate": 3.10805202433932e-05, "loss": 5.9644, "step": 16230 }, { "epoch": 0.03110744615337465, "grad_norm": 1.7744930982589722, "learning_rate": 3.109967500750228e-05, "loss": 5.9217, "step": 16240 }, { "epoch": 0.03112660098474988, "grad_norm": 1.6560838222503662, "learning_rate": 3.111882977161136e-05, "loss": 5.8685, "step": 16250 }, { "epoch": 0.031145755816125113, "grad_norm": 2.0210964679718018, "learning_rate": 3.113798453572044e-05, "loss": 5.8839, "step": 16260 }, { "epoch": 0.031164910647500342, "grad_norm": 1.6664916276931763, "learning_rate": 3.115713929982952e-05, "loss": 5.9216, "step": 16270 }, { "epoch": 0.03118406547887557, "grad_norm": 1.5666781663894653, "learning_rate": 3.11762940639386e-05, "loss": 5.8938, "step": 16280 }, { 
"epoch": 0.031203220310250804, "grad_norm": 1.9337903261184692, "learning_rate": 3.119544882804768e-05, "loss": 5.9457, "step": 16290 }, { "epoch": 0.031222375141626034, "grad_norm": 1.7032593488693237, "learning_rate": 3.121460359215676e-05, "loss": 5.9025, "step": 16300 }, { "epoch": 0.031241529973001267, "grad_norm": 1.746495246887207, "learning_rate": 3.1233758356265836e-05, "loss": 5.9549, "step": 16310 }, { "epoch": 0.03126068480437649, "grad_norm": 2.107712507247925, "learning_rate": 3.125291312037492e-05, "loss": 5.7817, "step": 16320 }, { "epoch": 0.031279839635751726, "grad_norm": 1.5347236394882202, "learning_rate": 3.1272067884484e-05, "loss": 5.9812, "step": 16330 }, { "epoch": 0.03129899446712696, "grad_norm": 1.7783218622207642, "learning_rate": 3.129122264859308e-05, "loss": 5.8868, "step": 16340 }, { "epoch": 0.031318149298502185, "grad_norm": 1.6335511207580566, "learning_rate": 3.131037741270216e-05, "loss": 5.8451, "step": 16350 }, { "epoch": 0.03133730412987742, "grad_norm": 1.6938806772232056, "learning_rate": 3.132953217681124e-05, "loss": 5.9417, "step": 16360 }, { "epoch": 0.03135645896125265, "grad_norm": 1.8045192956924438, "learning_rate": 3.1348686940920324e-05, "loss": 5.9173, "step": 16370 }, { "epoch": 0.03137561379262788, "grad_norm": 1.9472556114196777, "learning_rate": 3.13678417050294e-05, "loss": 5.8861, "step": 16380 }, { "epoch": 0.03139476862400311, "grad_norm": 1.7001354694366455, "learning_rate": 3.138699646913848e-05, "loss": 5.9434, "step": 16390 }, { "epoch": 0.03141392345537834, "grad_norm": 1.8482390642166138, "learning_rate": 3.140615123324756e-05, "loss": 5.8329, "step": 16400 }, { "epoch": 0.031433078286753575, "grad_norm": 1.7093579769134521, "learning_rate": 3.142530599735664e-05, "loss": 6.0181, "step": 16410 }, { "epoch": 0.0314522331181288, "grad_norm": 1.6203014850616455, "learning_rate": 3.144446076146572e-05, "loss": 5.9065, "step": 16420 }, { "epoch": 0.031471387949504034, "grad_norm": 2.1075949668884277, 
"learning_rate": 3.14636155255748e-05, "loss": 5.9104, "step": 16430 }, { "epoch": 0.03149054278087927, "grad_norm": 1.6979210376739502, "learning_rate": 3.148277028968388e-05, "loss": 5.7316, "step": 16440 }, { "epoch": 0.03150969761225449, "grad_norm": 1.6553274393081665, "learning_rate": 3.150192505379296e-05, "loss": 6.0191, "step": 16450 }, { "epoch": 0.031528852443629726, "grad_norm": 1.891661524772644, "learning_rate": 3.1521079817902035e-05, "loss": 5.7221, "step": 16460 }, { "epoch": 0.03154800727500496, "grad_norm": 1.766967535018921, "learning_rate": 3.1540234582011124e-05, "loss": 5.8855, "step": 16470 }, { "epoch": 0.031567162106380185, "grad_norm": 1.9070974588394165, "learning_rate": 3.15593893461202e-05, "loss": 5.791, "step": 16480 }, { "epoch": 0.03158631693775542, "grad_norm": 1.7715550661087036, "learning_rate": 3.157854411022928e-05, "loss": 5.8714, "step": 16490 }, { "epoch": 0.03160547176913065, "grad_norm": 1.8044488430023193, "learning_rate": 3.159769887433836e-05, "loss": 5.8551, "step": 16500 }, { "epoch": 0.03162462660050588, "grad_norm": 1.6577445268630981, "learning_rate": 3.161685363844744e-05, "loss": 5.9732, "step": 16510 }, { "epoch": 0.03164378143188111, "grad_norm": 1.8008008003234863, "learning_rate": 3.163600840255652e-05, "loss": 5.8664, "step": 16520 }, { "epoch": 0.03166293626325634, "grad_norm": 1.874000072479248, "learning_rate": 3.16551631666656e-05, "loss": 5.8339, "step": 16530 }, { "epoch": 0.03168209109463157, "grad_norm": 1.6567065715789795, "learning_rate": 3.167431793077468e-05, "loss": 6.0423, "step": 16540 }, { "epoch": 0.0317012459260068, "grad_norm": 1.682034969329834, "learning_rate": 3.169347269488376e-05, "loss": 5.8628, "step": 16550 }, { "epoch": 0.031720400757382035, "grad_norm": 1.8254613876342773, "learning_rate": 3.171262745899284e-05, "loss": 5.9408, "step": 16560 }, { "epoch": 0.03173955558875726, "grad_norm": 1.9332855939865112, "learning_rate": 3.1731782223101916e-05, "loss": 5.7804, "step": 16570 
}, { "epoch": 0.031758710420132494, "grad_norm": 1.5544487237930298, "learning_rate": 3.1750936987211e-05, "loss": 5.9057, "step": 16580 }, { "epoch": 0.03177786525150773, "grad_norm": 1.5138188600540161, "learning_rate": 3.177009175132008e-05, "loss": 5.9431, "step": 16590 }, { "epoch": 0.03179702008288295, "grad_norm": 1.8825138807296753, "learning_rate": 3.178924651542916e-05, "loss": 5.9389, "step": 16600 }, { "epoch": 0.031816174914258186, "grad_norm": 1.6855955123901367, "learning_rate": 3.180840127953824e-05, "loss": 6.0291, "step": 16610 }, { "epoch": 0.03183532974563342, "grad_norm": 1.6268028020858765, "learning_rate": 3.182755604364732e-05, "loss": 5.9829, "step": 16620 }, { "epoch": 0.031854484577008645, "grad_norm": 2.032712936401367, "learning_rate": 3.18467108077564e-05, "loss": 5.8808, "step": 16630 }, { "epoch": 0.03187363940838388, "grad_norm": 2.014244318008423, "learning_rate": 3.186586557186548e-05, "loss": 5.8259, "step": 16640 }, { "epoch": 0.03189279423975911, "grad_norm": 1.9337886571884155, "learning_rate": 3.188502033597456e-05, "loss": 5.9388, "step": 16650 }, { "epoch": 0.031911949071134336, "grad_norm": 1.8556363582611084, "learning_rate": 3.190417510008364e-05, "loss": 6.0069, "step": 16660 }, { "epoch": 0.03193110390250957, "grad_norm": 2.000854730606079, "learning_rate": 3.1923329864192716e-05, "loss": 5.9388, "step": 16670 }, { "epoch": 0.0319502587338848, "grad_norm": 1.9495311975479126, "learning_rate": 3.19424846283018e-05, "loss": 5.8713, "step": 16680 }, { "epoch": 0.03196941356526003, "grad_norm": 1.6578840017318726, "learning_rate": 3.196163939241088e-05, "loss": 5.8212, "step": 16690 }, { "epoch": 0.03198856839663526, "grad_norm": 1.8114256858825684, "learning_rate": 3.198079415651996e-05, "loss": 5.886, "step": 16700 }, { "epoch": 0.032007723228010494, "grad_norm": 1.4930871725082397, "learning_rate": 3.199994892062904e-05, "loss": 5.9461, "step": 16710 }, { "epoch": 0.03202687805938572, "grad_norm": 1.93282949924469, 
"learning_rate": 3.201910368473812e-05, "loss": 5.8183, "step": 16720 }, { "epoch": 0.03204603289076095, "grad_norm": 2.2640552520751953, "learning_rate": 3.2038258448847204e-05, "loss": 5.7775, "step": 16730 }, { "epoch": 0.032065187722136186, "grad_norm": 1.5475636720657349, "learning_rate": 3.205741321295628e-05, "loss": 5.8885, "step": 16740 }, { "epoch": 0.03208434255351142, "grad_norm": 1.6748569011688232, "learning_rate": 3.207656797706536e-05, "loss": 5.9022, "step": 16750 }, { "epoch": 0.032103497384886645, "grad_norm": 1.8400170803070068, "learning_rate": 3.209572274117444e-05, "loss": 5.882, "step": 16760 }, { "epoch": 0.03212265221626188, "grad_norm": 1.8335503339767456, "learning_rate": 3.211487750528352e-05, "loss": 5.9347, "step": 16770 }, { "epoch": 0.03214180704763711, "grad_norm": 1.8852462768554688, "learning_rate": 3.2134032269392597e-05, "loss": 5.8808, "step": 16780 }, { "epoch": 0.03216096187901234, "grad_norm": 1.6671407222747803, "learning_rate": 3.215318703350168e-05, "loss": 5.924, "step": 16790 }, { "epoch": 0.03218011671038757, "grad_norm": 1.7170464992523193, "learning_rate": 3.217234179761076e-05, "loss": 5.8312, "step": 16800 }, { "epoch": 0.0321992715417628, "grad_norm": 2.105604410171509, "learning_rate": 3.219149656171984e-05, "loss": 5.8057, "step": 16810 }, { "epoch": 0.03221842637313803, "grad_norm": 1.6918760538101196, "learning_rate": 3.2210651325828915e-05, "loss": 5.7153, "step": 16820 }, { "epoch": 0.03223758120451326, "grad_norm": 1.894545078277588, "learning_rate": 3.2229806089938e-05, "loss": 5.7959, "step": 16830 }, { "epoch": 0.032256736035888495, "grad_norm": 1.5657074451446533, "learning_rate": 3.224896085404708e-05, "loss": 6.0362, "step": 16840 }, { "epoch": 0.03227589086726372, "grad_norm": 1.65109121799469, "learning_rate": 3.226811561815616e-05, "loss": 5.871, "step": 16850 }, { "epoch": 0.032295045698638954, "grad_norm": 2.0702054500579834, "learning_rate": 3.228727038226524e-05, "loss": 5.7576, "step": 16860 
}, { "epoch": 0.03231420053001419, "grad_norm": 1.911100149154663, "learning_rate": 3.230642514637432e-05, "loss": 5.9377, "step": 16870 }, { "epoch": 0.03233335536138941, "grad_norm": 1.6952428817749023, "learning_rate": 3.23255799104834e-05, "loss": 5.9055, "step": 16880 }, { "epoch": 0.032352510192764646, "grad_norm": 1.868404507637024, "learning_rate": 3.234473467459248e-05, "loss": 5.8765, "step": 16890 }, { "epoch": 0.03237166502413988, "grad_norm": 1.7717853784561157, "learning_rate": 3.236388943870156e-05, "loss": 5.8212, "step": 16900 }, { "epoch": 0.032390819855515104, "grad_norm": 1.7388842105865479, "learning_rate": 3.238304420281064e-05, "loss": 5.7892, "step": 16910 }, { "epoch": 0.03240997468689034, "grad_norm": 1.5372806787490845, "learning_rate": 3.240219896691972e-05, "loss": 6.0617, "step": 16920 }, { "epoch": 0.03242912951826557, "grad_norm": 1.632023572921753, "learning_rate": 3.2421353731028796e-05, "loss": 5.8644, "step": 16930 }, { "epoch": 0.032448284349640796, "grad_norm": 2.013596296310425, "learning_rate": 3.244050849513788e-05, "loss": 6.0684, "step": 16940 }, { "epoch": 0.03246743918101603, "grad_norm": 1.784565806388855, "learning_rate": 3.245966325924696e-05, "loss": 5.7448, "step": 16950 }, { "epoch": 0.03248659401239126, "grad_norm": 1.6247310638427734, "learning_rate": 3.247881802335604e-05, "loss": 5.7766, "step": 16960 }, { "epoch": 0.03250574884376649, "grad_norm": 1.7499678134918213, "learning_rate": 3.249797278746512e-05, "loss": 5.8315, "step": 16970 }, { "epoch": 0.03252490367514172, "grad_norm": 1.8325122594833374, "learning_rate": 3.25171275515742e-05, "loss": 5.8864, "step": 16980 }, { "epoch": 0.032544058506516954, "grad_norm": 1.7371383905410767, "learning_rate": 3.253628231568328e-05, "loss": 5.8825, "step": 16990 }, { "epoch": 0.03256321333789218, "grad_norm": 1.8061306476593018, "learning_rate": 3.255543707979236e-05, "loss": 5.7064, "step": 17000 }, { "epoch": 0.03258236816926741, "grad_norm": 1.7705211639404297, 
"learning_rate": 3.257459184390144e-05, "loss": 5.9272, "step": 17010 }, { "epoch": 0.032601523000642646, "grad_norm": 1.8008637428283691, "learning_rate": 3.259374660801052e-05, "loss": 5.8318, "step": 17020 }, { "epoch": 0.03262067783201787, "grad_norm": 1.7615673542022705, "learning_rate": 3.26129013721196e-05, "loss": 5.8161, "step": 17030 }, { "epoch": 0.032639832663393105, "grad_norm": 1.8190571069717407, "learning_rate": 3.263205613622868e-05, "loss": 5.8019, "step": 17040 }, { "epoch": 0.03265898749476834, "grad_norm": 1.7435880899429321, "learning_rate": 3.265121090033776e-05, "loss": 5.9028, "step": 17050 }, { "epoch": 0.03267814232614357, "grad_norm": 1.7685151100158691, "learning_rate": 3.267036566444684e-05, "loss": 5.9017, "step": 17060 }, { "epoch": 0.0326972971575188, "grad_norm": 1.6099588871002197, "learning_rate": 3.268952042855592e-05, "loss": 5.8028, "step": 17070 }, { "epoch": 0.03271645198889403, "grad_norm": 1.934985637664795, "learning_rate": 3.2708675192665e-05, "loss": 5.8591, "step": 17080 }, { "epoch": 0.03273560682026926, "grad_norm": 1.8230727910995483, "learning_rate": 3.272782995677408e-05, "loss": 5.7369, "step": 17090 }, { "epoch": 0.03275476165164449, "grad_norm": 1.7398567199707031, "learning_rate": 3.274698472088316e-05, "loss": 5.8141, "step": 17100 }, { "epoch": 0.03277391648301972, "grad_norm": 1.6557741165161133, "learning_rate": 3.276613948499224e-05, "loss": 5.8379, "step": 17110 }, { "epoch": 0.032793071314394955, "grad_norm": 1.6649724245071411, "learning_rate": 3.278529424910132e-05, "loss": 5.8578, "step": 17120 }, { "epoch": 0.03281222614577018, "grad_norm": 1.7322752475738525, "learning_rate": 3.28044490132104e-05, "loss": 5.7773, "step": 17130 }, { "epoch": 0.032831380977145413, "grad_norm": 1.8907043933868408, "learning_rate": 3.2823603777319476e-05, "loss": 5.8404, "step": 17140 }, { "epoch": 0.032850535808520646, "grad_norm": 1.8327080011367798, "learning_rate": 3.284275854142856e-05, "loss": 5.7724, "step": 
17150 }, { "epoch": 0.03286969063989587, "grad_norm": 2.257535934448242, "learning_rate": 3.286191330553764e-05, "loss": 5.8087, "step": 17160 }, { "epoch": 0.032888845471271105, "grad_norm": 1.786719560623169, "learning_rate": 3.288106806964672e-05, "loss": 5.857, "step": 17170 }, { "epoch": 0.03290800030264634, "grad_norm": 2.1973891258239746, "learning_rate": 3.2900222833755795e-05, "loss": 5.7669, "step": 17180 }, { "epoch": 0.032927155134021564, "grad_norm": 1.5002164840698242, "learning_rate": 3.291937759786488e-05, "loss": 5.759, "step": 17190 }, { "epoch": 0.0329463099653968, "grad_norm": 1.6109049320220947, "learning_rate": 3.2938532361973964e-05, "loss": 5.8361, "step": 17200 }, { "epoch": 0.03296546479677203, "grad_norm": 1.6255234479904175, "learning_rate": 3.295768712608304e-05, "loss": 5.6525, "step": 17210 }, { "epoch": 0.032984619628147256, "grad_norm": 1.8066534996032715, "learning_rate": 3.297684189019212e-05, "loss": 5.8759, "step": 17220 }, { "epoch": 0.03300377445952249, "grad_norm": 1.7752494812011719, "learning_rate": 3.29959966543012e-05, "loss": 5.8744, "step": 17230 }, { "epoch": 0.03302292929089772, "grad_norm": 1.7165488004684448, "learning_rate": 3.301515141841028e-05, "loss": 5.7664, "step": 17240 }, { "epoch": 0.03304208412227295, "grad_norm": 1.8235034942626953, "learning_rate": 3.303430618251936e-05, "loss": 5.761, "step": 17250 }, { "epoch": 0.03306123895364818, "grad_norm": 1.7462224960327148, "learning_rate": 3.305346094662844e-05, "loss": 5.852, "step": 17260 }, { "epoch": 0.033080393785023414, "grad_norm": 1.7128227949142456, "learning_rate": 3.307261571073752e-05, "loss": 5.8512, "step": 17270 }, { "epoch": 0.03309954861639864, "grad_norm": 1.9926360845565796, "learning_rate": 3.30917704748466e-05, "loss": 5.8364, "step": 17280 }, { "epoch": 0.03311870344777387, "grad_norm": 1.821378231048584, "learning_rate": 3.3110925238955675e-05, "loss": 5.7407, "step": 17290 }, { "epoch": 0.033137858279149106, "grad_norm": 
1.8733932971954346, "learning_rate": 3.313008000306476e-05, "loss": 5.8841, "step": 17300 }, { "epoch": 0.03315701311052433, "grad_norm": 1.8631795644760132, "learning_rate": 3.314923476717384e-05, "loss": 5.8919, "step": 17310 }, { "epoch": 0.033176167941899565, "grad_norm": 1.6945147514343262, "learning_rate": 3.316838953128292e-05, "loss": 5.7285, "step": 17320 }, { "epoch": 0.0331953227732748, "grad_norm": 1.799109697341919, "learning_rate": 3.3187544295392e-05, "loss": 5.7643, "step": 17330 }, { "epoch": 0.033214477604650024, "grad_norm": 1.6440812349319458, "learning_rate": 3.320669905950108e-05, "loss": 5.9619, "step": 17340 }, { "epoch": 0.03323363243602526, "grad_norm": 1.8725221157073975, "learning_rate": 3.3225853823610156e-05, "loss": 5.7248, "step": 17350 }, { "epoch": 0.03325278726740049, "grad_norm": 1.8970961570739746, "learning_rate": 3.324500858771924e-05, "loss": 5.7523, "step": 17360 }, { "epoch": 0.033271942098775716, "grad_norm": 1.7488157749176025, "learning_rate": 3.326416335182832e-05, "loss": 5.7998, "step": 17370 }, { "epoch": 0.03329109693015095, "grad_norm": 1.666482925415039, "learning_rate": 3.32833181159374e-05, "loss": 5.7975, "step": 17380 }, { "epoch": 0.03331025176152618, "grad_norm": 1.6421641111373901, "learning_rate": 3.330247288004648e-05, "loss": 5.872, "step": 17390 }, { "epoch": 0.033329406592901414, "grad_norm": 1.6433581113815308, "learning_rate": 3.3321627644155556e-05, "loss": 5.8517, "step": 17400 }, { "epoch": 0.03334856142427664, "grad_norm": 1.875099778175354, "learning_rate": 3.334078240826464e-05, "loss": 5.7228, "step": 17410 }, { "epoch": 0.03336771625565187, "grad_norm": 1.9046096801757812, "learning_rate": 3.335993717237372e-05, "loss": 5.6436, "step": 17420 }, { "epoch": 0.033386871087027106, "grad_norm": 1.7590264081954956, "learning_rate": 3.33790919364828e-05, "loss": 5.8755, "step": 17430 }, { "epoch": 0.03340602591840233, "grad_norm": 1.7575627565383911, "learning_rate": 3.339824670059188e-05, "loss": 
5.9959, "step": 17440 }, { "epoch": 0.033425180749777565, "grad_norm": 1.9581546783447266, "learning_rate": 3.341740146470096e-05, "loss": 5.7489, "step": 17450 }, { "epoch": 0.0334443355811528, "grad_norm": 1.71177339553833, "learning_rate": 3.343655622881004e-05, "loss": 5.7634, "step": 17460 }, { "epoch": 0.033463490412528024, "grad_norm": 1.8654780387878418, "learning_rate": 3.345571099291912e-05, "loss": 5.8281, "step": 17470 }, { "epoch": 0.03348264524390326, "grad_norm": 1.7118650674819946, "learning_rate": 3.34748657570282e-05, "loss": 5.9654, "step": 17480 }, { "epoch": 0.03350180007527849, "grad_norm": 1.6634691953659058, "learning_rate": 3.349402052113728e-05, "loss": 5.7785, "step": 17490 }, { "epoch": 0.033520954906653716, "grad_norm": 1.6902354955673218, "learning_rate": 3.3513175285246356e-05, "loss": 5.7882, "step": 17500 }, { "epoch": 0.03354010973802895, "grad_norm": 1.7198920249938965, "learning_rate": 3.353233004935544e-05, "loss": 5.8215, "step": 17510 }, { "epoch": 0.03355926456940418, "grad_norm": 1.8379755020141602, "learning_rate": 3.355148481346452e-05, "loss": 5.7838, "step": 17520 }, { "epoch": 0.03357841940077941, "grad_norm": 1.9155532121658325, "learning_rate": 3.35706395775736e-05, "loss": 5.7681, "step": 17530 }, { "epoch": 0.03359757423215464, "grad_norm": 1.6899641752243042, "learning_rate": 3.358979434168268e-05, "loss": 5.7911, "step": 17540 }, { "epoch": 0.033616729063529874, "grad_norm": 1.8016395568847656, "learning_rate": 3.360894910579176e-05, "loss": 5.7608, "step": 17550 }, { "epoch": 0.0336358838949051, "grad_norm": 1.6559820175170898, "learning_rate": 3.3628103869900844e-05, "loss": 5.8158, "step": 17560 }, { "epoch": 0.03365503872628033, "grad_norm": 1.731520414352417, "learning_rate": 3.364725863400992e-05, "loss": 5.8113, "step": 17570 }, { "epoch": 0.033674193557655566, "grad_norm": 1.8204026222229004, "learning_rate": 3.3666413398119e-05, "loss": 5.8131, "step": 17580 }, { "epoch": 0.03369334838903079, "grad_norm": 
2.1069324016571045, "learning_rate": 3.368556816222808e-05, "loss": 5.6768, "step": 17590 }, { "epoch": 0.033712503220406025, "grad_norm": 1.817162036895752, "learning_rate": 3.370472292633716e-05, "loss": 5.724, "step": 17600 }, { "epoch": 0.03373165805178126, "grad_norm": 1.6413789987564087, "learning_rate": 3.3723877690446237e-05, "loss": 5.8988, "step": 17610 }, { "epoch": 0.033750812883156484, "grad_norm": 1.806679606437683, "learning_rate": 3.374303245455532e-05, "loss": 5.68, "step": 17620 }, { "epoch": 0.033769967714531716, "grad_norm": 1.6536558866500854, "learning_rate": 3.37621872186644e-05, "loss": 5.8704, "step": 17630 }, { "epoch": 0.03378912254590695, "grad_norm": 1.8294130563735962, "learning_rate": 3.378134198277348e-05, "loss": 5.7465, "step": 17640 }, { "epoch": 0.033808277377282175, "grad_norm": 1.6615431308746338, "learning_rate": 3.3800496746882555e-05, "loss": 5.7705, "step": 17650 }, { "epoch": 0.03382743220865741, "grad_norm": 1.7139477729797363, "learning_rate": 3.3819651510991636e-05, "loss": 5.8061, "step": 17660 }, { "epoch": 0.03384658704003264, "grad_norm": 1.595889687538147, "learning_rate": 3.383880627510072e-05, "loss": 5.7441, "step": 17670 }, { "epoch": 0.03386574187140787, "grad_norm": 1.748533010482788, "learning_rate": 3.38579610392098e-05, "loss": 5.8237, "step": 17680 }, { "epoch": 0.0338848967027831, "grad_norm": 1.9438995122909546, "learning_rate": 3.387711580331888e-05, "loss": 5.6948, "step": 17690 }, { "epoch": 0.03390405153415833, "grad_norm": 1.6426293849945068, "learning_rate": 3.389627056742796e-05, "loss": 5.7587, "step": 17700 }, { "epoch": 0.03392320636553356, "grad_norm": 1.6801573038101196, "learning_rate": 3.391542533153704e-05, "loss": 5.7937, "step": 17710 }, { "epoch": 0.03394236119690879, "grad_norm": 2.069737672805786, "learning_rate": 3.393458009564612e-05, "loss": 5.712, "step": 17720 }, { "epoch": 0.033961516028284025, "grad_norm": 2.239854335784912, "learning_rate": 3.39537348597552e-05, "loss": 
5.768, "step": 17730 }, { "epoch": 0.03398067085965926, "grad_norm": 1.8778128623962402, "learning_rate": 3.397288962386428e-05, "loss": 5.7984, "step": 17740 }, { "epoch": 0.033999825691034484, "grad_norm": 1.9366514682769775, "learning_rate": 3.399204438797336e-05, "loss": 5.7913, "step": 17750 }, { "epoch": 0.03401898052240972, "grad_norm": 1.5220251083374023, "learning_rate": 3.4011199152082436e-05, "loss": 5.7106, "step": 17760 }, { "epoch": 0.03403813535378495, "grad_norm": 1.8517354726791382, "learning_rate": 3.403035391619152e-05, "loss": 5.8164, "step": 17770 }, { "epoch": 0.034057290185160176, "grad_norm": 2.254310369491577, "learning_rate": 3.40495086803006e-05, "loss": 5.6709, "step": 17780 }, { "epoch": 0.03407644501653541, "grad_norm": 1.779463529586792, "learning_rate": 3.406866344440968e-05, "loss": 5.8185, "step": 17790 }, { "epoch": 0.03409559984791064, "grad_norm": 1.8124730587005615, "learning_rate": 3.408781820851876e-05, "loss": 5.7961, "step": 17800 }, { "epoch": 0.03411475467928587, "grad_norm": 1.657527208328247, "learning_rate": 3.410697297262784e-05, "loss": 5.9142, "step": 17810 }, { "epoch": 0.0341339095106611, "grad_norm": 1.8173108100891113, "learning_rate": 3.412612773673692e-05, "loss": 5.8432, "step": 17820 }, { "epoch": 0.034153064342036334, "grad_norm": 1.8269405364990234, "learning_rate": 3.4145282500846e-05, "loss": 5.6906, "step": 17830 }, { "epoch": 0.03417221917341156, "grad_norm": 1.9060598611831665, "learning_rate": 3.416443726495508e-05, "loss": 5.7402, "step": 17840 }, { "epoch": 0.03419137400478679, "grad_norm": 1.7779072523117065, "learning_rate": 3.418359202906416e-05, "loss": 5.7639, "step": 17850 }, { "epoch": 0.034210528836162026, "grad_norm": 1.74942147731781, "learning_rate": 3.4202746793173235e-05, "loss": 5.8236, "step": 17860 }, { "epoch": 0.03422968366753725, "grad_norm": 1.6884039640426636, "learning_rate": 3.422190155728232e-05, "loss": 5.8464, "step": 17870 }, { "epoch": 0.034248838498912484, "grad_norm": 
1.8170796632766724, "learning_rate": 3.42410563213914e-05, "loss": 5.7558, "step": 17880 }, { "epoch": 0.03426799333028772, "grad_norm": 1.6889128684997559, "learning_rate": 3.426021108550048e-05, "loss": 5.8048, "step": 17890 }, { "epoch": 0.03428714816166294, "grad_norm": 1.5885599851608276, "learning_rate": 3.427936584960956e-05, "loss": 5.7082, "step": 17900 }, { "epoch": 0.034306302993038176, "grad_norm": 1.7016875743865967, "learning_rate": 3.429852061371864e-05, "loss": 5.9039, "step": 17910 }, { "epoch": 0.03432545782441341, "grad_norm": 1.6554763317108154, "learning_rate": 3.431767537782772e-05, "loss": 5.8784, "step": 17920 }, { "epoch": 0.034344612655788635, "grad_norm": 1.7273564338684082, "learning_rate": 3.43368301419368e-05, "loss": 5.8273, "step": 17930 }, { "epoch": 0.03436376748716387, "grad_norm": 1.9540435075759888, "learning_rate": 3.435598490604588e-05, "loss": 5.7729, "step": 17940 }, { "epoch": 0.0343829223185391, "grad_norm": 1.6150964498519897, "learning_rate": 3.437513967015496e-05, "loss": 5.8396, "step": 17950 }, { "epoch": 0.03440207714991433, "grad_norm": 1.792557954788208, "learning_rate": 3.439429443426404e-05, "loss": 5.7117, "step": 17960 }, { "epoch": 0.03442123198128956, "grad_norm": 1.9512271881103516, "learning_rate": 3.4413449198373116e-05, "loss": 5.7399, "step": 17970 }, { "epoch": 0.03444038681266479, "grad_norm": 2.232722520828247, "learning_rate": 3.44326039624822e-05, "loss": 5.7563, "step": 17980 }, { "epoch": 0.03445954164404002, "grad_norm": 1.7549275159835815, "learning_rate": 3.445175872659128e-05, "loss": 5.7247, "step": 17990 }, { "epoch": 0.03447869647541525, "grad_norm": 1.5446438789367676, "learning_rate": 3.447091349070036e-05, "loss": 5.9005, "step": 18000 }, { "epoch": 0.034497851306790485, "grad_norm": 1.772189736366272, "learning_rate": 3.4490068254809435e-05, "loss": 5.7213, "step": 18010 }, { "epoch": 0.03451700613816571, "grad_norm": 2.107607126235962, "learning_rate": 3.4509223018918516e-05, "loss": 
5.7766, "step": 18020 }, { "epoch": 0.034536160969540944, "grad_norm": 1.8727654218673706, "learning_rate": 3.45283777830276e-05, "loss": 5.699, "step": 18030 }, { "epoch": 0.03455531580091618, "grad_norm": 1.777948021888733, "learning_rate": 3.454753254713668e-05, "loss": 5.5698, "step": 18040 }, { "epoch": 0.03457447063229141, "grad_norm": 1.925907850265503, "learning_rate": 3.456668731124576e-05, "loss": 5.8667, "step": 18050 }, { "epoch": 0.034593625463666636, "grad_norm": 1.658549427986145, "learning_rate": 3.458584207535484e-05, "loss": 5.8798, "step": 18060 }, { "epoch": 0.03461278029504187, "grad_norm": 1.8930613994598389, "learning_rate": 3.460499683946392e-05, "loss": 5.7879, "step": 18070 }, { "epoch": 0.0346319351264171, "grad_norm": 1.7576055526733398, "learning_rate": 3.4622236127162096e-05, "loss": 5.7521, "step": 18080 }, { "epoch": 0.03465108995779233, "grad_norm": 1.8105958700180054, "learning_rate": 3.464139089127117e-05, "loss": 5.8046, "step": 18090 }, { "epoch": 0.03467024478916756, "grad_norm": 1.714250087738037, "learning_rate": 3.466054565538025e-05, "loss": 5.709, "step": 18100 }, { "epoch": 0.03468939962054279, "grad_norm": 2.8938913345336914, "learning_rate": 3.467970041948933e-05, "loss": 5.7425, "step": 18110 }, { "epoch": 0.03470855445191802, "grad_norm": 1.9064602851867676, "learning_rate": 3.4698855183598414e-05, "loss": 5.7107, "step": 18120 }, { "epoch": 0.03472770928329325, "grad_norm": 1.837786078453064, "learning_rate": 3.471800994770749e-05, "loss": 5.6715, "step": 18130 }, { "epoch": 0.034746864114668485, "grad_norm": 1.7589088678359985, "learning_rate": 3.473716471181657e-05, "loss": 5.6989, "step": 18140 }, { "epoch": 0.03476601894604371, "grad_norm": 1.7827759981155396, "learning_rate": 3.475631947592565e-05, "loss": 5.733, "step": 18150 }, { "epoch": 0.034785173777418944, "grad_norm": 1.682606816291809, "learning_rate": 3.477547424003473e-05, "loss": 5.6204, "step": 18160 }, { "epoch": 0.03480432860879418, "grad_norm": 
1.7229444980621338, "learning_rate": 3.479462900414381e-05, "loss": 5.7308, "step": 18170 }, { "epoch": 0.0348234834401694, "grad_norm": 1.945448398590088, "learning_rate": 3.481378376825289e-05, "loss": 5.6503, "step": 18180 }, { "epoch": 0.034842638271544636, "grad_norm": 1.584520936012268, "learning_rate": 3.483293853236197e-05, "loss": 5.7121, "step": 18190 }, { "epoch": 0.03486179310291987, "grad_norm": 1.7935535907745361, "learning_rate": 3.485209329647105e-05, "loss": 5.6432, "step": 18200 }, { "epoch": 0.034880947934295095, "grad_norm": 1.6285691261291504, "learning_rate": 3.487124806058013e-05, "loss": 5.8179, "step": 18210 }, { "epoch": 0.03490010276567033, "grad_norm": 1.668928623199463, "learning_rate": 3.4890402824689214e-05, "loss": 5.8235, "step": 18220 }, { "epoch": 0.03491925759704556, "grad_norm": 2.1680660247802734, "learning_rate": 3.4909557588798295e-05, "loss": 5.8136, "step": 18230 }, { "epoch": 0.03493841242842079, "grad_norm": 1.6683090925216675, "learning_rate": 3.492871235290737e-05, "loss": 5.8446, "step": 18240 }, { "epoch": 0.03495756725979602, "grad_norm": 1.756495475769043, "learning_rate": 3.494786711701645e-05, "loss": 5.7892, "step": 18250 }, { "epoch": 0.03497672209117125, "grad_norm": 1.8367953300476074, "learning_rate": 3.496702188112553e-05, "loss": 5.851, "step": 18260 }, { "epoch": 0.03499587692254648, "grad_norm": 1.6785025596618652, "learning_rate": 3.498617664523461e-05, "loss": 5.8486, "step": 18270 }, { "epoch": 0.03501503175392171, "grad_norm": 1.883433222770691, "learning_rate": 3.500533140934369e-05, "loss": 5.8019, "step": 18280 }, { "epoch": 0.035034186585296945, "grad_norm": 1.7262399196624756, "learning_rate": 3.502448617345277e-05, "loss": 5.682, "step": 18290 }, { "epoch": 0.03505334141667217, "grad_norm": 1.7137019634246826, "learning_rate": 3.504364093756185e-05, "loss": 5.5934, "step": 18300 }, { "epoch": 0.035072496248047404, "grad_norm": 1.7649171352386475, "learning_rate": 3.506279570167093e-05, "loss": 
5.6379, "step": 18310 }, { "epoch": 0.03509165107942264, "grad_norm": 1.9999340772628784, "learning_rate": 3.5081950465780006e-05, "loss": 5.7023, "step": 18320 }, { "epoch": 0.03511080591079786, "grad_norm": 1.8770676851272583, "learning_rate": 3.5101105229889094e-05, "loss": 5.771, "step": 18330 }, { "epoch": 0.035129960742173096, "grad_norm": 1.7282782793045044, "learning_rate": 3.512025999399817e-05, "loss": 5.8184, "step": 18340 }, { "epoch": 0.03514911557354833, "grad_norm": 1.8147403001785278, "learning_rate": 3.513941475810725e-05, "loss": 5.6854, "step": 18350 }, { "epoch": 0.035168270404923554, "grad_norm": 1.641402006149292, "learning_rate": 3.515856952221633e-05, "loss": 5.9055, "step": 18360 }, { "epoch": 0.03518742523629879, "grad_norm": 1.6373900175094604, "learning_rate": 3.517772428632541e-05, "loss": 5.6144, "step": 18370 }, { "epoch": 0.03520658006767402, "grad_norm": 1.6120057106018066, "learning_rate": 3.5196879050434494e-05, "loss": 5.6957, "step": 18380 }, { "epoch": 0.03522573489904925, "grad_norm": 1.963301420211792, "learning_rate": 3.521603381454357e-05, "loss": 5.6412, "step": 18390 }, { "epoch": 0.03524488973042448, "grad_norm": 2.238394260406494, "learning_rate": 3.523518857865265e-05, "loss": 5.692, "step": 18400 }, { "epoch": 0.03526404456179971, "grad_norm": 1.9008643627166748, "learning_rate": 3.525434334276173e-05, "loss": 5.8391, "step": 18410 }, { "epoch": 0.035283199393174945, "grad_norm": 1.8095473051071167, "learning_rate": 3.527349810687081e-05, "loss": 5.6829, "step": 18420 }, { "epoch": 0.03530235422455017, "grad_norm": 1.805201768875122, "learning_rate": 3.529265287097989e-05, "loss": 5.6961, "step": 18430 }, { "epoch": 0.035321509055925404, "grad_norm": 1.6918054819107056, "learning_rate": 3.5311807635088975e-05, "loss": 5.7175, "step": 18440 }, { "epoch": 0.03534066388730064, "grad_norm": 2.0107336044311523, "learning_rate": 3.533096239919805e-05, "loss": 5.716, "step": 18450 }, { "epoch": 0.03535981871867586, 
"grad_norm": 1.7733392715454102, "learning_rate": 3.535011716330713e-05, "loss": 5.7249, "step": 18460 }, { "epoch": 0.035378973550051096, "grad_norm": 1.762912631034851, "learning_rate": 3.536927192741621e-05, "loss": 5.7492, "step": 18470 }, { "epoch": 0.03539812838142633, "grad_norm": 2.0612635612487793, "learning_rate": 3.5388426691525294e-05, "loss": 5.5692, "step": 18480 }, { "epoch": 0.035417283212801555, "grad_norm": 1.8029062747955322, "learning_rate": 3.540758145563437e-05, "loss": 5.6373, "step": 18490 }, { "epoch": 0.03543643804417679, "grad_norm": 1.7538716793060303, "learning_rate": 3.542673621974345e-05, "loss": 5.603, "step": 18500 }, { "epoch": 0.03545559287555202, "grad_norm": 2.0805320739746094, "learning_rate": 3.544589098385253e-05, "loss": 5.7351, "step": 18510 }, { "epoch": 0.03547474770692725, "grad_norm": 1.645302176475525, "learning_rate": 3.546504574796161e-05, "loss": 5.6679, "step": 18520 }, { "epoch": 0.03549390253830248, "grad_norm": 1.897645354270935, "learning_rate": 3.5484200512070687e-05, "loss": 5.7465, "step": 18530 }, { "epoch": 0.03551305736967771, "grad_norm": 1.6898843050003052, "learning_rate": 3.550335527617977e-05, "loss": 5.6855, "step": 18540 }, { "epoch": 0.03553221220105294, "grad_norm": 2.024270534515381, "learning_rate": 3.5522510040288856e-05, "loss": 5.7436, "step": 18550 }, { "epoch": 0.03555136703242817, "grad_norm": 1.7048368453979492, "learning_rate": 3.554166480439793e-05, "loss": 5.7978, "step": 18560 }, { "epoch": 0.035570521863803405, "grad_norm": 1.791005253791809, "learning_rate": 3.556081956850701e-05, "loss": 5.7217, "step": 18570 }, { "epoch": 0.03558967669517863, "grad_norm": 1.6652477979660034, "learning_rate": 3.557997433261609e-05, "loss": 5.6599, "step": 18580 }, { "epoch": 0.035608831526553864, "grad_norm": 1.765131950378418, "learning_rate": 3.5599129096725174e-05, "loss": 5.7043, "step": 18590 }, { "epoch": 0.035627986357929096, "grad_norm": 1.6759384870529175, "learning_rate": 
3.561828386083425e-05, "loss": 5.6637, "step": 18600 }, { "epoch": 0.03564714118930432, "grad_norm": 1.6498384475708008, "learning_rate": 3.563743862494333e-05, "loss": 5.7222, "step": 18610 }, { "epoch": 0.035666296020679555, "grad_norm": 1.7747297286987305, "learning_rate": 3.565659338905241e-05, "loss": 5.7981, "step": 18620 }, { "epoch": 0.03568545085205479, "grad_norm": 1.711758017539978, "learning_rate": 3.567574815316149e-05, "loss": 5.7157, "step": 18630 }, { "epoch": 0.035704605683430014, "grad_norm": 1.5653294324874878, "learning_rate": 3.569490291727057e-05, "loss": 5.8299, "step": 18640 }, { "epoch": 0.03572376051480525, "grad_norm": 1.9732375144958496, "learning_rate": 3.571405768137965e-05, "loss": 5.6473, "step": 18650 }, { "epoch": 0.03574291534618048, "grad_norm": 1.9354913234710693, "learning_rate": 3.573321244548873e-05, "loss": 5.6725, "step": 18660 }, { "epoch": 0.035762070177555706, "grad_norm": 1.753443717956543, "learning_rate": 3.575236720959781e-05, "loss": 5.7803, "step": 18670 }, { "epoch": 0.03578122500893094, "grad_norm": 1.8755699396133423, "learning_rate": 3.5771521973706886e-05, "loss": 5.6438, "step": 18680 }, { "epoch": 0.03580037984030617, "grad_norm": 2.1502678394317627, "learning_rate": 3.5790676737815974e-05, "loss": 5.5548, "step": 18690 }, { "epoch": 0.0358195346716814, "grad_norm": 1.9905130863189697, "learning_rate": 3.580983150192505e-05, "loss": 5.7708, "step": 18700 }, { "epoch": 0.03583868950305663, "grad_norm": 1.9259904623031616, "learning_rate": 3.582898626603413e-05, "loss": 5.7453, "step": 18710 }, { "epoch": 0.035857844334431864, "grad_norm": 1.8817559480667114, "learning_rate": 3.584814103014321e-05, "loss": 5.7476, "step": 18720 }, { "epoch": 0.0358769991658071, "grad_norm": 1.7995768785476685, "learning_rate": 3.586729579425229e-05, "loss": 5.6661, "step": 18730 }, { "epoch": 0.03589615399718232, "grad_norm": 1.8517813682556152, "learning_rate": 3.5886450558361374e-05, "loss": 5.7406, "step": 18740 }, { 
"epoch": 0.035915308828557556, "grad_norm": 2.033731698989868, "learning_rate": 3.590560532247045e-05, "loss": 5.7032, "step": 18750 }, { "epoch": 0.03593446365993279, "grad_norm": 2.39910888671875, "learning_rate": 3.592476008657953e-05, "loss": 5.6864, "step": 18760 }, { "epoch": 0.035953618491308015, "grad_norm": 1.8545351028442383, "learning_rate": 3.594391485068861e-05, "loss": 5.6079, "step": 18770 }, { "epoch": 0.03597277332268325, "grad_norm": 1.658901572227478, "learning_rate": 3.596306961479769e-05, "loss": 5.6413, "step": 18780 }, { "epoch": 0.03599192815405848, "grad_norm": 1.6510193347930908, "learning_rate": 3.598222437890677e-05, "loss": 5.6107, "step": 18790 }, { "epoch": 0.03601108298543371, "grad_norm": 1.9152872562408447, "learning_rate": 3.6001379143015855e-05, "loss": 5.768, "step": 18800 }, { "epoch": 0.03603023781680894, "grad_norm": 1.6889055967330933, "learning_rate": 3.602053390712493e-05, "loss": 5.6613, "step": 18810 }, { "epoch": 0.03604939264818417, "grad_norm": 1.7622888088226318, "learning_rate": 3.603968867123401e-05, "loss": 5.6717, "step": 18820 }, { "epoch": 0.0360685474795594, "grad_norm": 1.7621101140975952, "learning_rate": 3.605884343534309e-05, "loss": 5.7672, "step": 18830 }, { "epoch": 0.03608770231093463, "grad_norm": 1.6344554424285889, "learning_rate": 3.607799819945217e-05, "loss": 5.6391, "step": 18840 }, { "epoch": 0.036106857142309864, "grad_norm": 1.941768765449524, "learning_rate": 3.609715296356125e-05, "loss": 5.6433, "step": 18850 }, { "epoch": 0.03612601197368509, "grad_norm": 1.8186233043670654, "learning_rate": 3.611630772767033e-05, "loss": 5.5859, "step": 18860 }, { "epoch": 0.03614516680506032, "grad_norm": 1.676337718963623, "learning_rate": 3.613546249177941e-05, "loss": 5.8492, "step": 18870 }, { "epoch": 0.036164321636435556, "grad_norm": 1.8471723794937134, "learning_rate": 3.615461725588849e-05, "loss": 5.6238, "step": 18880 }, { "epoch": 0.03618347646781078, "grad_norm": 1.907820224761963, 
"learning_rate": 3.617377201999757e-05, "loss": 5.638, "step": 18890 }, { "epoch": 0.036202631299186015, "grad_norm": 2.2244319915771484, "learning_rate": 3.619292678410665e-05, "loss": 5.6651, "step": 18900 }, { "epoch": 0.03622178613056125, "grad_norm": 1.616303563117981, "learning_rate": 3.6212081548215736e-05, "loss": 5.7376, "step": 18910 }, { "epoch": 0.036240940961936474, "grad_norm": 1.8788509368896484, "learning_rate": 3.623123631232481e-05, "loss": 5.6772, "step": 18920 }, { "epoch": 0.03626009579331171, "grad_norm": 1.5064018964767456, "learning_rate": 3.625039107643389e-05, "loss": 5.7102, "step": 18930 }, { "epoch": 0.03627925062468694, "grad_norm": 1.8822871446609497, "learning_rate": 3.626954584054297e-05, "loss": 5.7796, "step": 18940 }, { "epoch": 0.036298405456062166, "grad_norm": 1.686350703239441, "learning_rate": 3.6288700604652054e-05, "loss": 5.6322, "step": 18950 }, { "epoch": 0.0363175602874374, "grad_norm": 1.7714182138442993, "learning_rate": 3.630785536876113e-05, "loss": 5.6499, "step": 18960 }, { "epoch": 0.03633671511881263, "grad_norm": 1.8059494495391846, "learning_rate": 3.632701013287021e-05, "loss": 5.6306, "step": 18970 }, { "epoch": 0.03635586995018786, "grad_norm": 1.8755769729614258, "learning_rate": 3.634616489697929e-05, "loss": 5.7618, "step": 18980 }, { "epoch": 0.03637502478156309, "grad_norm": 1.6893960237503052, "learning_rate": 3.636531966108837e-05, "loss": 5.8015, "step": 18990 }, { "epoch": 0.036394179612938324, "grad_norm": 1.6418410539627075, "learning_rate": 3.638447442519745e-05, "loss": 5.7298, "step": 19000 }, { "epoch": 0.03641333444431355, "grad_norm": 2.161428928375244, "learning_rate": 3.640362918930653e-05, "loss": 5.8027, "step": 19010 }, { "epoch": 0.03643248927568878, "grad_norm": 1.761541724205017, "learning_rate": 3.642278395341561e-05, "loss": 5.7264, "step": 19020 }, { "epoch": 0.036451644107064016, "grad_norm": 1.7551162242889404, "learning_rate": 3.644193871752469e-05, "loss": 5.7179, "step": 
19030 }, { "epoch": 0.03647079893843925, "grad_norm": 1.7825956344604492, "learning_rate": 3.6461093481633765e-05, "loss": 5.8336, "step": 19040 }, { "epoch": 0.036489953769814475, "grad_norm": 1.712259292602539, "learning_rate": 3.6480248245742854e-05, "loss": 5.6586, "step": 19050 }, { "epoch": 0.03650910860118971, "grad_norm": 1.7564966678619385, "learning_rate": 3.6499403009851935e-05, "loss": 5.7296, "step": 19060 }, { "epoch": 0.03652826343256494, "grad_norm": 1.9074172973632812, "learning_rate": 3.651855777396101e-05, "loss": 5.6281, "step": 19070 }, { "epoch": 0.036547418263940167, "grad_norm": 1.8681831359863281, "learning_rate": 3.653771253807009e-05, "loss": 5.5921, "step": 19080 }, { "epoch": 0.0365665730953154, "grad_norm": 1.9710825681686401, "learning_rate": 3.655686730217917e-05, "loss": 5.7602, "step": 19090 }, { "epoch": 0.03658572792669063, "grad_norm": 2.243185043334961, "learning_rate": 3.657602206628825e-05, "loss": 5.6438, "step": 19100 }, { "epoch": 0.03660488275806586, "grad_norm": 1.7283273935317993, "learning_rate": 3.659517683039733e-05, "loss": 5.7224, "step": 19110 }, { "epoch": 0.03662403758944109, "grad_norm": 1.636296272277832, "learning_rate": 3.661433159450641e-05, "loss": 5.6862, "step": 19120 }, { "epoch": 0.036643192420816324, "grad_norm": 1.801076054573059, "learning_rate": 3.663348635861549e-05, "loss": 5.764, "step": 19130 }, { "epoch": 0.03666234725219155, "grad_norm": 1.7162219285964966, "learning_rate": 3.665264112272457e-05, "loss": 5.67, "step": 19140 }, { "epoch": 0.03668150208356678, "grad_norm": 1.8168766498565674, "learning_rate": 3.6671795886833646e-05, "loss": 5.7837, "step": 19150 }, { "epoch": 0.036700656914942016, "grad_norm": 1.7839750051498413, "learning_rate": 3.6690950650942734e-05, "loss": 5.6249, "step": 19160 }, { "epoch": 0.03671981174631724, "grad_norm": 1.8121423721313477, "learning_rate": 3.671010541505181e-05, "loss": 5.5902, "step": 19170 }, { "epoch": 0.036738966577692475, "grad_norm": 
1.7714418172836304, "learning_rate": 3.672926017916089e-05, "loss": 5.6694, "step": 19180 }, { "epoch": 0.03675812140906771, "grad_norm": 1.8387482166290283, "learning_rate": 3.674841494326997e-05, "loss": 5.6938, "step": 19190 }, { "epoch": 0.036777276240442934, "grad_norm": 1.801194190979004, "learning_rate": 3.676756970737905e-05, "loss": 5.7358, "step": 19200 }, { "epoch": 0.03679643107181817, "grad_norm": 1.7612278461456299, "learning_rate": 3.678672447148813e-05, "loss": 5.5996, "step": 19210 }, { "epoch": 0.0368155859031934, "grad_norm": 2.2774081230163574, "learning_rate": 3.680587923559721e-05, "loss": 5.759, "step": 19220 }, { "epoch": 0.036834740734568626, "grad_norm": 1.8036006689071655, "learning_rate": 3.682503399970629e-05, "loss": 5.5722, "step": 19230 }, { "epoch": 0.03685389556594386, "grad_norm": 1.6680355072021484, "learning_rate": 3.684418876381537e-05, "loss": 5.7582, "step": 19240 }, { "epoch": 0.03687305039731909, "grad_norm": 1.7048542499542236, "learning_rate": 3.686334352792445e-05, "loss": 5.7861, "step": 19250 }, { "epoch": 0.03689220522869432, "grad_norm": 1.6501555442810059, "learning_rate": 3.688249829203353e-05, "loss": 5.6215, "step": 19260 }, { "epoch": 0.03691136006006955, "grad_norm": 1.7039111852645874, "learning_rate": 3.6901653056142615e-05, "loss": 5.6348, "step": 19270 }, { "epoch": 0.036930514891444784, "grad_norm": 1.8909320831298828, "learning_rate": 3.692080782025169e-05, "loss": 5.7983, "step": 19280 }, { "epoch": 0.03694966972282001, "grad_norm": 2.9482579231262207, "learning_rate": 3.693996258436077e-05, "loss": 5.696, "step": 19290 }, { "epoch": 0.03696882455419524, "grad_norm": 1.870692491531372, "learning_rate": 3.695911734846985e-05, "loss": 5.6301, "step": 19300 }, { "epoch": 0.036987979385570476, "grad_norm": 1.8594979047775269, "learning_rate": 3.6978272112578934e-05, "loss": 5.7365, "step": 19310 }, { "epoch": 0.0370071342169457, "grad_norm": 2.051964044570923, "learning_rate": 3.699742687668801e-05, "loss": 
5.5439, "step": 19320 }, { "epoch": 0.037026289048320934, "grad_norm": 2.0403950214385986, "learning_rate": 3.701658164079709e-05, "loss": 5.7791, "step": 19330 }, { "epoch": 0.03704544387969617, "grad_norm": 1.8016695976257324, "learning_rate": 3.703573640490617e-05, "loss": 5.5001, "step": 19340 }, { "epoch": 0.03706459871107139, "grad_norm": 1.843927264213562, "learning_rate": 3.705489116901525e-05, "loss": 5.5601, "step": 19350 }, { "epoch": 0.037083753542446626, "grad_norm": 1.6708259582519531, "learning_rate": 3.7074045933124327e-05, "loss": 5.5935, "step": 19360 }, { "epoch": 0.03710290837382186, "grad_norm": 1.9250338077545166, "learning_rate": 3.709320069723341e-05, "loss": 5.6267, "step": 19370 }, { "epoch": 0.03712206320519709, "grad_norm": 1.5712088346481323, "learning_rate": 3.711235546134249e-05, "loss": 5.6557, "step": 19380 }, { "epoch": 0.03714121803657232, "grad_norm": 1.6867998838424683, "learning_rate": 3.713151022545157e-05, "loss": 5.8774, "step": 19390 }, { "epoch": 0.03716037286794755, "grad_norm": 1.6640630960464478, "learning_rate": 3.715066498956065e-05, "loss": 5.8246, "step": 19400 }, { "epoch": 0.037179527699322784, "grad_norm": 1.8690415620803833, "learning_rate": 3.716981975366973e-05, "loss": 5.8074, "step": 19410 }, { "epoch": 0.03719868253069801, "grad_norm": 1.7793735265731812, "learning_rate": 3.7188974517778814e-05, "loss": 5.8946, "step": 19420 }, { "epoch": 0.03721783736207324, "grad_norm": 1.6705492734909058, "learning_rate": 3.720812928188789e-05, "loss": 5.7474, "step": 19430 }, { "epoch": 0.037236992193448476, "grad_norm": 2.04957914352417, "learning_rate": 3.722728404599697e-05, "loss": 5.5318, "step": 19440 }, { "epoch": 0.0372561470248237, "grad_norm": 2.030266523361206, "learning_rate": 3.724643881010605e-05, "loss": 5.7095, "step": 19450 }, { "epoch": 0.037275301856198935, "grad_norm": 1.653734803199768, "learning_rate": 3.726559357421513e-05, "loss": 5.5062, "step": 19460 }, { "epoch": 0.03729445668757417, 
"grad_norm": 1.9626210927963257, "learning_rate": 3.728474833832421e-05, "loss": 5.6563, "step": 19470 }, { "epoch": 0.037313611518949394, "grad_norm": 1.736471176147461, "learning_rate": 3.730390310243329e-05, "loss": 5.6147, "step": 19480 }, { "epoch": 0.03733276635032463, "grad_norm": 1.7610998153686523, "learning_rate": 3.732305786654237e-05, "loss": 5.5163, "step": 19490 }, { "epoch": 0.03735192118169986, "grad_norm": 1.973386287689209, "learning_rate": 3.734221263065145e-05, "loss": 5.647, "step": 19500 }, { "epoch": 0.037371076013075086, "grad_norm": 1.706936001777649, "learning_rate": 3.7361367394760526e-05, "loss": 5.6134, "step": 19510 }, { "epoch": 0.03739023084445032, "grad_norm": 3.6085314750671387, "learning_rate": 3.7380522158869614e-05, "loss": 5.5605, "step": 19520 }, { "epoch": 0.03740938567582555, "grad_norm": 1.7493129968643188, "learning_rate": 3.739967692297869e-05, "loss": 5.5759, "step": 19530 }, { "epoch": 0.03742854050720078, "grad_norm": 2.136784315109253, "learning_rate": 3.741883168708777e-05, "loss": 5.5099, "step": 19540 }, { "epoch": 0.03744769533857601, "grad_norm": 1.7998985052108765, "learning_rate": 3.743798645119685e-05, "loss": 5.6112, "step": 19550 }, { "epoch": 0.037466850169951244, "grad_norm": 2.022986650466919, "learning_rate": 3.745714121530593e-05, "loss": 5.6972, "step": 19560 }, { "epoch": 0.03748600500132647, "grad_norm": 1.6916497945785522, "learning_rate": 3.7476295979415014e-05, "loss": 5.7805, "step": 19570 }, { "epoch": 0.0375051598327017, "grad_norm": 1.5951331853866577, "learning_rate": 3.749545074352409e-05, "loss": 5.6467, "step": 19580 }, { "epoch": 0.037524314664076935, "grad_norm": 1.7163423299789429, "learning_rate": 3.751460550763317e-05, "loss": 5.6272, "step": 19590 }, { "epoch": 0.03754346949545216, "grad_norm": 1.7415187358856201, "learning_rate": 3.753376027174225e-05, "loss": 5.6856, "step": 19600 }, { "epoch": 0.037562624326827394, "grad_norm": 1.6820991039276123, "learning_rate": 
3.7552915035851325e-05, "loss": 5.6799, "step": 19610 }, { "epoch": 0.03758177915820263, "grad_norm": 1.7699886560440063, "learning_rate": 3.7572069799960413e-05, "loss": 5.5129, "step": 19620 }, { "epoch": 0.03760093398957785, "grad_norm": 1.6774818897247314, "learning_rate": 3.7591224564069495e-05, "loss": 5.5909, "step": 19630 }, { "epoch": 0.037620088820953086, "grad_norm": 1.7618991136550903, "learning_rate": 3.761037932817857e-05, "loss": 5.627, "step": 19640 }, { "epoch": 0.03763924365232832, "grad_norm": 1.8066765069961548, "learning_rate": 3.7629534092287644e-05, "loss": 5.5746, "step": 19650 }, { "epoch": 0.037658398483703545, "grad_norm": 1.8473072052001953, "learning_rate": 3.764868885639673e-05, "loss": 5.6086, "step": 19660 }, { "epoch": 0.03767755331507878, "grad_norm": 1.8544687032699585, "learning_rate": 3.766784362050581e-05, "loss": 5.6275, "step": 19670 }, { "epoch": 0.03769670814645401, "grad_norm": 1.7333780527114868, "learning_rate": 3.768699838461489e-05, "loss": 5.7054, "step": 19680 }, { "epoch": 0.03771586297782924, "grad_norm": 2.039369583129883, "learning_rate": 3.770615314872397e-05, "loss": 5.6643, "step": 19690 }, { "epoch": 0.03773501780920447, "grad_norm": 1.8592125177383423, "learning_rate": 3.772530791283305e-05, "loss": 5.7085, "step": 19700 }, { "epoch": 0.0377541726405797, "grad_norm": 1.6056489944458008, "learning_rate": 3.774446267694213e-05, "loss": 5.5997, "step": 19710 }, { "epoch": 0.037773327471954936, "grad_norm": 1.90566086769104, "learning_rate": 3.7763617441051206e-05, "loss": 5.5106, "step": 19720 }, { "epoch": 0.03779248230333016, "grad_norm": 1.8451259136199951, "learning_rate": 3.778277220516029e-05, "loss": 5.5327, "step": 19730 }, { "epoch": 0.037811637134705395, "grad_norm": 1.8198137283325195, "learning_rate": 3.780192696926937e-05, "loss": 5.8414, "step": 19740 }, { "epoch": 0.03783079196608063, "grad_norm": 1.7048654556274414, "learning_rate": 3.782108173337845e-05, "loss": 5.7139, "step": 19750 }, { 
"epoch": 0.037849946797455854, "grad_norm": 1.6276319026947021, "learning_rate": 3.7840236497487525e-05, "loss": 5.7225, "step": 19760 }, { "epoch": 0.03786910162883109, "grad_norm": 1.7088690996170044, "learning_rate": 3.785939126159661e-05, "loss": 5.5638, "step": 19770 }, { "epoch": 0.03788825646020632, "grad_norm": 1.6695462465286255, "learning_rate": 3.7878546025705694e-05, "loss": 5.6637, "step": 19780 }, { "epoch": 0.037907411291581546, "grad_norm": 1.836577296257019, "learning_rate": 3.789770078981477e-05, "loss": 5.6374, "step": 19790 }, { "epoch": 0.03792656612295678, "grad_norm": 1.7167863845825195, "learning_rate": 3.791685555392385e-05, "loss": 5.6364, "step": 19800 }, { "epoch": 0.03794572095433201, "grad_norm": 1.752866268157959, "learning_rate": 3.793601031803293e-05, "loss": 5.6975, "step": 19810 }, { "epoch": 0.03796487578570724, "grad_norm": 1.7856419086456299, "learning_rate": 3.795516508214201e-05, "loss": 5.6143, "step": 19820 }, { "epoch": 0.03798403061708247, "grad_norm": 1.6358734369277954, "learning_rate": 3.797431984625109e-05, "loss": 5.6326, "step": 19830 }, { "epoch": 0.0380031854484577, "grad_norm": 1.8453423976898193, "learning_rate": 3.799347461036017e-05, "loss": 5.6629, "step": 19840 }, { "epoch": 0.03802234027983293, "grad_norm": 1.6329014301300049, "learning_rate": 3.801262937446925e-05, "loss": 5.6493, "step": 19850 }, { "epoch": 0.03804149511120816, "grad_norm": 1.560354232788086, "learning_rate": 3.803178413857833e-05, "loss": 5.6954, "step": 19860 }, { "epoch": 0.038060649942583395, "grad_norm": 1.7899401187896729, "learning_rate": 3.8050938902687405e-05, "loss": 5.7155, "step": 19870 }, { "epoch": 0.03807980477395862, "grad_norm": 1.7078169584274292, "learning_rate": 3.807009366679649e-05, "loss": 5.8022, "step": 19880 }, { "epoch": 0.038098959605333854, "grad_norm": 1.9918776750564575, "learning_rate": 3.8089248430905575e-05, "loss": 5.6898, "step": 19890 }, { "epoch": 0.03811811443670909, "grad_norm": 1.6014678478240967, 
"learning_rate": 3.810840319501465e-05, "loss": 5.5629, "step": 19900 }, { "epoch": 0.03813726926808431, "grad_norm": 1.775541067123413, "learning_rate": 3.812755795912373e-05, "loss": 5.5111, "step": 19910 }, { "epoch": 0.038156424099459546, "grad_norm": 1.8109757900238037, "learning_rate": 3.814671272323281e-05, "loss": 5.6811, "step": 19920 }, { "epoch": 0.03817557893083478, "grad_norm": 1.7045756578445435, "learning_rate": 3.816586748734189e-05, "loss": 5.7744, "step": 19930 }, { "epoch": 0.038194733762210005, "grad_norm": 1.9279289245605469, "learning_rate": 3.818502225145097e-05, "loss": 5.6328, "step": 19940 }, { "epoch": 0.03821388859358524, "grad_norm": 1.851715326309204, "learning_rate": 3.820417701556005e-05, "loss": 5.5843, "step": 19950 }, { "epoch": 0.03823304342496047, "grad_norm": 1.936645746231079, "learning_rate": 3.822333177966913e-05, "loss": 5.6152, "step": 19960 }, { "epoch": 0.0382521982563357, "grad_norm": 1.7980294227600098, "learning_rate": 3.824248654377821e-05, "loss": 5.7284, "step": 19970 }, { "epoch": 0.03827135308771093, "grad_norm": 1.8971072435379028, "learning_rate": 3.8261641307887286e-05, "loss": 5.5659, "step": 19980 }, { "epoch": 0.03829050791908616, "grad_norm": 1.8213235139846802, "learning_rate": 3.828079607199637e-05, "loss": 5.7051, "step": 19990 }, { "epoch": 0.03830966275046139, "grad_norm": 1.8059561252593994, "learning_rate": 3.8299950836105456e-05, "loss": 5.6557, "step": 20000 }, { "epoch": 0.03832881758183662, "grad_norm": 1.7812002897262573, "learning_rate": 3.831910560021453e-05, "loss": 5.5045, "step": 20010 }, { "epoch": 0.038347972413211855, "grad_norm": 1.7998801469802856, "learning_rate": 3.833826036432361e-05, "loss": 5.5645, "step": 20020 }, { "epoch": 0.03836712724458708, "grad_norm": 2.0958664417266846, "learning_rate": 3.8357415128432686e-05, "loss": 5.6278, "step": 20030 }, { "epoch": 0.038386282075962314, "grad_norm": 1.6535605192184448, "learning_rate": 3.8376569892541774e-05, "loss": 5.4962, "step": 
20040 }, { "epoch": 0.038405436907337547, "grad_norm": 1.697166919708252, "learning_rate": 3.839572465665085e-05, "loss": 5.6398, "step": 20050 }, { "epoch": 0.03842459173871278, "grad_norm": 1.7292730808258057, "learning_rate": 3.841487942075993e-05, "loss": 5.5873, "step": 20060 }, { "epoch": 0.038443746570088005, "grad_norm": 1.8200008869171143, "learning_rate": 3.8434034184869004e-05, "loss": 5.481, "step": 20070 }, { "epoch": 0.03846290140146324, "grad_norm": 1.8605585098266602, "learning_rate": 3.845318894897809e-05, "loss": 5.6894, "step": 20080 }, { "epoch": 0.03848205623283847, "grad_norm": 1.8065166473388672, "learning_rate": 3.847234371308717e-05, "loss": 5.7175, "step": 20090 }, { "epoch": 0.0385012110642137, "grad_norm": 1.537960171699524, "learning_rate": 3.849149847719625e-05, "loss": 5.6989, "step": 20100 }, { "epoch": 0.03852036589558893, "grad_norm": 1.735011100769043, "learning_rate": 3.8510653241305336e-05, "loss": 5.5007, "step": 20110 }, { "epoch": 0.03853952072696416, "grad_norm": 1.685500979423523, "learning_rate": 3.852980800541441e-05, "loss": 5.6371, "step": 20120 }, { "epoch": 0.03855867555833939, "grad_norm": 1.7679638862609863, "learning_rate": 3.854896276952349e-05, "loss": 5.6813, "step": 20130 }, { "epoch": 0.03857783038971462, "grad_norm": 1.7067627906799316, "learning_rate": 3.856811753363257e-05, "loss": 5.586, "step": 20140 }, { "epoch": 0.038596985221089855, "grad_norm": 1.8954062461853027, "learning_rate": 3.8587272297741655e-05, "loss": 5.5696, "step": 20150 }, { "epoch": 0.03861614005246508, "grad_norm": 1.7715165615081787, "learning_rate": 3.860642706185073e-05, "loss": 5.6839, "step": 20160 }, { "epoch": 0.038635294883840314, "grad_norm": 1.5953891277313232, "learning_rate": 3.862558182595981e-05, "loss": 5.6493, "step": 20170 }, { "epoch": 0.03865444971521555, "grad_norm": 1.57597017288208, "learning_rate": 3.8644736590068885e-05, "loss": 5.7463, "step": 20180 }, { "epoch": 0.03867360454659077, "grad_norm": 
1.6788007020950317, "learning_rate": 3.866389135417797e-05, "loss": 5.7394, "step": 20190 }, { "epoch": 0.038692759377966006, "grad_norm": 1.8438447713851929, "learning_rate": 3.868304611828705e-05, "loss": 5.4907, "step": 20200 }, { "epoch": 0.03871191420934124, "grad_norm": 1.4983241558074951, "learning_rate": 3.870220088239613e-05, "loss": 5.5396, "step": 20210 }, { "epoch": 0.038731069040716465, "grad_norm": 2.0625720024108887, "learning_rate": 3.8721355646505204e-05, "loss": 5.4546, "step": 20220 }, { "epoch": 0.0387502238720917, "grad_norm": 1.732575535774231, "learning_rate": 3.874051041061429e-05, "loss": 5.5685, "step": 20230 }, { "epoch": 0.03876937870346693, "grad_norm": 1.6063146591186523, "learning_rate": 3.875966517472337e-05, "loss": 5.7511, "step": 20240 }, { "epoch": 0.03878853353484216, "grad_norm": 1.8523151874542236, "learning_rate": 3.877881993883245e-05, "loss": 5.6571, "step": 20250 }, { "epoch": 0.03880768836621739, "grad_norm": 1.709725022315979, "learning_rate": 3.8797974702941536e-05, "loss": 5.7067, "step": 20260 }, { "epoch": 0.03882684319759262, "grad_norm": 1.7554774284362793, "learning_rate": 3.881712946705061e-05, "loss": 5.6305, "step": 20270 }, { "epoch": 0.03884599802896785, "grad_norm": 2.079568862915039, "learning_rate": 3.883628423115969e-05, "loss": 5.4547, "step": 20280 }, { "epoch": 0.03886515286034308, "grad_norm": 2.1261844635009766, "learning_rate": 3.8855438995268766e-05, "loss": 5.5282, "step": 20290 }, { "epoch": 0.038884307691718314, "grad_norm": 1.7274383306503296, "learning_rate": 3.8874593759377854e-05, "loss": 5.6505, "step": 20300 }, { "epoch": 0.03890346252309354, "grad_norm": 1.8837189674377441, "learning_rate": 3.889374852348693e-05, "loss": 5.5129, "step": 20310 }, { "epoch": 0.03892261735446877, "grad_norm": 1.8843246698379517, "learning_rate": 3.891290328759601e-05, "loss": 5.5711, "step": 20320 }, { "epoch": 0.038941772185844006, "grad_norm": 1.749711513519287, "learning_rate": 3.8932058051705085e-05, 
"loss": 5.5346, "step": 20330 }, { "epoch": 0.03896092701721923, "grad_norm": 1.8075315952301025, "learning_rate": 3.895121281581417e-05, "loss": 5.6061, "step": 20340 }, { "epoch": 0.038980081848594465, "grad_norm": 1.7811354398727417, "learning_rate": 3.8970367579923254e-05, "loss": 5.7742, "step": 20350 }, { "epoch": 0.0389992366799697, "grad_norm": 1.6576286554336548, "learning_rate": 3.898952234403233e-05, "loss": 5.678, "step": 20360 }, { "epoch": 0.03901839151134493, "grad_norm": 1.6384704113006592, "learning_rate": 3.90086771081414e-05, "loss": 5.5689, "step": 20370 }, { "epoch": 0.03903754634272016, "grad_norm": 1.688236951828003, "learning_rate": 3.902783187225049e-05, "loss": 5.6297, "step": 20380 }, { "epoch": 0.03905670117409539, "grad_norm": 1.6038827896118164, "learning_rate": 3.904698663635957e-05, "loss": 5.4979, "step": 20390 }, { "epoch": 0.03907585600547062, "grad_norm": 1.62442946434021, "learning_rate": 3.906614140046865e-05, "loss": 5.6631, "step": 20400 }, { "epoch": 0.03909501083684585, "grad_norm": 1.6817585229873657, "learning_rate": 3.908529616457773e-05, "loss": 5.5252, "step": 20410 }, { "epoch": 0.03911416566822108, "grad_norm": 1.6735841035842896, "learning_rate": 3.910445092868681e-05, "loss": 5.685, "step": 20420 }, { "epoch": 0.039133320499596315, "grad_norm": 1.7566319704055786, "learning_rate": 3.912360569279589e-05, "loss": 5.5061, "step": 20430 }, { "epoch": 0.03915247533097154, "grad_norm": 1.6665996313095093, "learning_rate": 3.9142760456904965e-05, "loss": 5.6384, "step": 20440 }, { "epoch": 0.039171630162346774, "grad_norm": 1.9839471578598022, "learning_rate": 3.9161915221014053e-05, "loss": 5.6078, "step": 20450 }, { "epoch": 0.03919078499372201, "grad_norm": 1.6478968858718872, "learning_rate": 3.9181069985123135e-05, "loss": 5.7109, "step": 20460 }, { "epoch": 0.03920993982509723, "grad_norm": 1.6304892301559448, "learning_rate": 3.920022474923221e-05, "loss": 5.5319, "step": 20470 }, { "epoch": 0.039229094656472466, 
"grad_norm": 1.7296632528305054, "learning_rate": 3.9219379513341284e-05, "loss": 5.6187, "step": 20480 }, { "epoch": 0.0392482494878477, "grad_norm": 1.939030408859253, "learning_rate": 3.923853427745037e-05, "loss": 5.714, "step": 20490 }, { "epoch": 0.039267404319222925, "grad_norm": 1.72990882396698, "learning_rate": 3.925768904155945e-05, "loss": 5.6467, "step": 20500 }, { "epoch": 0.03928655915059816, "grad_norm": 1.719230055809021, "learning_rate": 3.927684380566853e-05, "loss": 5.5344, "step": 20510 }, { "epoch": 0.03930571398197339, "grad_norm": 1.7780944108963013, "learning_rate": 3.929599856977761e-05, "loss": 5.7217, "step": 20520 }, { "epoch": 0.03932486881334862, "grad_norm": 1.7943098545074463, "learning_rate": 3.931515333388669e-05, "loss": 5.566, "step": 20530 }, { "epoch": 0.03934402364472385, "grad_norm": 1.8865653276443481, "learning_rate": 3.933430809799577e-05, "loss": 5.6567, "step": 20540 }, { "epoch": 0.03936317847609908, "grad_norm": 1.7580708265304565, "learning_rate": 3.9353462862104846e-05, "loss": 5.6363, "step": 20550 }, { "epoch": 0.03938233330747431, "grad_norm": 1.7470414638519287, "learning_rate": 3.937261762621393e-05, "loss": 5.4885, "step": 20560 }, { "epoch": 0.03940148813884954, "grad_norm": 1.7140898704528809, "learning_rate": 3.939177239032301e-05, "loss": 5.4808, "step": 20570 }, { "epoch": 0.039420642970224774, "grad_norm": 1.7119985818862915, "learning_rate": 3.941092715443209e-05, "loss": 5.4575, "step": 20580 }, { "epoch": 0.0394397978016, "grad_norm": 1.8816946744918823, "learning_rate": 3.9430081918541165e-05, "loss": 5.5451, "step": 20590 }, { "epoch": 0.03945895263297523, "grad_norm": 1.7039570808410645, "learning_rate": 3.944923668265025e-05, "loss": 5.566, "step": 20600 }, { "epoch": 0.039478107464350466, "grad_norm": 1.8433281183242798, "learning_rate": 3.9468391446759334e-05, "loss": 5.5389, "step": 20610 }, { "epoch": 0.03949726229572569, "grad_norm": 1.9368605613708496, "learning_rate": 3.948754621086841e-05, 
"loss": 5.5482, "step": 20620 }, { "epoch": 0.039516417127100925, "grad_norm": 1.8493239879608154, "learning_rate": 3.950670097497749e-05, "loss": 5.5434, "step": 20630 }, { "epoch": 0.03953557195847616, "grad_norm": 1.6142889261245728, "learning_rate": 3.952585573908657e-05, "loss": 5.6124, "step": 20640 }, { "epoch": 0.039554726789851384, "grad_norm": 1.781658411026001, "learning_rate": 3.954501050319565e-05, "loss": 5.5799, "step": 20650 }, { "epoch": 0.03957388162122662, "grad_norm": 1.8343255519866943, "learning_rate": 3.956416526730473e-05, "loss": 5.6782, "step": 20660 }, { "epoch": 0.03959303645260185, "grad_norm": 1.643381953239441, "learning_rate": 3.958332003141381e-05, "loss": 5.6334, "step": 20670 }, { "epoch": 0.039612191283977076, "grad_norm": 1.6840344667434692, "learning_rate": 3.960247479552289e-05, "loss": 5.7157, "step": 20680 }, { "epoch": 0.03963134611535231, "grad_norm": 1.715606689453125, "learning_rate": 3.962162955963197e-05, "loss": 5.5878, "step": 20690 }, { "epoch": 0.03965050094672754, "grad_norm": 1.838776707649231, "learning_rate": 3.9640784323741045e-05, "loss": 5.6285, "step": 20700 }, { "epoch": 0.039669655778102775, "grad_norm": 1.9540199041366577, "learning_rate": 3.965993908785013e-05, "loss": 5.7199, "step": 20710 }, { "epoch": 0.039688810609478, "grad_norm": 1.7930632829666138, "learning_rate": 3.9679093851959215e-05, "loss": 5.4664, "step": 20720 }, { "epoch": 0.039707965440853234, "grad_norm": 1.7052571773529053, "learning_rate": 3.969824861606829e-05, "loss": 5.5837, "step": 20730 }, { "epoch": 0.03972712027222847, "grad_norm": 1.6755186319351196, "learning_rate": 3.971740338017737e-05, "loss": 5.6808, "step": 20740 }, { "epoch": 0.03974627510360369, "grad_norm": 1.7652627229690552, "learning_rate": 3.9736558144286445e-05, "loss": 5.6301, "step": 20750 }, { "epoch": 0.039765429934978926, "grad_norm": 1.6057370901107788, "learning_rate": 3.975571290839553e-05, "loss": 5.5375, "step": 20760 }, { "epoch": 0.03978458476635416, 
"grad_norm": 1.5121819972991943, "learning_rate": 3.977486767250461e-05, "loss": 5.6154, "step": 20770 }, { "epoch": 0.039803739597729385, "grad_norm": 1.6859692335128784, "learning_rate": 3.979402243661369e-05, "loss": 5.4635, "step": 20780 }, { "epoch": 0.03982289442910462, "grad_norm": 1.8871381282806396, "learning_rate": 3.981317720072277e-05, "loss": 5.5149, "step": 20790 }, { "epoch": 0.03984204926047985, "grad_norm": 1.7299636602401733, "learning_rate": 3.983233196483185e-05, "loss": 5.5011, "step": 20800 }, { "epoch": 0.039861204091855076, "grad_norm": 1.7406905889511108, "learning_rate": 3.9851486728940926e-05, "loss": 5.5175, "step": 20810 }, { "epoch": 0.03988035892323031, "grad_norm": 1.981650710105896, "learning_rate": 3.987064149305001e-05, "loss": 5.5477, "step": 20820 }, { "epoch": 0.03989951375460554, "grad_norm": 1.5697550773620605, "learning_rate": 3.9889796257159096e-05, "loss": 5.6672, "step": 20830 }, { "epoch": 0.03991866858598077, "grad_norm": 1.6658388376235962, "learning_rate": 3.990895102126817e-05, "loss": 5.534, "step": 20840 }, { "epoch": 0.039937823417356, "grad_norm": 1.7268397808074951, "learning_rate": 3.992810578537725e-05, "loss": 5.6537, "step": 20850 }, { "epoch": 0.039956978248731234, "grad_norm": 1.72992742061615, "learning_rate": 3.9947260549486326e-05, "loss": 5.6007, "step": 20860 }, { "epoch": 0.03997613308010646, "grad_norm": 1.8741600513458252, "learning_rate": 3.9966415313595414e-05, "loss": 5.461, "step": 20870 }, { "epoch": 0.03999528791148169, "grad_norm": 1.6384810209274292, "learning_rate": 3.998557007770449e-05, "loss": 5.607, "step": 20880 }, { "epoch": 0.040014442742856926, "grad_norm": 2.0338358879089355, "learning_rate": 4.000472484181357e-05, "loss": 5.5432, "step": 20890 }, { "epoch": 0.04003359757423215, "grad_norm": 1.846183180809021, "learning_rate": 4.0023879605922644e-05, "loss": 5.5075, "step": 20900 }, { "epoch": 0.040052752405607385, "grad_norm": 1.651030421257019, "learning_rate": 
4.004303437003173e-05, "loss": 5.5358, "step": 20910 }, { "epoch": 0.04007190723698262, "grad_norm": 1.767615795135498, "learning_rate": 4.006218913414081e-05, "loss": 5.4994, "step": 20920 }, { "epoch": 0.040091062068357844, "grad_norm": 1.9639289379119873, "learning_rate": 4.008134389824989e-05, "loss": 5.605, "step": 20930 }, { "epoch": 0.04011021689973308, "grad_norm": 2.329747200012207, "learning_rate": 4.0100498662358976e-05, "loss": 5.4953, "step": 20940 }, { "epoch": 0.04012937173110831, "grad_norm": 1.8272192478179932, "learning_rate": 4.011965342646805e-05, "loss": 5.4856, "step": 20950 }, { "epoch": 0.040148526562483536, "grad_norm": 1.8826446533203125, "learning_rate": 4.013880819057713e-05, "loss": 5.5648, "step": 20960 }, { "epoch": 0.04016768139385877, "grad_norm": 2.7038352489471436, "learning_rate": 4.015796295468621e-05, "loss": 5.5256, "step": 20970 }, { "epoch": 0.040186836225234, "grad_norm": 1.724889874458313, "learning_rate": 4.0177117718795295e-05, "loss": 5.6711, "step": 20980 }, { "epoch": 0.04020599105660923, "grad_norm": 2.04229474067688, "learning_rate": 4.019627248290437e-05, "loss": 5.6673, "step": 20990 }, { "epoch": 0.04022514588798446, "grad_norm": 1.7175776958465576, "learning_rate": 4.021542724701345e-05, "loss": 5.6438, "step": 21000 }, { "epoch": 0.040244300719359694, "grad_norm": 1.740628957748413, "learning_rate": 4.0234582011122525e-05, "loss": 5.69, "step": 21010 }, { "epoch": 0.04026345555073492, "grad_norm": 1.7024155855178833, "learning_rate": 4.025373677523161e-05, "loss": 5.5442, "step": 21020 }, { "epoch": 0.04028261038211015, "grad_norm": 1.7180687189102173, "learning_rate": 4.027289153934069e-05, "loss": 5.6043, "step": 21030 }, { "epoch": 0.040301765213485385, "grad_norm": 2.0637388229370117, "learning_rate": 4.029204630344977e-05, "loss": 5.3871, "step": 21040 }, { "epoch": 0.04032092004486062, "grad_norm": 1.8076635599136353, "learning_rate": 4.0311201067558844e-05, "loss": 5.4846, "step": 21050 }, { "epoch": 
0.040340074876235844, "grad_norm": 1.7169896364212036, "learning_rate": 4.033035583166793e-05, "loss": 5.736, "step": 21060 }, { "epoch": 0.04035922970761108, "grad_norm": 1.8012325763702393, "learning_rate": 4.034951059577701e-05, "loss": 5.6475, "step": 21070 }, { "epoch": 0.04037838453898631, "grad_norm": 2.0892245769500732, "learning_rate": 4.036866535988609e-05, "loss": 5.4982, "step": 21080 }, { "epoch": 0.040397539370361536, "grad_norm": 1.6011286973953247, "learning_rate": 4.038782012399516e-05, "loss": 5.6314, "step": 21090 }, { "epoch": 0.04041669420173677, "grad_norm": 1.7308833599090576, "learning_rate": 4.040697488810425e-05, "loss": 5.7289, "step": 21100 }, { "epoch": 0.040435849033112, "grad_norm": 1.7429134845733643, "learning_rate": 4.042612965221333e-05, "loss": 5.5395, "step": 21110 }, { "epoch": 0.04045500386448723, "grad_norm": 1.757356882095337, "learning_rate": 4.0445284416322406e-05, "loss": 5.5846, "step": 21120 }, { "epoch": 0.04047415869586246, "grad_norm": 1.815421223640442, "learning_rate": 4.0464439180431494e-05, "loss": 5.4764, "step": 21130 }, { "epoch": 0.040493313527237694, "grad_norm": 1.693748950958252, "learning_rate": 4.048359394454057e-05, "loss": 5.5118, "step": 21140 }, { "epoch": 0.04051246835861292, "grad_norm": 1.7340697050094604, "learning_rate": 4.050274870864965e-05, "loss": 5.5211, "step": 21150 }, { "epoch": 0.04053162318998815, "grad_norm": 1.6831218004226685, "learning_rate": 4.0521903472758724e-05, "loss": 5.5219, "step": 21160 }, { "epoch": 0.040550778021363386, "grad_norm": 1.7162028551101685, "learning_rate": 4.054105823686781e-05, "loss": 5.5732, "step": 21170 }, { "epoch": 0.04056993285273861, "grad_norm": 1.4595774412155151, "learning_rate": 4.0560213000976894e-05, "loss": 5.5232, "step": 21180 }, { "epoch": 0.040589087684113845, "grad_norm": 1.6710985898971558, "learning_rate": 4.057936776508597e-05, "loss": 5.5865, "step": 21190 }, { "epoch": 0.04060824251548908, "grad_norm": 1.6766911745071411, 
"learning_rate": 4.059852252919504e-05, "loss": 5.4609, "step": 21200 }, { "epoch": 0.040627397346864304, "grad_norm": 1.6972010135650635, "learning_rate": 4.061767729330413e-05, "loss": 5.5752, "step": 21210 }, { "epoch": 0.04064655217823954, "grad_norm": 1.641531229019165, "learning_rate": 4.063683205741321e-05, "loss": 5.467, "step": 21220 }, { "epoch": 0.04066570700961477, "grad_norm": 1.6794403791427612, "learning_rate": 4.065598682152229e-05, "loss": 5.6387, "step": 21230 }, { "epoch": 0.040684861840989996, "grad_norm": 1.6683831214904785, "learning_rate": 4.067514158563137e-05, "loss": 5.58, "step": 21240 }, { "epoch": 0.04070401667236523, "grad_norm": 1.6396405696868896, "learning_rate": 4.069429634974045e-05, "loss": 5.6448, "step": 21250 }, { "epoch": 0.04072317150374046, "grad_norm": 1.7427605390548706, "learning_rate": 4.071345111384953e-05, "loss": 5.6283, "step": 21260 }, { "epoch": 0.04074232633511569, "grad_norm": 1.8169007301330566, "learning_rate": 4.0732605877958605e-05, "loss": 5.5063, "step": 21270 }, { "epoch": 0.04076148116649092, "grad_norm": 1.6666619777679443, "learning_rate": 4.075176064206769e-05, "loss": 5.4924, "step": 21280 }, { "epoch": 0.04078063599786615, "grad_norm": 1.6955655813217163, "learning_rate": 4.077091540617677e-05, "loss": 5.6444, "step": 21290 }, { "epoch": 0.04079979082924138, "grad_norm": 1.6728421449661255, "learning_rate": 4.079007017028585e-05, "loss": 5.6229, "step": 21300 }, { "epoch": 0.04081894566061661, "grad_norm": 1.7214992046356201, "learning_rate": 4.0809224934394924e-05, "loss": 5.4454, "step": 21310 }, { "epoch": 0.040838100491991845, "grad_norm": 1.7904107570648193, "learning_rate": 4.082837969850401e-05, "loss": 5.5344, "step": 21320 }, { "epoch": 0.04085725532336707, "grad_norm": 1.813720464706421, "learning_rate": 4.084753446261309e-05, "loss": 5.5046, "step": 21330 }, { "epoch": 0.040876410154742304, "grad_norm": 1.8339111804962158, "learning_rate": 4.086668922672217e-05, "loss": 5.5388, "step": 
21340 }, { "epoch": 0.04089556498611754, "grad_norm": 1.8187406063079834, "learning_rate": 4.088584399083125e-05, "loss": 5.6394, "step": 21350 }, { "epoch": 0.04091471981749277, "grad_norm": 1.887807011604309, "learning_rate": 4.090499875494033e-05, "loss": 5.3494, "step": 21360 }, { "epoch": 0.040933874648867996, "grad_norm": 1.5742961168289185, "learning_rate": 4.092415351904941e-05, "loss": 5.6018, "step": 21370 }, { "epoch": 0.04095302948024323, "grad_norm": 1.6326696872711182, "learning_rate": 4.0943308283158486e-05, "loss": 5.4911, "step": 21380 }, { "epoch": 0.04097218431161846, "grad_norm": 1.6892929077148438, "learning_rate": 4.096246304726757e-05, "loss": 5.3218, "step": 21390 }, { "epoch": 0.04099133914299369, "grad_norm": 1.6519277095794678, "learning_rate": 4.098161781137665e-05, "loss": 5.4374, "step": 21400 }, { "epoch": 0.04101049397436892, "grad_norm": 1.7831571102142334, "learning_rate": 4.100077257548573e-05, "loss": 5.6074, "step": 21410 }, { "epoch": 0.041029648805744154, "grad_norm": 1.822120189666748, "learning_rate": 4.1019927339594805e-05, "loss": 5.5, "step": 21420 }, { "epoch": 0.04104880363711938, "grad_norm": 1.803320288658142, "learning_rate": 4.1039082103703886e-05, "loss": 5.537, "step": 21430 }, { "epoch": 0.04106795846849461, "grad_norm": 1.750061273574829, "learning_rate": 4.1058236867812974e-05, "loss": 5.5866, "step": 21440 }, { "epoch": 0.041087113299869846, "grad_norm": 1.9383385181427002, "learning_rate": 4.107739163192205e-05, "loss": 5.5053, "step": 21450 }, { "epoch": 0.04110626813124507, "grad_norm": 1.6658252477645874, "learning_rate": 4.109654639603113e-05, "loss": 5.5847, "step": 21460 }, { "epoch": 0.041125422962620305, "grad_norm": 1.6858705282211304, "learning_rate": 4.111570116014021e-05, "loss": 5.5281, "step": 21470 }, { "epoch": 0.04114457779399554, "grad_norm": 1.779659390449524, "learning_rate": 4.113485592424929e-05, "loss": 5.6388, "step": 21480 }, { "epoch": 0.041163732625370764, "grad_norm": 
1.69219172000885, "learning_rate": 4.115401068835837e-05, "loss": 5.6674, "step": 21490 }, { "epoch": 0.041182887456746, "grad_norm": 1.6707316637039185, "learning_rate": 4.117316545246745e-05, "loss": 5.5002, "step": 21500 }, { "epoch": 0.04120204228812123, "grad_norm": 1.890453577041626, "learning_rate": 4.119232021657653e-05, "loss": 5.5448, "step": 21510 }, { "epoch": 0.041221197119496455, "grad_norm": 1.7581909894943237, "learning_rate": 4.121147498068561e-05, "loss": 5.4928, "step": 21520 }, { "epoch": 0.04124035195087169, "grad_norm": 1.7190510034561157, "learning_rate": 4.1230629744794685e-05, "loss": 5.5517, "step": 21530 }, { "epoch": 0.04125950678224692, "grad_norm": 1.7740527391433716, "learning_rate": 4.124978450890377e-05, "loss": 5.521, "step": 21540 }, { "epoch": 0.04127866161362215, "grad_norm": 1.6612439155578613, "learning_rate": 4.1268939273012855e-05, "loss": 5.4416, "step": 21550 }, { "epoch": 0.04129781644499738, "grad_norm": 1.5631539821624756, "learning_rate": 4.128809403712193e-05, "loss": 5.443, "step": 21560 }, { "epoch": 0.04131697127637261, "grad_norm": 1.5395539999008179, "learning_rate": 4.130724880123101e-05, "loss": 5.6833, "step": 21570 }, { "epoch": 0.04133612610774784, "grad_norm": 1.6412339210510254, "learning_rate": 4.1326403565340085e-05, "loss": 5.6003, "step": 21580 }, { "epoch": 0.04135528093912307, "grad_norm": 1.7941482067108154, "learning_rate": 4.134555832944917e-05, "loss": 5.5354, "step": 21590 }, { "epoch": 0.041374435770498305, "grad_norm": 1.8218741416931152, "learning_rate": 4.136471309355825e-05, "loss": 5.448, "step": 21600 }, { "epoch": 0.04139359060187353, "grad_norm": 1.7986209392547607, "learning_rate": 4.138386785766733e-05, "loss": 5.7016, "step": 21610 }, { "epoch": 0.041412745433248764, "grad_norm": 1.6754841804504395, "learning_rate": 4.140302262177641e-05, "loss": 5.5276, "step": 21620 }, { "epoch": 0.041431900264624, "grad_norm": 1.7341053485870361, "learning_rate": 4.142217738588549e-05, "loss": 
5.5589, "step": 21630 }, { "epoch": 0.04145105509599922, "grad_norm": 1.6917283535003662, "learning_rate": 4.1441332149994566e-05, "loss": 5.5041, "step": 21640 }, { "epoch": 0.041470209927374456, "grad_norm": 1.7064635753631592, "learning_rate": 4.146048691410365e-05, "loss": 5.4596, "step": 21650 }, { "epoch": 0.04148936475874969, "grad_norm": 1.662839412689209, "learning_rate": 4.1479641678212736e-05, "loss": 5.569, "step": 21660 }, { "epoch": 0.041508519590124915, "grad_norm": 1.7517690658569336, "learning_rate": 4.149879644232181e-05, "loss": 5.5221, "step": 21670 }, { "epoch": 0.04152767442150015, "grad_norm": 1.9325190782546997, "learning_rate": 4.151795120643089e-05, "loss": 5.5722, "step": 21680 }, { "epoch": 0.04154682925287538, "grad_norm": 1.6118210554122925, "learning_rate": 4.1537105970539966e-05, "loss": 5.4369, "step": 21690 }, { "epoch": 0.041565984084250614, "grad_norm": 1.8077090978622437, "learning_rate": 4.1556260734649054e-05, "loss": 5.5144, "step": 21700 }, { "epoch": 0.04158513891562584, "grad_norm": 1.7032088041305542, "learning_rate": 4.157541549875813e-05, "loss": 5.5901, "step": 21710 }, { "epoch": 0.04160429374700107, "grad_norm": 1.7584600448608398, "learning_rate": 4.159457026286721e-05, "loss": 5.5368, "step": 21720 }, { "epoch": 0.041623448578376306, "grad_norm": 1.6128325462341309, "learning_rate": 4.1613725026976284e-05, "loss": 5.6606, "step": 21730 }, { "epoch": 0.04164260340975153, "grad_norm": 1.8994791507720947, "learning_rate": 4.163287979108537e-05, "loss": 5.5222, "step": 21740 }, { "epoch": 0.041661758241126765, "grad_norm": 1.6311140060424805, "learning_rate": 4.165203455519445e-05, "loss": 5.4494, "step": 21750 }, { "epoch": 0.041680913072502, "grad_norm": 1.8443341255187988, "learning_rate": 4.167118931930353e-05, "loss": 5.4923, "step": 21760 }, { "epoch": 0.04170006790387722, "grad_norm": 1.742997646331787, "learning_rate": 4.16903440834126e-05, "loss": 5.5762, "step": 21770 }, { "epoch": 0.041719222735252456, 
"grad_norm": 1.797096610069275, "learning_rate": 4.170949884752169e-05, "loss": 5.4862, "step": 21780 }, { "epoch": 0.04173837756662769, "grad_norm": 1.5431082248687744, "learning_rate": 4.172865361163077e-05, "loss": 5.7178, "step": 21790 }, { "epoch": 0.041757532398002915, "grad_norm": 1.6969727277755737, "learning_rate": 4.174780837573985e-05, "loss": 5.615, "step": 21800 }, { "epoch": 0.04177668722937815, "grad_norm": 1.716034173965454, "learning_rate": 4.1766963139848935e-05, "loss": 5.4825, "step": 21810 }, { "epoch": 0.04179584206075338, "grad_norm": 1.9185562133789062, "learning_rate": 4.178611790395801e-05, "loss": 5.4667, "step": 21820 }, { "epoch": 0.04181499689212861, "grad_norm": 1.6076388359069824, "learning_rate": 4.180527266806709e-05, "loss": 5.5846, "step": 21830 }, { "epoch": 0.04183415172350384, "grad_norm": 1.7651902437210083, "learning_rate": 4.1824427432176165e-05, "loss": 5.612, "step": 21840 }, { "epoch": 0.04185330655487907, "grad_norm": 1.6939386129379272, "learning_rate": 4.184358219628525e-05, "loss": 5.4176, "step": 21850 }, { "epoch": 0.0418724613862543, "grad_norm": 1.685826063156128, "learning_rate": 4.186273696039433e-05, "loss": 5.4881, "step": 21860 }, { "epoch": 0.04189161621762953, "grad_norm": 2.1899023056030273, "learning_rate": 4.188189172450341e-05, "loss": 5.4657, "step": 21870 }, { "epoch": 0.041910771049004765, "grad_norm": 1.642876386642456, "learning_rate": 4.1901046488612484e-05, "loss": 5.5115, "step": 21880 }, { "epoch": 0.04192992588037999, "grad_norm": 1.596312165260315, "learning_rate": 4.192020125272157e-05, "loss": 5.5838, "step": 21890 }, { "epoch": 0.041949080711755224, "grad_norm": 1.6442192792892456, "learning_rate": 4.193935601683065e-05, "loss": 5.5675, "step": 21900 }, { "epoch": 0.04196823554313046, "grad_norm": 1.7109805345535278, "learning_rate": 4.195851078093973e-05, "loss": 5.446, "step": 21910 }, { "epoch": 0.04198739037450568, "grad_norm": 1.8113727569580078, "learning_rate": 
4.19776655450488e-05, "loss": 5.5178, "step": 21920 }, { "epoch": 0.042006545205880916, "grad_norm": 2.1228394508361816, "learning_rate": 4.199682030915789e-05, "loss": 5.5288, "step": 21930 }, { "epoch": 0.04202570003725615, "grad_norm": 2.001939058303833, "learning_rate": 4.201597507326697e-05, "loss": 5.4484, "step": 21940 }, { "epoch": 0.042044854868631375, "grad_norm": 1.9352003335952759, "learning_rate": 4.2035129837376046e-05, "loss": 5.4174, "step": 21950 }, { "epoch": 0.04206400970000661, "grad_norm": 1.6686475276947021, "learning_rate": 4.2054284601485134e-05, "loss": 5.4556, "step": 21960 }, { "epoch": 0.04208316453138184, "grad_norm": 1.7939943075180054, "learning_rate": 4.207343936559421e-05, "loss": 5.42, "step": 21970 }, { "epoch": 0.04210231936275707, "grad_norm": 1.6730303764343262, "learning_rate": 4.209259412970329e-05, "loss": 5.4755, "step": 21980 }, { "epoch": 0.0421214741941323, "grad_norm": 1.6897391080856323, "learning_rate": 4.2111748893812364e-05, "loss": 5.4904, "step": 21990 }, { "epoch": 0.04214062902550753, "grad_norm": 1.737194538116455, "learning_rate": 4.213090365792145e-05, "loss": 5.4807, "step": 22000 }, { "epoch": 0.04215978385688276, "grad_norm": 1.5614218711853027, "learning_rate": 4.215005842203053e-05, "loss": 5.6549, "step": 22010 }, { "epoch": 0.04217893868825799, "grad_norm": 1.8230215311050415, "learning_rate": 4.216921318613961e-05, "loss": 5.6025, "step": 22020 }, { "epoch": 0.042198093519633224, "grad_norm": 1.8364251852035522, "learning_rate": 4.218836795024868e-05, "loss": 5.5094, "step": 22030 }, { "epoch": 0.04221724835100846, "grad_norm": 1.5178422927856445, "learning_rate": 4.220752271435777e-05, "loss": 5.4251, "step": 22040 }, { "epoch": 0.04223640318238368, "grad_norm": 1.9920711517333984, "learning_rate": 4.222667747846685e-05, "loss": 5.5012, "step": 22050 }, { "epoch": 0.042255558013758916, "grad_norm": 1.6453092098236084, "learning_rate": 4.224583224257593e-05, "loss": 5.3414, "step": 22060 }, { "epoch": 
0.04227471284513415, "grad_norm": 1.6748054027557373, "learning_rate": 4.226498700668501e-05, "loss": 5.5388, "step": 22070 }, { "epoch": 0.042293867676509375, "grad_norm": 1.981998085975647, "learning_rate": 4.228414177079409e-05, "loss": 5.5219, "step": 22080 }, { "epoch": 0.04231302250788461, "grad_norm": 1.7072608470916748, "learning_rate": 4.230329653490317e-05, "loss": 5.4342, "step": 22090 }, { "epoch": 0.04233217733925984, "grad_norm": 1.8471124172210693, "learning_rate": 4.2322451299012245e-05, "loss": 5.4848, "step": 22100 }, { "epoch": 0.04235133217063507, "grad_norm": 1.6554954051971436, "learning_rate": 4.2341606063121327e-05, "loss": 5.657, "step": 22110 }, { "epoch": 0.0423704870020103, "grad_norm": 1.8206287622451782, "learning_rate": 4.236076082723041e-05, "loss": 5.4393, "step": 22120 }, { "epoch": 0.04238964183338553, "grad_norm": 1.5825343132019043, "learning_rate": 4.237991559133949e-05, "loss": 5.6179, "step": 22130 }, { "epoch": 0.04240879666476076, "grad_norm": 1.82975172996521, "learning_rate": 4.2399070355448564e-05, "loss": 5.4964, "step": 22140 }, { "epoch": 0.04242795149613599, "grad_norm": 1.7667447328567505, "learning_rate": 4.241822511955765e-05, "loss": 5.4287, "step": 22150 }, { "epoch": 0.042447106327511225, "grad_norm": 1.6111596822738647, "learning_rate": 4.243737988366673e-05, "loss": 5.5128, "step": 22160 }, { "epoch": 0.04246626115888645, "grad_norm": 1.7126809358596802, "learning_rate": 4.245653464777581e-05, "loss": 5.6116, "step": 22170 }, { "epoch": 0.042485415990261684, "grad_norm": 1.9876362085342407, "learning_rate": 4.247568941188489e-05, "loss": 5.4201, "step": 22180 }, { "epoch": 0.04250457082163692, "grad_norm": 1.6592326164245605, "learning_rate": 4.249484417599397e-05, "loss": 5.4842, "step": 22190 }, { "epoch": 0.04252372565301214, "grad_norm": 2.2138397693634033, "learning_rate": 4.251399894010305e-05, "loss": 5.5267, "step": 22200 }, { "epoch": 0.042542880484387376, "grad_norm": 1.7491976022720337, 
"learning_rate": 4.2533153704212126e-05, "loss": 5.3862, "step": 22210 }, { "epoch": 0.04256203531576261, "grad_norm": 1.7580252885818481, "learning_rate": 4.255230846832121e-05, "loss": 5.4971, "step": 22220 }, { "epoch": 0.042581190147137835, "grad_norm": 1.8275296688079834, "learning_rate": 4.257146323243029e-05, "loss": 5.5439, "step": 22230 }, { "epoch": 0.04260034497851307, "grad_norm": 1.7213448286056519, "learning_rate": 4.259061799653937e-05, "loss": 5.5615, "step": 22240 }, { "epoch": 0.0426194998098883, "grad_norm": 1.6944187879562378, "learning_rate": 4.2609772760648445e-05, "loss": 5.438, "step": 22250 }, { "epoch": 0.042638654641263526, "grad_norm": 1.64655601978302, "learning_rate": 4.2628927524757526e-05, "loss": 5.4502, "step": 22260 }, { "epoch": 0.04265780947263876, "grad_norm": 1.8785501718521118, "learning_rate": 4.2648082288866614e-05, "loss": 5.5369, "step": 22270 }, { "epoch": 0.04267696430401399, "grad_norm": 1.7108306884765625, "learning_rate": 4.266723705297569e-05, "loss": 5.5089, "step": 22280 }, { "epoch": 0.04269611913538922, "grad_norm": 1.9493650197982788, "learning_rate": 4.268639181708477e-05, "loss": 5.343, "step": 22290 }, { "epoch": 0.04271527396676445, "grad_norm": 1.797438144683838, "learning_rate": 4.270554658119385e-05, "loss": 5.4085, "step": 22300 }, { "epoch": 0.042734428798139684, "grad_norm": 1.6646356582641602, "learning_rate": 4.272470134530293e-05, "loss": 5.4864, "step": 22310 }, { "epoch": 0.04275358362951491, "grad_norm": 1.7193619012832642, "learning_rate": 4.274385610941201e-05, "loss": 5.5492, "step": 22320 }, { "epoch": 0.04277273846089014, "grad_norm": 1.6717089414596558, "learning_rate": 4.276301087352109e-05, "loss": 5.4737, "step": 22330 }, { "epoch": 0.042791893292265376, "grad_norm": 1.7338066101074219, "learning_rate": 4.278216563763017e-05, "loss": 5.4716, "step": 22340 }, { "epoch": 0.04281104812364061, "grad_norm": 1.6437714099884033, "learning_rate": 4.280132040173925e-05, "loss": 5.521, "step": 
22350 }, { "epoch": 0.042830202955015835, "grad_norm": 1.7343695163726807, "learning_rate": 4.2820475165848325e-05, "loss": 5.4463, "step": 22360 }, { "epoch": 0.04284935778639107, "grad_norm": 1.8958501815795898, "learning_rate": 4.283962992995741e-05, "loss": 5.4423, "step": 22370 }, { "epoch": 0.0428685126177663, "grad_norm": 1.757185459136963, "learning_rate": 4.2858784694066495e-05, "loss": 5.4845, "step": 22380 }, { "epoch": 0.04288766744914153, "grad_norm": 1.5872039794921875, "learning_rate": 4.287793945817557e-05, "loss": 5.4767, "step": 22390 }, { "epoch": 0.04290682228051676, "grad_norm": 1.8071297407150269, "learning_rate": 4.289709422228465e-05, "loss": 5.4473, "step": 22400 }, { "epoch": 0.04292597711189199, "grad_norm": 1.6382226943969727, "learning_rate": 4.2916248986393725e-05, "loss": 5.5466, "step": 22410 }, { "epoch": 0.04294513194326722, "grad_norm": 1.6471236944198608, "learning_rate": 4.293540375050281e-05, "loss": 5.3986, "step": 22420 }, { "epoch": 0.04296428677464245, "grad_norm": 1.679511308670044, "learning_rate": 4.295455851461189e-05, "loss": 5.3093, "step": 22430 }, { "epoch": 0.042983441606017685, "grad_norm": 1.664379358291626, "learning_rate": 4.297371327872097e-05, "loss": 5.3858, "step": 22440 }, { "epoch": 0.04300259643739291, "grad_norm": 1.6882474422454834, "learning_rate": 4.2992868042830044e-05, "loss": 5.4855, "step": 22450 }, { "epoch": 0.043021751268768144, "grad_norm": 1.7357895374298096, "learning_rate": 4.301202280693913e-05, "loss": 5.4664, "step": 22460 }, { "epoch": 0.04304090610014338, "grad_norm": 1.7156147956848145, "learning_rate": 4.3031177571048206e-05, "loss": 5.3291, "step": 22470 }, { "epoch": 0.0430600609315186, "grad_norm": 1.937348484992981, "learning_rate": 4.305033233515729e-05, "loss": 5.4092, "step": 22480 }, { "epoch": 0.043079215762893835, "grad_norm": 1.555476427078247, "learning_rate": 4.3069487099266376e-05, "loss": 5.5726, "step": 22490 }, { "epoch": 0.04309837059426907, "grad_norm": 
1.7157540321350098, "learning_rate": 4.308864186337545e-05, "loss": 5.4612, "step": 22500 }, { "epoch": 0.043117525425644294, "grad_norm": 1.641829252243042, "learning_rate": 4.310779662748453e-05, "loss": 5.4815, "step": 22510 }, { "epoch": 0.04313668025701953, "grad_norm": 1.6940442323684692, "learning_rate": 4.3126951391593606e-05, "loss": 5.5026, "step": 22520 }, { "epoch": 0.04315583508839476, "grad_norm": 1.6061381101608276, "learning_rate": 4.3146106155702694e-05, "loss": 5.4364, "step": 22530 }, { "epoch": 0.043174989919769986, "grad_norm": 1.8631155490875244, "learning_rate": 4.316526091981177e-05, "loss": 5.3937, "step": 22540 }, { "epoch": 0.04319414475114522, "grad_norm": 1.7482094764709473, "learning_rate": 4.318441568392085e-05, "loss": 5.5163, "step": 22550 }, { "epoch": 0.04321329958252045, "grad_norm": 1.8736945390701294, "learning_rate": 4.320165497161902e-05, "loss": 5.5133, "step": 22560 }, { "epoch": 0.04323245441389568, "grad_norm": 1.7619984149932861, "learning_rate": 4.3220809735728104e-05, "loss": 5.4754, "step": 22570 }, { "epoch": 0.04325160924527091, "grad_norm": 1.7284411191940308, "learning_rate": 4.323996449983718e-05, "loss": 5.6326, "step": 22580 }, { "epoch": 0.043270764076646144, "grad_norm": 1.817244291305542, "learning_rate": 4.325911926394626e-05, "loss": 5.541, "step": 22590 }, { "epoch": 0.04328991890802137, "grad_norm": 2.1256864070892334, "learning_rate": 4.327827402805534e-05, "loss": 5.4083, "step": 22600 }, { "epoch": 0.0433090737393966, "grad_norm": 1.9055060148239136, "learning_rate": 4.329742879216442e-05, "loss": 5.5708, "step": 22610 }, { "epoch": 0.043328228570771836, "grad_norm": 1.6947410106658936, "learning_rate": 4.33165835562735e-05, "loss": 5.4673, "step": 22620 }, { "epoch": 0.04334738340214706, "grad_norm": 1.7528280019760132, "learning_rate": 4.3335738320382585e-05, "loss": 5.4878, "step": 22630 }, { "epoch": 0.043366538233522295, "grad_norm": 1.6580768823623657, "learning_rate": 4.335489308449166e-05, 
"loss": 5.4785, "step": 22640 }, { "epoch": 0.04338569306489753, "grad_norm": 1.612146019935608, "learning_rate": 4.337404784860074e-05, "loss": 5.6049, "step": 22650 }, { "epoch": 0.043404847896272754, "grad_norm": 1.5347756147384644, "learning_rate": 4.3393202612709816e-05, "loss": 5.5654, "step": 22660 }, { "epoch": 0.04342400272764799, "grad_norm": 1.9696266651153564, "learning_rate": 4.3412357376818904e-05, "loss": 5.3969, "step": 22670 }, { "epoch": 0.04344315755902322, "grad_norm": 2.2123329639434814, "learning_rate": 4.3431512140927985e-05, "loss": 5.4738, "step": 22680 }, { "epoch": 0.04346231239039845, "grad_norm": 1.6889960765838623, "learning_rate": 4.345066690503706e-05, "loss": 5.349, "step": 22690 }, { "epoch": 0.04348146722177368, "grad_norm": 1.7686983346939087, "learning_rate": 4.346982166914614e-05, "loss": 5.486, "step": 22700 }, { "epoch": 0.04350062205314891, "grad_norm": 1.7330046892166138, "learning_rate": 4.348897643325522e-05, "loss": 5.4941, "step": 22710 }, { "epoch": 0.043519776884524145, "grad_norm": 1.5978199243545532, "learning_rate": 4.3508131197364304e-05, "loss": 5.4622, "step": 22720 }, { "epoch": 0.04353893171589937, "grad_norm": 1.5648101568222046, "learning_rate": 4.352728596147338e-05, "loss": 5.5362, "step": 22730 }, { "epoch": 0.0435580865472746, "grad_norm": 1.6276811361312866, "learning_rate": 4.354644072558246e-05, "loss": 5.4143, "step": 22740 }, { "epoch": 0.043577241378649836, "grad_norm": 1.7342299222946167, "learning_rate": 4.356559548969154e-05, "loss": 5.3916, "step": 22750 }, { "epoch": 0.04359639621002506, "grad_norm": 1.7937740087509155, "learning_rate": 4.358475025380062e-05, "loss": 5.5049, "step": 22760 }, { "epoch": 0.043615551041400295, "grad_norm": 1.7402465343475342, "learning_rate": 4.3603905017909697e-05, "loss": 5.4245, "step": 22770 }, { "epoch": 0.04363470587277553, "grad_norm": 1.8231366872787476, "learning_rate": 4.362305978201878e-05, "loss": 5.4337, "step": 22780 }, { "epoch": 
0.043653860704150754, "grad_norm": 1.6425830125808716, "learning_rate": 4.3642214546127866e-05, "loss": 5.4262, "step": 22790 }, { "epoch": 0.04367301553552599, "grad_norm": 2.201448917388916, "learning_rate": 4.366136931023694e-05, "loss": 5.5945, "step": 22800 }, { "epoch": 0.04369217036690122, "grad_norm": 1.7582025527954102, "learning_rate": 4.3680524074346015e-05, "loss": 5.4077, "step": 22810 }, { "epoch": 0.043711325198276446, "grad_norm": 1.841823697090149, "learning_rate": 4.36996788384551e-05, "loss": 5.3994, "step": 22820 }, { "epoch": 0.04373048002965168, "grad_norm": 2.0019659996032715, "learning_rate": 4.3718833602564184e-05, "loss": 5.5075, "step": 22830 }, { "epoch": 0.04374963486102691, "grad_norm": 1.636641502380371, "learning_rate": 4.373798836667326e-05, "loss": 5.3171, "step": 22840 }, { "epoch": 0.04376878969240214, "grad_norm": 1.7679219245910645, "learning_rate": 4.375714313078234e-05, "loss": 5.2916, "step": 22850 }, { "epoch": 0.04378794452377737, "grad_norm": 1.7323806285858154, "learning_rate": 4.377629789489142e-05, "loss": 5.5105, "step": 22860 }, { "epoch": 0.043807099355152604, "grad_norm": 1.7555382251739502, "learning_rate": 4.37954526590005e-05, "loss": 5.3107, "step": 22870 }, { "epoch": 0.04382625418652783, "grad_norm": 1.6987881660461426, "learning_rate": 4.381460742310958e-05, "loss": 5.5415, "step": 22880 }, { "epoch": 0.04384540901790306, "grad_norm": 1.7022708654403687, "learning_rate": 4.383376218721866e-05, "loss": 5.3764, "step": 22890 }, { "epoch": 0.043864563849278296, "grad_norm": 1.7519211769104004, "learning_rate": 4.385291695132775e-05, "loss": 5.5484, "step": 22900 }, { "epoch": 0.04388371868065352, "grad_norm": 1.636641502380371, "learning_rate": 4.387207171543682e-05, "loss": 5.6339, "step": 22910 }, { "epoch": 0.043902873512028755, "grad_norm": 1.5818651914596558, "learning_rate": 4.3891226479545896e-05, "loss": 5.3774, "step": 22920 }, { "epoch": 0.04392202834340399, "grad_norm": 1.8549901247024536, 
"learning_rate": 4.391038124365498e-05, "loss": 5.2987, "step": 22930 }, { "epoch": 0.043941183174779214, "grad_norm": 1.8343772888183594, "learning_rate": 4.3929536007764065e-05, "loss": 5.5207, "step": 22940 }, { "epoch": 0.04396033800615445, "grad_norm": 1.6574006080627441, "learning_rate": 4.394869077187314e-05, "loss": 5.3049, "step": 22950 }, { "epoch": 0.04397949283752968, "grad_norm": 2.2029342651367188, "learning_rate": 4.396784553598222e-05, "loss": 5.2764, "step": 22960 }, { "epoch": 0.043998647668904906, "grad_norm": 1.811469554901123, "learning_rate": 4.39870003000913e-05, "loss": 5.4037, "step": 22970 }, { "epoch": 0.04401780250028014, "grad_norm": 1.842588186264038, "learning_rate": 4.4006155064200384e-05, "loss": 5.5328, "step": 22980 }, { "epoch": 0.04403695733165537, "grad_norm": 1.8250306844711304, "learning_rate": 4.402530982830946e-05, "loss": 5.4339, "step": 22990 }, { "epoch": 0.0440561121630306, "grad_norm": 1.6251283884048462, "learning_rate": 4.404446459241854e-05, "loss": 5.5437, "step": 23000 }, { "epoch": 0.04407526699440583, "grad_norm": 1.6229389905929565, "learning_rate": 4.406361935652762e-05, "loss": 5.5833, "step": 23010 }, { "epoch": 0.04409442182578106, "grad_norm": 1.6972064971923828, "learning_rate": 4.40827741206367e-05, "loss": 5.4321, "step": 23020 }, { "epoch": 0.044113576657156296, "grad_norm": 1.682190179824829, "learning_rate": 4.410192888474578e-05, "loss": 5.412, "step": 23030 }, { "epoch": 0.04413273148853152, "grad_norm": 1.6726477146148682, "learning_rate": 4.412108364885486e-05, "loss": 5.4392, "step": 23040 }, { "epoch": 0.044151886319906755, "grad_norm": 1.8105621337890625, "learning_rate": 4.4140238412963946e-05, "loss": 5.4805, "step": 23050 }, { "epoch": 0.04417104115128199, "grad_norm": 1.5409080982208252, "learning_rate": 4.415939317707302e-05, "loss": 5.5185, "step": 23060 }, { "epoch": 0.044190195982657214, "grad_norm": 1.72085702419281, "learning_rate": 4.41785479411821e-05, "loss": 5.4203, "step": 23070 
}, { "epoch": 0.04420935081403245, "grad_norm": 1.6570465564727783, "learning_rate": 4.4197702705291176e-05, "loss": 5.5142, "step": 23080 }, { "epoch": 0.04422850564540768, "grad_norm": 1.5536025762557983, "learning_rate": 4.4216857469400264e-05, "loss": 5.5802, "step": 23090 }, { "epoch": 0.044247660476782906, "grad_norm": 1.642465591430664, "learning_rate": 4.423601223350934e-05, "loss": 5.4635, "step": 23100 }, { "epoch": 0.04426681530815814, "grad_norm": 1.621293067932129, "learning_rate": 4.425516699761842e-05, "loss": 5.5813, "step": 23110 }, { "epoch": 0.04428597013953337, "grad_norm": 1.6920359134674072, "learning_rate": 4.4274321761727495e-05, "loss": 5.59, "step": 23120 }, { "epoch": 0.0443051249709086, "grad_norm": 1.6504510641098022, "learning_rate": 4.429347652583658e-05, "loss": 5.4619, "step": 23130 }, { "epoch": 0.04432427980228383, "grad_norm": 1.6709420680999756, "learning_rate": 4.431263128994566e-05, "loss": 5.433, "step": 23140 }, { "epoch": 0.044343434633659064, "grad_norm": 1.5693175792694092, "learning_rate": 4.433178605405474e-05, "loss": 5.3477, "step": 23150 }, { "epoch": 0.04436258946503429, "grad_norm": 1.6257771253585815, "learning_rate": 4.435094081816383e-05, "loss": 5.451, "step": 23160 }, { "epoch": 0.04438174429640952, "grad_norm": 1.6448112726211548, "learning_rate": 4.43700955822729e-05, "loss": 5.2234, "step": 23170 }, { "epoch": 0.044400899127784756, "grad_norm": 1.6034483909606934, "learning_rate": 4.438925034638198e-05, "loss": 5.371, "step": 23180 }, { "epoch": 0.04442005395915998, "grad_norm": 1.615350365638733, "learning_rate": 4.440840511049106e-05, "loss": 5.6032, "step": 23190 }, { "epoch": 0.044439208790535215, "grad_norm": 1.722683310508728, "learning_rate": 4.442564439818923e-05, "loss": 5.4611, "step": 23200 }, { "epoch": 0.04445836362191045, "grad_norm": 2.088371753692627, "learning_rate": 4.444479916229831e-05, "loss": 5.4678, "step": 23210 }, { "epoch": 0.044477518453285673, "grad_norm": 2.038200855255127, 
"learning_rate": 4.4463953926407386e-05, "loss": 5.4563, "step": 23220 }, { "epoch": 0.044496673284660906, "grad_norm": 1.709191083908081, "learning_rate": 4.4483108690516474e-05, "loss": 5.4064, "step": 23230 }, { "epoch": 0.04451582811603614, "grad_norm": 1.8165665864944458, "learning_rate": 4.4502263454625556e-05, "loss": 5.3754, "step": 23240 }, { "epoch": 0.044534982947411365, "grad_norm": 1.5640407800674438, "learning_rate": 4.452141821873463e-05, "loss": 5.4904, "step": 23250 }, { "epoch": 0.0445541377787866, "grad_norm": 1.629921317100525, "learning_rate": 4.454057298284371e-05, "loss": 5.5012, "step": 23260 }, { "epoch": 0.04457329261016183, "grad_norm": 1.6702052354812622, "learning_rate": 4.455972774695279e-05, "loss": 5.6012, "step": 23270 }, { "epoch": 0.04459244744153706, "grad_norm": 1.9745230674743652, "learning_rate": 4.4578882511061874e-05, "loss": 5.3439, "step": 23280 }, { "epoch": 0.04461160227291229, "grad_norm": 1.5741937160491943, "learning_rate": 4.459803727517095e-05, "loss": 5.433, "step": 23290 }, { "epoch": 0.04463075710428752, "grad_norm": 1.615976095199585, "learning_rate": 4.461719203928004e-05, "loss": 5.3791, "step": 23300 }, { "epoch": 0.04464991193566275, "grad_norm": 1.8172121047973633, "learning_rate": 4.463634680338911e-05, "loss": 5.4991, "step": 23310 }, { "epoch": 0.04466906676703798, "grad_norm": 1.942478060722351, "learning_rate": 4.465550156749819e-05, "loss": 5.3319, "step": 23320 }, { "epoch": 0.044688221598413215, "grad_norm": 1.7399004697799683, "learning_rate": 4.467465633160727e-05, "loss": 5.3927, "step": 23330 }, { "epoch": 0.04470737642978844, "grad_norm": 1.7254799604415894, "learning_rate": 4.4693811095716355e-05, "loss": 5.557, "step": 23340 }, { "epoch": 0.044726531261163674, "grad_norm": 1.8099709749221802, "learning_rate": 4.4712965859825436e-05, "loss": 5.5383, "step": 23350 }, { "epoch": 0.04474568609253891, "grad_norm": 1.7606289386749268, "learning_rate": 4.473212062393451e-05, "loss": 5.4907, "step": 
23360 }, { "epoch": 0.04476484092391414, "grad_norm": 1.784557819366455, "learning_rate": 4.475127538804359e-05, "loss": 5.387, "step": 23370 }, { "epoch": 0.044783995755289366, "grad_norm": 1.7426129579544067, "learning_rate": 4.4770430152152674e-05, "loss": 5.3414, "step": 23380 }, { "epoch": 0.0448031505866646, "grad_norm": 1.6859042644500732, "learning_rate": 4.4789584916261755e-05, "loss": 5.4599, "step": 23390 }, { "epoch": 0.04482230541803983, "grad_norm": 1.6434743404388428, "learning_rate": 4.480873968037083e-05, "loss": 5.4005, "step": 23400 }, { "epoch": 0.04484146024941506, "grad_norm": 1.7185615301132202, "learning_rate": 4.482789444447991e-05, "loss": 5.4074, "step": 23410 }, { "epoch": 0.04486061508079029, "grad_norm": 1.651898741722107, "learning_rate": 4.484704920858899e-05, "loss": 5.253, "step": 23420 }, { "epoch": 0.044879769912165524, "grad_norm": 2.0159289836883545, "learning_rate": 4.486620397269807e-05, "loss": 5.4746, "step": 23430 }, { "epoch": 0.04489892474354075, "grad_norm": 1.5943002700805664, "learning_rate": 4.488535873680715e-05, "loss": 5.4355, "step": 23440 }, { "epoch": 0.04491807957491598, "grad_norm": 1.657310962677002, "learning_rate": 4.490451350091623e-05, "loss": 5.5179, "step": 23450 }, { "epoch": 0.044937234406291215, "grad_norm": 1.6648303270339966, "learning_rate": 4.492366826502532e-05, "loss": 5.4888, "step": 23460 }, { "epoch": 0.04495638923766644, "grad_norm": 1.6299138069152832, "learning_rate": 4.494282302913439e-05, "loss": 5.2489, "step": 23470 }, { "epoch": 0.044975544069041674, "grad_norm": 1.604596734046936, "learning_rate": 4.496197779324347e-05, "loss": 5.3838, "step": 23480 }, { "epoch": 0.04499469890041691, "grad_norm": 1.6753547191619873, "learning_rate": 4.4981132557352554e-05, "loss": 5.4896, "step": 23490 }, { "epoch": 0.04501385373179213, "grad_norm": 1.7518259286880493, "learning_rate": 4.5000287321461636e-05, "loss": 5.4232, "step": 23500 }, { "epoch": 0.045033008563167366, "grad_norm": 
1.6330009698867798, "learning_rate": 4.501944208557071e-05, "loss": 5.5375, "step": 23510 }, { "epoch": 0.0450521633945426, "grad_norm": 1.8746062517166138, "learning_rate": 4.503859684967979e-05, "loss": 5.496, "step": 23520 }, { "epoch": 0.045071318225917825, "grad_norm": 1.7634941339492798, "learning_rate": 4.505775161378887e-05, "loss": 5.3998, "step": 23530 }, { "epoch": 0.04509047305729306, "grad_norm": 1.5653512477874756, "learning_rate": 4.5076906377897954e-05, "loss": 5.38, "step": 23540 }, { "epoch": 0.04510962788866829, "grad_norm": 1.6119801998138428, "learning_rate": 4.509606114200703e-05, "loss": 5.3695, "step": 23550 }, { "epoch": 0.04512878272004352, "grad_norm": 1.7731789350509644, "learning_rate": 4.511521590611611e-05, "loss": 5.3029, "step": 23560 }, { "epoch": 0.04514793755141875, "grad_norm": 1.7322052717208862, "learning_rate": 4.51343706702252e-05, "loss": 5.3693, "step": 23570 }, { "epoch": 0.04516709238279398, "grad_norm": 1.7041879892349243, "learning_rate": 4.515352543433427e-05, "loss": 5.4037, "step": 23580 }, { "epoch": 0.04518624721416921, "grad_norm": 1.570600152015686, "learning_rate": 4.5172680198443354e-05, "loss": 5.4793, "step": 23590 }, { "epoch": 0.04520540204554444, "grad_norm": 1.6818448305130005, "learning_rate": 4.519183496255243e-05, "loss": 5.5866, "step": 23600 }, { "epoch": 0.045224556876919675, "grad_norm": 1.572041392326355, "learning_rate": 4.5210989726661516e-05, "loss": 5.5147, "step": 23610 }, { "epoch": 0.0452437117082949, "grad_norm": 1.6951377391815186, "learning_rate": 4.523014449077059e-05, "loss": 5.4698, "step": 23620 }, { "epoch": 0.045262866539670134, "grad_norm": 2.4726784229278564, "learning_rate": 4.524929925487967e-05, "loss": 5.5565, "step": 23630 }, { "epoch": 0.04528202137104537, "grad_norm": 1.656836748123169, "learning_rate": 4.5268454018988754e-05, "loss": 5.271, "step": 23640 }, { "epoch": 0.04530117620242059, "grad_norm": 1.6152796745300293, "learning_rate": 4.5287608783097835e-05, "loss": 
5.393, "step": 23650 }, { "epoch": 0.045320331033795826, "grad_norm": 1.8570858240127563, "learning_rate": 4.530676354720691e-05, "loss": 5.5247, "step": 23660 }, { "epoch": 0.04533948586517106, "grad_norm": 2.0728468894958496, "learning_rate": 4.532591831131599e-05, "loss": 5.4185, "step": 23670 }, { "epoch": 0.04535864069654629, "grad_norm": 1.5995585918426514, "learning_rate": 4.534507307542508e-05, "loss": 5.3848, "step": 23680 }, { "epoch": 0.04537779552792152, "grad_norm": 1.8194448947906494, "learning_rate": 4.536422783953415e-05, "loss": 5.5335, "step": 23690 }, { "epoch": 0.04539695035929675, "grad_norm": 1.6679824590682983, "learning_rate": 4.5383382603643235e-05, "loss": 5.3371, "step": 23700 }, { "epoch": 0.04541610519067198, "grad_norm": 1.7804826498031616, "learning_rate": 4.540253736775231e-05, "loss": 5.3883, "step": 23710 }, { "epoch": 0.04543526002204721, "grad_norm": 1.748882532119751, "learning_rate": 4.54216921318614e-05, "loss": 5.4127, "step": 23720 }, { "epoch": 0.04545441485342244, "grad_norm": 1.5958609580993652, "learning_rate": 4.544084689597047e-05, "loss": 5.5247, "step": 23730 }, { "epoch": 0.045473569684797675, "grad_norm": 1.9450165033340454, "learning_rate": 4.546000166007955e-05, "loss": 5.5683, "step": 23740 }, { "epoch": 0.0454927245161729, "grad_norm": 1.6119743585586548, "learning_rate": 4.547915642418863e-05, "loss": 5.5376, "step": 23750 }, { "epoch": 0.045511879347548134, "grad_norm": 1.7540466785430908, "learning_rate": 4.5498311188297716e-05, "loss": 5.4467, "step": 23760 }, { "epoch": 0.04553103417892337, "grad_norm": 1.6570706367492676, "learning_rate": 4.551746595240679e-05, "loss": 5.5288, "step": 23770 }, { "epoch": 0.04555018901029859, "grad_norm": 2.2348103523254395, "learning_rate": 4.553662071651587e-05, "loss": 5.3221, "step": 23780 }, { "epoch": 0.045569343841673826, "grad_norm": 1.628211498260498, "learning_rate": 4.5555775480624946e-05, "loss": 5.4588, "step": 23790 }, { "epoch": 0.04558849867304906, 
"grad_norm": 1.7122119665145874, "learning_rate": 4.5574930244734034e-05, "loss": 5.597, "step": 23800 }, { "epoch": 0.045607653504424285, "grad_norm": 1.7311651706695557, "learning_rate": 4.559408500884311e-05, "loss": 5.5053, "step": 23810 }, { "epoch": 0.04562680833579952, "grad_norm": 1.6251729726791382, "learning_rate": 4.561323977295219e-05, "loss": 5.4384, "step": 23820 }, { "epoch": 0.04564596316717475, "grad_norm": 1.706979751586914, "learning_rate": 4.563239453706128e-05, "loss": 5.3757, "step": 23830 }, { "epoch": 0.04566511799854998, "grad_norm": 1.5084054470062256, "learning_rate": 4.565154930117035e-05, "loss": 5.3964, "step": 23840 }, { "epoch": 0.04568427282992521, "grad_norm": 1.5907272100448608, "learning_rate": 4.5670704065279434e-05, "loss": 5.3899, "step": 23850 }, { "epoch": 0.04570342766130044, "grad_norm": 1.6703057289123535, "learning_rate": 4.568985882938851e-05, "loss": 5.4908, "step": 23860 }, { "epoch": 0.04572258249267567, "grad_norm": 1.6070047616958618, "learning_rate": 4.5709013593497597e-05, "loss": 5.4564, "step": 23870 }, { "epoch": 0.0457417373240509, "grad_norm": 1.6348434686660767, "learning_rate": 4.572816835760667e-05, "loss": 5.5066, "step": 23880 }, { "epoch": 0.045760892155426135, "grad_norm": 1.615235447883606, "learning_rate": 4.574732312171575e-05, "loss": 5.3831, "step": 23890 }, { "epoch": 0.04578004698680136, "grad_norm": 1.7223236560821533, "learning_rate": 4.576647788582483e-05, "loss": 5.3801, "step": 23900 }, { "epoch": 0.045799201818176594, "grad_norm": 1.681410789489746, "learning_rate": 4.5785632649933915e-05, "loss": 5.4075, "step": 23910 }, { "epoch": 0.04581835664955183, "grad_norm": 1.532462477684021, "learning_rate": 4.580478741404299e-05, "loss": 5.3548, "step": 23920 }, { "epoch": 0.04583751148092705, "grad_norm": 1.6324259042739868, "learning_rate": 4.582394217815207e-05, "loss": 5.5611, "step": 23930 }, { "epoch": 0.045856666312302286, "grad_norm": 1.701211929321289, "learning_rate": 
4.5843096942261145e-05, "loss": 5.4539, "step": 23940 }, { "epoch": 0.04587582114367752, "grad_norm": 1.5366013050079346, "learning_rate": 4.5862251706370233e-05, "loss": 5.3933, "step": 23950 }, { "epoch": 0.045894975975052744, "grad_norm": 1.9607985019683838, "learning_rate": 4.5881406470479315e-05, "loss": 5.4527, "step": 23960 }, { "epoch": 0.04591413080642798, "grad_norm": 1.6692259311676025, "learning_rate": 4.590056123458839e-05, "loss": 5.3836, "step": 23970 }, { "epoch": 0.04593328563780321, "grad_norm": 1.6084420680999756, "learning_rate": 4.591971599869748e-05, "loss": 5.4334, "step": 23980 }, { "epoch": 0.045952440469178436, "grad_norm": 1.7432924509048462, "learning_rate": 4.593887076280655e-05, "loss": 5.2916, "step": 23990 }, { "epoch": 0.04597159530055367, "grad_norm": 1.6267499923706055, "learning_rate": 4.595802552691563e-05, "loss": 5.315, "step": 24000 }, { "epoch": 0.0459907501319289, "grad_norm": 1.6602171659469604, "learning_rate": 4.597718029102471e-05, "loss": 5.3164, "step": 24010 }, { "epoch": 0.046009904963304135, "grad_norm": 1.9140857458114624, "learning_rate": 4.5996335055133796e-05, "loss": 5.2371, "step": 24020 }, { "epoch": 0.04602905979467936, "grad_norm": 1.6662936210632324, "learning_rate": 4.601548981924287e-05, "loss": 5.459, "step": 24030 }, { "epoch": 0.046048214626054594, "grad_norm": 1.5688985586166382, "learning_rate": 4.603464458335195e-05, "loss": 5.3506, "step": 24040 }, { "epoch": 0.04606736945742983, "grad_norm": 1.5880908966064453, "learning_rate": 4.6053799347461026e-05, "loss": 5.4554, "step": 24050 }, { "epoch": 0.04608652428880505, "grad_norm": 1.7896339893341064, "learning_rate": 4.6072954111570114e-05, "loss": 5.4343, "step": 24060 }, { "epoch": 0.046105679120180286, "grad_norm": 1.647046446800232, "learning_rate": 4.6092108875679196e-05, "loss": 5.4328, "step": 24070 }, { "epoch": 0.04612483395155552, "grad_norm": 1.801584005355835, "learning_rate": 4.611126363978827e-05, "loss": 5.364, "step": 24080 }, { 
"epoch": 0.046143988782930745, "grad_norm": 1.6055985689163208, "learning_rate": 4.613041840389735e-05, "loss": 5.1757, "step": 24090 }, { "epoch": 0.04616314361430598, "grad_norm": 1.8434170484542847, "learning_rate": 4.614957316800643e-05, "loss": 5.2863, "step": 24100 }, { "epoch": 0.04618229844568121, "grad_norm": 1.585294246673584, "learning_rate": 4.6168727932115514e-05, "loss": 5.4415, "step": 24110 }, { "epoch": 0.04620145327705644, "grad_norm": 1.5859454870224, "learning_rate": 4.618788269622459e-05, "loss": 5.3568, "step": 24120 }, { "epoch": 0.04622060810843167, "grad_norm": 1.537915825843811, "learning_rate": 4.620703746033367e-05, "loss": 5.5659, "step": 24130 }, { "epoch": 0.0462397629398069, "grad_norm": 1.678602695465088, "learning_rate": 4.622619222444275e-05, "loss": 5.4166, "step": 24140 }, { "epoch": 0.04625891777118213, "grad_norm": 1.5151231288909912, "learning_rate": 4.624534698855183e-05, "loss": 5.4066, "step": 24150 }, { "epoch": 0.04627807260255736, "grad_norm": 1.679633378982544, "learning_rate": 4.626450175266091e-05, "loss": 5.4285, "step": 24160 }, { "epoch": 0.046297227433932595, "grad_norm": 1.7235102653503418, "learning_rate": 4.6283656516769995e-05, "loss": 5.3931, "step": 24170 }, { "epoch": 0.04631638226530782, "grad_norm": 1.7800699472427368, "learning_rate": 4.6302811280879076e-05, "loss": 5.3723, "step": 24180 }, { "epoch": 0.046335537096683053, "grad_norm": 1.598550796508789, "learning_rate": 4.632196604498815e-05, "loss": 5.26, "step": 24190 }, { "epoch": 0.046354691928058286, "grad_norm": 1.618115782737732, "learning_rate": 4.634112080909723e-05, "loss": 5.3224, "step": 24200 }, { "epoch": 0.04637384675943351, "grad_norm": 1.6055700778961182, "learning_rate": 4.6360275573206314e-05, "loss": 5.4044, "step": 24210 }, { "epoch": 0.046393001590808745, "grad_norm": 1.5644590854644775, "learning_rate": 4.6379430337315395e-05, "loss": 5.4416, "step": 24220 }, { "epoch": 0.04641215642218398, "grad_norm": 1.829227328300476, 
"learning_rate": 4.639858510142447e-05, "loss": 5.349, "step": 24230 }, { "epoch": 0.046431311253559204, "grad_norm": 1.5619757175445557, "learning_rate": 4.641773986553355e-05, "loss": 5.4475, "step": 24240 }, { "epoch": 0.04645046608493444, "grad_norm": 1.6487008333206177, "learning_rate": 4.643689462964263e-05, "loss": 5.5859, "step": 24250 }, { "epoch": 0.04646962091630967, "grad_norm": 1.6498011350631714, "learning_rate": 4.645604939375171e-05, "loss": 5.4943, "step": 24260 }, { "epoch": 0.046488775747684896, "grad_norm": 1.8073445558547974, "learning_rate": 4.647520415786079e-05, "loss": 5.4379, "step": 24270 }, { "epoch": 0.04650793057906013, "grad_norm": 1.576836347579956, "learning_rate": 4.649435892196987e-05, "loss": 5.5053, "step": 24280 }, { "epoch": 0.04652708541043536, "grad_norm": 1.6005407571792603, "learning_rate": 4.651351368607896e-05, "loss": 5.594, "step": 24290 }, { "epoch": 0.04654624024181059, "grad_norm": 1.6828930377960205, "learning_rate": 4.653266845018803e-05, "loss": 5.4293, "step": 24300 }, { "epoch": 0.04656539507318582, "grad_norm": 1.8674402236938477, "learning_rate": 4.655182321429711e-05, "loss": 5.314, "step": 24310 }, { "epoch": 0.046584549904561054, "grad_norm": 1.8133327960968018, "learning_rate": 4.6570977978406194e-05, "loss": 5.3288, "step": 24320 }, { "epoch": 0.04660370473593628, "grad_norm": 1.5030475854873657, "learning_rate": 4.6590132742515276e-05, "loss": 5.4316, "step": 24330 }, { "epoch": 0.04662285956731151, "grad_norm": 1.553187370300293, "learning_rate": 4.660928750662435e-05, "loss": 5.3824, "step": 24340 }, { "epoch": 0.046642014398686746, "grad_norm": 1.615343451499939, "learning_rate": 4.662844227073343e-05, "loss": 5.4082, "step": 24350 }, { "epoch": 0.04666116923006198, "grad_norm": 1.8038511276245117, "learning_rate": 4.664759703484251e-05, "loss": 5.2355, "step": 24360 }, { "epoch": 0.046680324061437205, "grad_norm": 1.972244143486023, "learning_rate": 4.6666751798951594e-05, "loss": 5.3367, "step": 
24370 }, { "epoch": 0.04669947889281244, "grad_norm": 1.757911205291748, "learning_rate": 4.668590656306067e-05, "loss": 5.3231, "step": 24380 }, { "epoch": 0.04671863372418767, "grad_norm": 1.7521759271621704, "learning_rate": 4.670506132716975e-05, "loss": 5.3018, "step": 24390 }, { "epoch": 0.0467377885555629, "grad_norm": 1.7172948122024536, "learning_rate": 4.672421609127884e-05, "loss": 5.5962, "step": 24400 }, { "epoch": 0.04675694338693813, "grad_norm": 1.6641168594360352, "learning_rate": 4.674337085538791e-05, "loss": 5.3792, "step": 24410 }, { "epoch": 0.04677609821831336, "grad_norm": 1.4710801839828491, "learning_rate": 4.6762525619496994e-05, "loss": 5.6145, "step": 24420 }, { "epoch": 0.04679525304968859, "grad_norm": 1.7520745992660522, "learning_rate": 4.678168038360607e-05, "loss": 5.2474, "step": 24430 }, { "epoch": 0.04681440788106382, "grad_norm": 1.738818883895874, "learning_rate": 4.6800835147715156e-05, "loss": 5.4848, "step": 24440 }, { "epoch": 0.046833562712439054, "grad_norm": 1.7786285877227783, "learning_rate": 4.681998991182423e-05, "loss": 5.3721, "step": 24450 }, { "epoch": 0.04685271754381428, "grad_norm": 1.6582262516021729, "learning_rate": 4.683914467593331e-05, "loss": 5.4303, "step": 24460 }, { "epoch": 0.04687187237518951, "grad_norm": 1.6877340078353882, "learning_rate": 4.685829944004239e-05, "loss": 5.346, "step": 24470 }, { "epoch": 0.046891027206564746, "grad_norm": 1.6352183818817139, "learning_rate": 4.6877454204151475e-05, "loss": 5.4656, "step": 24480 }, { "epoch": 0.04691018203793997, "grad_norm": 1.5019819736480713, "learning_rate": 4.689660896826055e-05, "loss": 5.5009, "step": 24490 }, { "epoch": 0.046929336869315205, "grad_norm": 1.6965328454971313, "learning_rate": 4.691576373236963e-05, "loss": 5.3464, "step": 24500 }, { "epoch": 0.04694849170069044, "grad_norm": 1.8084298372268677, "learning_rate": 4.693491849647872e-05, "loss": 5.335, "step": 24510 }, { "epoch": 0.046967646532065664, "grad_norm": 
1.5610195398330688, "learning_rate": 4.695407326058779e-05, "loss": 5.4297, "step": 24520 }, { "epoch": 0.0469868013634409, "grad_norm": 1.647545576095581, "learning_rate": 4.697322802469687e-05, "loss": 5.2769, "step": 24530 }, { "epoch": 0.04700595619481613, "grad_norm": 1.6668298244476318, "learning_rate": 4.699238278880595e-05, "loss": 5.2549, "step": 24540 }, { "epoch": 0.047025111026191356, "grad_norm": 1.684722661972046, "learning_rate": 4.701153755291504e-05, "loss": 5.4312, "step": 24550 }, { "epoch": 0.04704426585756659, "grad_norm": 1.6511774063110352, "learning_rate": 4.703069231702411e-05, "loss": 5.3256, "step": 24560 }, { "epoch": 0.04706342068894182, "grad_norm": 1.599410057067871, "learning_rate": 4.704984708113319e-05, "loss": 5.2478, "step": 24570 }, { "epoch": 0.04708257552031705, "grad_norm": 1.764427900314331, "learning_rate": 4.706900184524227e-05, "loss": 5.3245, "step": 24580 }, { "epoch": 0.04710173035169228, "grad_norm": 1.6905620098114014, "learning_rate": 4.7088156609351356e-05, "loss": 5.3516, "step": 24590 }, { "epoch": 0.047120885183067514, "grad_norm": 1.6951117515563965, "learning_rate": 4.710731137346043e-05, "loss": 5.2345, "step": 24600 }, { "epoch": 0.04714004001444274, "grad_norm": 1.6693378686904907, "learning_rate": 4.712646613756951e-05, "loss": 5.4366, "step": 24610 }, { "epoch": 0.04715919484581797, "grad_norm": 1.6034150123596191, "learning_rate": 4.7145620901678586e-05, "loss": 5.3767, "step": 24620 }, { "epoch": 0.047178349677193206, "grad_norm": 1.624629020690918, "learning_rate": 4.7164775665787674e-05, "loss": 5.3158, "step": 24630 }, { "epoch": 0.04719750450856843, "grad_norm": 1.6645036935806274, "learning_rate": 4.718393042989675e-05, "loss": 5.3761, "step": 24640 }, { "epoch": 0.047216659339943665, "grad_norm": 1.7788008451461792, "learning_rate": 4.720308519400583e-05, "loss": 5.3908, "step": 24650 }, { "epoch": 0.0472358141713189, "grad_norm": 1.9556723833084106, "learning_rate": 4.722223995811492e-05, "loss": 
5.2857, "step": 24660 }, { "epoch": 0.04725496900269413, "grad_norm": 1.628499150276184, "learning_rate": 4.724139472222399e-05, "loss": 5.3661, "step": 24670 }, { "epoch": 0.047274123834069356, "grad_norm": 1.5588330030441284, "learning_rate": 4.7260549486333074e-05, "loss": 5.3125, "step": 24680 }, { "epoch": 0.04729327866544459, "grad_norm": 1.642638921737671, "learning_rate": 4.727970425044215e-05, "loss": 5.5046, "step": 24690 }, { "epoch": 0.04731243349681982, "grad_norm": 1.6170892715454102, "learning_rate": 4.7298859014551237e-05, "loss": 5.2161, "step": 24700 }, { "epoch": 0.04733158832819505, "grad_norm": 1.6034733057022095, "learning_rate": 4.731801377866031e-05, "loss": 5.5339, "step": 24710 }, { "epoch": 0.04735074315957028, "grad_norm": 1.7545607089996338, "learning_rate": 4.733716854276939e-05, "loss": 5.4586, "step": 24720 }, { "epoch": 0.047369897990945514, "grad_norm": 1.6835826635360718, "learning_rate": 4.735632330687847e-05, "loss": 5.2801, "step": 24730 }, { "epoch": 0.04738905282232074, "grad_norm": 1.7589209079742432, "learning_rate": 4.7375478070987555e-05, "loss": 5.3885, "step": 24740 }, { "epoch": 0.04740820765369597, "grad_norm": 1.640093207359314, "learning_rate": 4.739463283509663e-05, "loss": 5.2519, "step": 24750 }, { "epoch": 0.047427362485071206, "grad_norm": 1.625813364982605, "learning_rate": 4.741378759920571e-05, "loss": 5.4614, "step": 24760 }, { "epoch": 0.04744651731644643, "grad_norm": 1.6952745914459229, "learning_rate": 4.7432942363314785e-05, "loss": 5.3276, "step": 24770 }, { "epoch": 0.047465672147821665, "grad_norm": 1.634453535079956, "learning_rate": 4.7452097127423873e-05, "loss": 5.3138, "step": 24780 }, { "epoch": 0.0474848269791969, "grad_norm": 1.7369478940963745, "learning_rate": 4.7471251891532955e-05, "loss": 5.5044, "step": 24790 }, { "epoch": 0.047503981810572124, "grad_norm": 1.7091631889343262, "learning_rate": 4.749040665564203e-05, "loss": 5.4081, "step": 24800 }, { "epoch": 0.04752313664194736, 
"grad_norm": 1.9036753177642822, "learning_rate": 4.750956141975111e-05, "loss": 5.2877, "step": 24810 }, { "epoch": 0.04754229147332259, "grad_norm": 1.8743072748184204, "learning_rate": 4.752871618386019e-05, "loss": 5.2418, "step": 24820 }, { "epoch": 0.047561446304697816, "grad_norm": 1.6079158782958984, "learning_rate": 4.754787094796927e-05, "loss": 5.3655, "step": 24830 }, { "epoch": 0.04758060113607305, "grad_norm": 1.609902024269104, "learning_rate": 4.756702571207835e-05, "loss": 5.3285, "step": 24840 }, { "epoch": 0.04759975596744828, "grad_norm": 1.8335720300674438, "learning_rate": 4.7586180476187436e-05, "loss": 5.426, "step": 24850 }, { "epoch": 0.04761891079882351, "grad_norm": 1.7436537742614746, "learning_rate": 4.760533524029651e-05, "loss": 5.5357, "step": 24860 }, { "epoch": 0.04763806563019874, "grad_norm": 1.6334985494613647, "learning_rate": 4.762449000440559e-05, "loss": 5.4624, "step": 24870 }, { "epoch": 0.047657220461573974, "grad_norm": 1.5295207500457764, "learning_rate": 4.7643644768514666e-05, "loss": 5.4614, "step": 24880 }, { "epoch": 0.0476763752929492, "grad_norm": 1.987648844718933, "learning_rate": 4.7662799532623754e-05, "loss": 5.424, "step": 24890 }, { "epoch": 0.04769553012432443, "grad_norm": 1.6285842657089233, "learning_rate": 4.7681954296732836e-05, "loss": 5.4149, "step": 24900 }, { "epoch": 0.047714684955699666, "grad_norm": 1.553926706314087, "learning_rate": 4.770110906084191e-05, "loss": 5.4223, "step": 24910 }, { "epoch": 0.04773383978707489, "grad_norm": 1.6094679832458496, "learning_rate": 4.772026382495099e-05, "loss": 5.3198, "step": 24920 }, { "epoch": 0.047752994618450124, "grad_norm": 1.6211364269256592, "learning_rate": 4.773941858906007e-05, "loss": 5.4921, "step": 24930 }, { "epoch": 0.04777214944982536, "grad_norm": 1.618013858795166, "learning_rate": 4.7758573353169154e-05, "loss": 5.3306, "step": 24940 }, { "epoch": 0.04779130428120058, "grad_norm": 1.8666704893112183, "learning_rate": 
4.777772811727823e-05, "loss": 5.3903, "step": 24950 }, { "epoch": 0.047810459112575816, "grad_norm": 1.483085036277771, "learning_rate": 4.779688288138731e-05, "loss": 5.5339, "step": 24960 }, { "epoch": 0.04782961394395105, "grad_norm": 1.6725664138793945, "learning_rate": 4.781603764549639e-05, "loss": 5.5159, "step": 24970 }, { "epoch": 0.047848768775326275, "grad_norm": 1.7210289239883423, "learning_rate": 4.783519240960547e-05, "loss": 5.3285, "step": 24980 }, { "epoch": 0.04786792360670151, "grad_norm": 1.7159106731414795, "learning_rate": 4.785434717371455e-05, "loss": 5.3178, "step": 24990 }, { "epoch": 0.04788707843807674, "grad_norm": 1.5455392599105835, "learning_rate": 4.7873501937823635e-05, "loss": 5.3179, "step": 25000 }, { "epoch": 0.047906233269451974, "grad_norm": 1.7014822959899902, "learning_rate": 4.7892656701932716e-05, "loss": 5.3528, "step": 25010 }, { "epoch": 0.0479253881008272, "grad_norm": 1.7445250749588013, "learning_rate": 4.791181146604179e-05, "loss": 5.2482, "step": 25020 }, { "epoch": 0.04794454293220243, "grad_norm": 1.8814146518707275, "learning_rate": 4.793096623015087e-05, "loss": 5.3199, "step": 25030 }, { "epoch": 0.047963697763577666, "grad_norm": 1.806624412536621, "learning_rate": 4.7950120994259953e-05, "loss": 5.4538, "step": 25040 }, { "epoch": 0.04798285259495289, "grad_norm": 1.950453281402588, "learning_rate": 4.7969275758369035e-05, "loss": 5.3369, "step": 25050 }, { "epoch": 0.048002007426328125, "grad_norm": 1.720444679260254, "learning_rate": 4.798843052247811e-05, "loss": 5.152, "step": 25060 }, { "epoch": 0.04802116225770336, "grad_norm": 1.6181061267852783, "learning_rate": 4.800758528658719e-05, "loss": 5.4245, "step": 25070 }, { "epoch": 0.048040317089078584, "grad_norm": 1.5946093797683716, "learning_rate": 4.802674005069627e-05, "loss": 5.3103, "step": 25080 }, { "epoch": 0.04805947192045382, "grad_norm": 1.6678667068481445, "learning_rate": 4.804589481480535e-05, "loss": 5.3257, "step": 25090 }, { 
"epoch": 0.04807862675182905, "grad_norm": 1.6961506605148315, "learning_rate": 4.806504957891443e-05, "loss": 5.3113, "step": 25100 }, { "epoch": 0.048097781583204276, "grad_norm": 1.5996712446212769, "learning_rate": 4.808420434302351e-05, "loss": 5.4573, "step": 25110 }, { "epoch": 0.04811693641457951, "grad_norm": 1.718576192855835, "learning_rate": 4.81033591071326e-05, "loss": 5.3253, "step": 25120 }, { "epoch": 0.04813609124595474, "grad_norm": 1.873521327972412, "learning_rate": 4.812251387124167e-05, "loss": 5.3, "step": 25130 }, { "epoch": 0.04815524607732997, "grad_norm": 1.7176532745361328, "learning_rate": 4.814166863535075e-05, "loss": 5.3429, "step": 25140 }, { "epoch": 0.0481744009087052, "grad_norm": 1.6729457378387451, "learning_rate": 4.816082339945983e-05, "loss": 5.3306, "step": 25150 }, { "epoch": 0.04819355574008043, "grad_norm": 2.929762601852417, "learning_rate": 4.8179978163568916e-05, "loss": 5.3328, "step": 25160 }, { "epoch": 0.04821271057145566, "grad_norm": 1.707320213317871, "learning_rate": 4.819913292767799e-05, "loss": 5.3236, "step": 25170 }, { "epoch": 0.04823186540283089, "grad_norm": 1.6764531135559082, "learning_rate": 4.821828769178707e-05, "loss": 5.2311, "step": 25180 }, { "epoch": 0.048251020234206125, "grad_norm": 1.6528669595718384, "learning_rate": 4.823744245589615e-05, "loss": 5.4023, "step": 25190 }, { "epoch": 0.04827017506558135, "grad_norm": 1.6433241367340088, "learning_rate": 4.8256597220005234e-05, "loss": 5.3309, "step": 25200 }, { "epoch": 0.048289329896956584, "grad_norm": 1.631314754486084, "learning_rate": 4.827575198411431e-05, "loss": 5.4342, "step": 25210 }, { "epoch": 0.04830848472833182, "grad_norm": 1.6962544918060303, "learning_rate": 4.829490674822339e-05, "loss": 5.2545, "step": 25220 }, { "epoch": 0.04832763955970704, "grad_norm": 1.6806542873382568, "learning_rate": 4.831406151233248e-05, "loss": 5.1866, "step": 25230 }, { "epoch": 0.048346794391082276, "grad_norm": 1.593723177909851, 
"learning_rate": 4.833321627644155e-05, "loss": 5.3019, "step": 25240 }, { "epoch": 0.04836594922245751, "grad_norm": 1.6876862049102783, "learning_rate": 4.835237104055063e-05, "loss": 5.4566, "step": 25250 }, { "epoch": 0.048385104053832735, "grad_norm": 1.579653024673462, "learning_rate": 4.837152580465971e-05, "loss": 5.4008, "step": 25260 }, { "epoch": 0.04840425888520797, "grad_norm": 1.564228892326355, "learning_rate": 4.8390680568768796e-05, "loss": 5.3699, "step": 25270 }, { "epoch": 0.0484234137165832, "grad_norm": 1.6182746887207031, "learning_rate": 4.840983533287787e-05, "loss": 5.5337, "step": 25280 }, { "epoch": 0.04844256854795843, "grad_norm": 1.5810047388076782, "learning_rate": 4.842899009698695e-05, "loss": 5.4144, "step": 25290 }, { "epoch": 0.04846172337933366, "grad_norm": 1.5492255687713623, "learning_rate": 4.844814486109603e-05, "loss": 5.3302, "step": 25300 }, { "epoch": 0.04848087821070889, "grad_norm": 1.7727066278457642, "learning_rate": 4.8467299625205115e-05, "loss": 5.3184, "step": 25310 }, { "epoch": 0.04850003304208412, "grad_norm": 1.6160500049591064, "learning_rate": 4.848645438931419e-05, "loss": 5.3342, "step": 25320 }, { "epoch": 0.04851918787345935, "grad_norm": 1.7135494947433472, "learning_rate": 4.850560915342327e-05, "loss": 5.319, "step": 25330 }, { "epoch": 0.048538342704834585, "grad_norm": 1.5461900234222412, "learning_rate": 4.852476391753236e-05, "loss": 5.2851, "step": 25340 }, { "epoch": 0.04855749753620982, "grad_norm": 1.679070234298706, "learning_rate": 4.854391868164143e-05, "loss": 5.4123, "step": 25350 }, { "epoch": 0.048576652367585044, "grad_norm": 1.7797415256500244, "learning_rate": 4.856307344575051e-05, "loss": 5.315, "step": 25360 }, { "epoch": 0.04859580719896028, "grad_norm": 1.671920895576477, "learning_rate": 4.858222820985959e-05, "loss": 5.3514, "step": 25370 }, { "epoch": 0.04861496203033551, "grad_norm": 1.5601304769515991, "learning_rate": 4.860138297396868e-05, "loss": 5.3467, "step": 25380 
}, { "epoch": 0.048634116861710736, "grad_norm": 1.6319270133972168, "learning_rate": 4.862053773807775e-05, "loss": 5.5251, "step": 25390 }, { "epoch": 0.04865327169308597, "grad_norm": 1.5715687274932861, "learning_rate": 4.863969250218683e-05, "loss": 5.3189, "step": 25400 }, { "epoch": 0.0486724265244612, "grad_norm": 1.5880814790725708, "learning_rate": 4.865884726629591e-05, "loss": 5.4682, "step": 25410 }, { "epoch": 0.04869158135583643, "grad_norm": 1.5502190589904785, "learning_rate": 4.8678002030404996e-05, "loss": 5.3272, "step": 25420 }, { "epoch": 0.04871073618721166, "grad_norm": 1.8391140699386597, "learning_rate": 4.869715679451407e-05, "loss": 5.2431, "step": 25430 }, { "epoch": 0.04872989101858689, "grad_norm": 1.5364408493041992, "learning_rate": 4.871631155862315e-05, "loss": 5.4455, "step": 25440 }, { "epoch": 0.04874904584996212, "grad_norm": 1.5514642000198364, "learning_rate": 4.8735466322732226e-05, "loss": 5.2109, "step": 25450 }, { "epoch": 0.04876820068133735, "grad_norm": 1.6318471431732178, "learning_rate": 4.8754621086841314e-05, "loss": 5.3678, "step": 25460 }, { "epoch": 0.048787355512712585, "grad_norm": 1.7311962842941284, "learning_rate": 4.877377585095039e-05, "loss": 5.4206, "step": 25470 }, { "epoch": 0.04880651034408781, "grad_norm": 1.5768216848373413, "learning_rate": 4.879293061505947e-05, "loss": 5.2511, "step": 25480 }, { "epoch": 0.048825665175463044, "grad_norm": 1.6914513111114502, "learning_rate": 4.8812085379168545e-05, "loss": 5.3096, "step": 25490 }, { "epoch": 0.04884482000683828, "grad_norm": 1.6249544620513916, "learning_rate": 4.883124014327763e-05, "loss": 5.3963, "step": 25500 }, { "epoch": 0.0488639748382135, "grad_norm": 1.6826848983764648, "learning_rate": 4.8850394907386714e-05, "loss": 5.4143, "step": 25510 }, { "epoch": 0.048883129669588736, "grad_norm": 1.531178593635559, "learning_rate": 4.886954967149579e-05, "loss": 5.4016, "step": 25520 }, { "epoch": 0.04890228450096397, "grad_norm": 
1.73707914352417, "learning_rate": 4.8888704435604877e-05, "loss": 5.3562, "step": 25530 }, { "epoch": 0.048921439332339195, "grad_norm": 1.74485445022583, "learning_rate": 4.890785919971395e-05, "loss": 5.4591, "step": 25540 }, { "epoch": 0.04894059416371443, "grad_norm": 1.662420630455017, "learning_rate": 4.892701396382303e-05, "loss": 5.4581, "step": 25550 }, { "epoch": 0.04895974899508966, "grad_norm": 1.7959606647491455, "learning_rate": 4.894616872793211e-05, "loss": 5.2416, "step": 25560 }, { "epoch": 0.04897890382646489, "grad_norm": 1.6140397787094116, "learning_rate": 4.8965323492041195e-05, "loss": 5.3407, "step": 25570 }, { "epoch": 0.04899805865784012, "grad_norm": 1.6582002639770508, "learning_rate": 4.898447825615027e-05, "loss": 5.3289, "step": 25580 }, { "epoch": 0.04901721348921535, "grad_norm": 1.6929211616516113, "learning_rate": 4.900363302025935e-05, "loss": 5.4038, "step": 25590 }, { "epoch": 0.04903636832059058, "grad_norm": 1.6508678197860718, "learning_rate": 4.9022787784368425e-05, "loss": 5.2942, "step": 25600 }, { "epoch": 0.04905552315196581, "grad_norm": 1.9880656003952026, "learning_rate": 4.9041942548477513e-05, "loss": 5.3527, "step": 25610 }, { "epoch": 0.049074677983341045, "grad_norm": 1.6387304067611694, "learning_rate": 4.9061097312586595e-05, "loss": 5.1173, "step": 25620 }, { "epoch": 0.04909383281471627, "grad_norm": 1.705907940864563, "learning_rate": 4.908025207669567e-05, "loss": 5.3755, "step": 25630 }, { "epoch": 0.049112987646091504, "grad_norm": 1.7529901266098022, "learning_rate": 4.909940684080475e-05, "loss": 5.2251, "step": 25640 }, { "epoch": 0.049132142477466736, "grad_norm": 1.7807068824768066, "learning_rate": 4.911856160491383e-05, "loss": 5.3123, "step": 25650 }, { "epoch": 0.04915129730884197, "grad_norm": 1.5957698822021484, "learning_rate": 4.913771636902291e-05, "loss": 5.3126, "step": 25660 }, { "epoch": 0.049170452140217195, "grad_norm": 1.4952274560928345, "learning_rate": 4.915687113313199e-05, 
"loss": 5.4241, "step": 25670 }, { "epoch": 0.04918960697159243, "grad_norm": 1.828418254852295, "learning_rate": 4.917602589724107e-05, "loss": 5.3049, "step": 25680 }, { "epoch": 0.04920876180296766, "grad_norm": 1.7348783016204834, "learning_rate": 4.919518066135015e-05, "loss": 5.4448, "step": 25690 }, { "epoch": 0.04922791663434289, "grad_norm": 1.9021821022033691, "learning_rate": 4.921433542545923e-05, "loss": 5.3485, "step": 25700 }, { "epoch": 0.04924707146571812, "grad_norm": 1.6232402324676514, "learning_rate": 4.9233490189568306e-05, "loss": 5.2375, "step": 25710 }, { "epoch": 0.04926622629709335, "grad_norm": 1.8491320610046387, "learning_rate": 4.9252644953677394e-05, "loss": 5.1434, "step": 25720 }, { "epoch": 0.04928538112846858, "grad_norm": 1.8590075969696045, "learning_rate": 4.9271799717786476e-05, "loss": 5.3737, "step": 25730 }, { "epoch": 0.04930453595984381, "grad_norm": 1.6673274040222168, "learning_rate": 4.929095448189555e-05, "loss": 5.3268, "step": 25740 }, { "epoch": 0.049323690791219045, "grad_norm": 1.5610696077346802, "learning_rate": 4.931010924600463e-05, "loss": 5.4356, "step": 25750 }, { "epoch": 0.04934284562259427, "grad_norm": 1.7062196731567383, "learning_rate": 4.932926401011371e-05, "loss": 5.2811, "step": 25760 }, { "epoch": 0.049362000453969504, "grad_norm": 1.7599422931671143, "learning_rate": 4.9348418774222794e-05, "loss": 5.3058, "step": 25770 }, { "epoch": 0.04938115528534474, "grad_norm": 1.5814851522445679, "learning_rate": 4.936757353833187e-05, "loss": 5.3673, "step": 25780 }, { "epoch": 0.04940031011671996, "grad_norm": 1.6427286863327026, "learning_rate": 4.938672830244095e-05, "loss": 5.3826, "step": 25790 }, { "epoch": 0.049419464948095196, "grad_norm": 1.6063587665557861, "learning_rate": 4.940588306655003e-05, "loss": 5.3888, "step": 25800 }, { "epoch": 0.04943861977947043, "grad_norm": 1.6424202919006348, "learning_rate": 4.942503783065911e-05, "loss": 5.4013, "step": 25810 }, { "epoch": 
0.049457774610845655, "grad_norm": 1.763880729675293, "learning_rate": 4.944419259476819e-05, "loss": 5.2682, "step": 25820 }, { "epoch": 0.04947692944222089, "grad_norm": 1.6579874753952026, "learning_rate": 4.946334735887727e-05, "loss": 5.2819, "step": 25830 }, { "epoch": 0.04949608427359612, "grad_norm": 1.5653178691864014, "learning_rate": 4.9482502122986356e-05, "loss": 5.3107, "step": 25840 }, { "epoch": 0.04951523910497135, "grad_norm": 1.6304799318313599, "learning_rate": 4.950165688709543e-05, "loss": 5.3554, "step": 25850 }, { "epoch": 0.04953439393634658, "grad_norm": 1.8421255350112915, "learning_rate": 4.952081165120451e-05, "loss": 5.229, "step": 25860 }, { "epoch": 0.04955354876772181, "grad_norm": 1.640395998954773, "learning_rate": 4.9539966415313593e-05, "loss": 5.2932, "step": 25870 }, { "epoch": 0.04957270359909704, "grad_norm": 1.6733494997024536, "learning_rate": 4.9559121179422675e-05, "loss": 5.4512, "step": 25880 }, { "epoch": 0.04959185843047227, "grad_norm": 1.6426913738250732, "learning_rate": 4.957827594353175e-05, "loss": 5.3129, "step": 25890 }, { "epoch": 0.049611013261847504, "grad_norm": 1.5624676942825317, "learning_rate": 4.959743070764083e-05, "loss": 5.3686, "step": 25900 }, { "epoch": 0.04963016809322273, "grad_norm": 1.820490837097168, "learning_rate": 4.961658547174991e-05, "loss": 5.352, "step": 25910 }, { "epoch": 0.04964932292459796, "grad_norm": 1.7244032621383667, "learning_rate": 4.963574023585899e-05, "loss": 5.4325, "step": 25920 }, { "epoch": 0.049668477755973196, "grad_norm": 1.6271262168884277, "learning_rate": 4.965489499996807e-05, "loss": 5.4632, "step": 25930 }, { "epoch": 0.04968763258734842, "grad_norm": 1.5385466814041138, "learning_rate": 4.967404976407715e-05, "loss": 5.1998, "step": 25940 }, { "epoch": 0.049706787418723655, "grad_norm": 1.6453256607055664, "learning_rate": 4.969320452818624e-05, "loss": 5.346, "step": 25950 }, { "epoch": 0.04972594225009889, "grad_norm": 1.7478187084197998, 
"learning_rate": 4.971235929229531e-05, "loss": 5.2958, "step": 25960 }, { "epoch": 0.049745097081474114, "grad_norm": 1.85723876953125, "learning_rate": 4.9731514056404386e-05, "loss": 5.3795, "step": 25970 }, { "epoch": 0.04976425191284935, "grad_norm": 1.589043140411377, "learning_rate": 4.975066882051347e-05, "loss": 5.2994, "step": 25980 }, { "epoch": 0.04978340674422458, "grad_norm": 1.5988725423812866, "learning_rate": 4.9769823584622556e-05, "loss": 5.3005, "step": 25990 }, { "epoch": 0.04980256157559981, "grad_norm": 1.550731897354126, "learning_rate": 4.978897834873163e-05, "loss": 5.2421, "step": 26000 }, { "epoch": 0.04982171640697504, "grad_norm": 1.7285338640213013, "learning_rate": 4.980813311284071e-05, "loss": 5.2355, "step": 26010 }, { "epoch": 0.04984087123835027, "grad_norm": 1.7348746061325073, "learning_rate": 4.9827287876949786e-05, "loss": 5.3162, "step": 26020 }, { "epoch": 0.049860026069725505, "grad_norm": 1.9447442293167114, "learning_rate": 4.9846442641058874e-05, "loss": 5.3139, "step": 26030 }, { "epoch": 0.04987918090110073, "grad_norm": 1.7423107624053955, "learning_rate": 4.986559740516795e-05, "loss": 5.2213, "step": 26040 }, { "epoch": 0.049898335732475964, "grad_norm": 1.7255748510360718, "learning_rate": 4.988475216927703e-05, "loss": 5.3009, "step": 26050 }, { "epoch": 0.0499174905638512, "grad_norm": 1.7411448955535889, "learning_rate": 4.990390693338612e-05, "loss": 5.2594, "step": 26060 }, { "epoch": 0.04993664539522642, "grad_norm": 1.5759389400482178, "learning_rate": 4.992306169749519e-05, "loss": 5.2933, "step": 26070 }, { "epoch": 0.049955800226601656, "grad_norm": 1.7112029790878296, "learning_rate": 4.994221646160427e-05, "loss": 5.1122, "step": 26080 }, { "epoch": 0.04997495505797689, "grad_norm": 1.5711452960968018, "learning_rate": 4.996137122571335e-05, "loss": 5.2598, "step": 26090 }, { "epoch": 0.049994109889352115, "grad_norm": 1.696695327758789, "learning_rate": 4.9980525989822436e-05, "loss": 5.2156, "step": 
26100 }, { "epoch": 0.05001326472072735, "grad_norm": 1.6102471351623535, "learning_rate": 4.999968075393151e-05, "loss": 5.4224, "step": 26110 }, { "epoch": 0.05003241955210258, "grad_norm": 1.612795114517212, "learning_rate": 5.001883551804059e-05, "loss": 5.3123, "step": 26120 }, { "epoch": 0.050051574383477807, "grad_norm": 1.7500430345535278, "learning_rate": 5.003799028214967e-05, "loss": 5.3688, "step": 26130 }, { "epoch": 0.05007072921485304, "grad_norm": 1.7206239700317383, "learning_rate": 5.0057145046258755e-05, "loss": 5.3475, "step": 26140 }, { "epoch": 0.05008988404622827, "grad_norm": 1.6467454433441162, "learning_rate": 5.007629981036783e-05, "loss": 5.1944, "step": 26150 }, { "epoch": 0.0501090388776035, "grad_norm": 1.6662520170211792, "learning_rate": 5.009545457447691e-05, "loss": 5.2299, "step": 26160 }, { "epoch": 0.05012819370897873, "grad_norm": 1.7539476156234741, "learning_rate": 5.0112693862175084e-05, "loss": 5.1684, "step": 26170 }, { "epoch": 0.050147348540353964, "grad_norm": 1.6187515258789062, "learning_rate": 5.0131848626284165e-05, "loss": 5.3271, "step": 26180 }, { "epoch": 0.05016650337172919, "grad_norm": 1.6092724800109863, "learning_rate": 5.015100339039324e-05, "loss": 5.283, "step": 26190 }, { "epoch": 0.05018565820310442, "grad_norm": 1.6470245122909546, "learning_rate": 5.017015815450233e-05, "loss": 5.3136, "step": 26200 }, { "epoch": 0.050204813034479656, "grad_norm": 1.5847749710083008, "learning_rate": 5.01893129186114e-05, "loss": 5.2283, "step": 26210 }, { "epoch": 0.05022396786585488, "grad_norm": 1.5027363300323486, "learning_rate": 5.0208467682720484e-05, "loss": 5.2784, "step": 26220 }, { "epoch": 0.050243122697230115, "grad_norm": 1.691167950630188, "learning_rate": 5.022762244682956e-05, "loss": 5.2998, "step": 26230 }, { "epoch": 0.05026227752860535, "grad_norm": 1.6027995347976685, "learning_rate": 5.0246777210938646e-05, "loss": 5.18, "step": 26240 }, { "epoch": 0.050281432359980574, "grad_norm": 
1.6231650114059448, "learning_rate": 5.026593197504772e-05, "loss": 5.395, "step": 26250 }, { "epoch": 0.05030058719135581, "grad_norm": 1.6620595455169678, "learning_rate": 5.02850867391568e-05, "loss": 5.369, "step": 26260 }, { "epoch": 0.05031974202273104, "grad_norm": 1.6575932502746582, "learning_rate": 5.0304241503265877e-05, "loss": 5.3402, "step": 26270 }, { "epoch": 0.050338896854106266, "grad_norm": 1.5593359470367432, "learning_rate": 5.0323396267374965e-05, "loss": 5.3091, "step": 26280 }, { "epoch": 0.0503580516854815, "grad_norm": 1.7682397365570068, "learning_rate": 5.0342551031484046e-05, "loss": 5.4135, "step": 26290 }, { "epoch": 0.05037720651685673, "grad_norm": 1.6724278926849365, "learning_rate": 5.036170579559312e-05, "loss": 5.297, "step": 26300 }, { "epoch": 0.05039636134823196, "grad_norm": 1.7869240045547485, "learning_rate": 5.03808605597022e-05, "loss": 5.2113, "step": 26310 }, { "epoch": 0.05041551617960719, "grad_norm": 1.7264963388442993, "learning_rate": 5.040001532381128e-05, "loss": 5.1269, "step": 26320 }, { "epoch": 0.050434671010982424, "grad_norm": 1.795128345489502, "learning_rate": 5.0419170087920364e-05, "loss": 5.3786, "step": 26330 }, { "epoch": 0.05045382584235766, "grad_norm": 1.679409384727478, "learning_rate": 5.043832485202944e-05, "loss": 5.2284, "step": 26340 }, { "epoch": 0.05047298067373288, "grad_norm": 1.5809088945388794, "learning_rate": 5.045747961613853e-05, "loss": 5.3635, "step": 26350 }, { "epoch": 0.050492135505108116, "grad_norm": 1.755230188369751, "learning_rate": 5.04766343802476e-05, "loss": 5.213, "step": 26360 }, { "epoch": 0.05051129033648335, "grad_norm": 1.819595217704773, "learning_rate": 5.049578914435668e-05, "loss": 5.2718, "step": 26370 }, { "epoch": 0.050530445167858574, "grad_norm": 1.6624342203140259, "learning_rate": 5.051494390846576e-05, "loss": 5.3719, "step": 26380 }, { "epoch": 0.05054959999923381, "grad_norm": 1.5571140050888062, "learning_rate": 5.0534098672574845e-05, "loss": 
5.3108, "step": 26390 }, { "epoch": 0.05056875483060904, "grad_norm": 1.6169205904006958, "learning_rate": 5.055325343668393e-05, "loss": 5.2481, "step": 26400 }, { "epoch": 0.050587909661984266, "grad_norm": 1.8422189950942993, "learning_rate": 5.0572408200793e-05, "loss": 5.2941, "step": 26410 }, { "epoch": 0.0506070644933595, "grad_norm": 1.5115206241607666, "learning_rate": 5.059156296490208e-05, "loss": 5.2733, "step": 26420 }, { "epoch": 0.05062621932473473, "grad_norm": 1.517604947090149, "learning_rate": 5.0610717729011164e-05, "loss": 5.2703, "step": 26430 }, { "epoch": 0.05064537415610996, "grad_norm": 1.6888834238052368, "learning_rate": 5.0629872493120245e-05, "loss": 5.2389, "step": 26440 }, { "epoch": 0.05066452898748519, "grad_norm": 1.6066800355911255, "learning_rate": 5.064902725722932e-05, "loss": 5.1811, "step": 26450 }, { "epoch": 0.050683683818860424, "grad_norm": 1.7315800189971924, "learning_rate": 5.06681820213384e-05, "loss": 5.2647, "step": 26460 }, { "epoch": 0.05070283865023565, "grad_norm": 1.5643680095672607, "learning_rate": 5.068733678544748e-05, "loss": 5.3978, "step": 26470 }, { "epoch": 0.05072199348161088, "grad_norm": 1.6804602146148682, "learning_rate": 5.0706491549556564e-05, "loss": 5.2989, "step": 26480 }, { "epoch": 0.050741148312986116, "grad_norm": 1.837021827697754, "learning_rate": 5.072564631366564e-05, "loss": 5.1215, "step": 26490 }, { "epoch": 0.05076030314436134, "grad_norm": 1.5846530199050903, "learning_rate": 5.074480107777472e-05, "loss": 5.3452, "step": 26500 }, { "epoch": 0.050779457975736575, "grad_norm": 1.6357663869857788, "learning_rate": 5.076395584188381e-05, "loss": 5.3628, "step": 26510 }, { "epoch": 0.05079861280711181, "grad_norm": 1.7200933694839478, "learning_rate": 5.078311060599288e-05, "loss": 5.242, "step": 26520 }, { "epoch": 0.050817767638487034, "grad_norm": 1.892066478729248, "learning_rate": 5.0802265370101963e-05, "loss": 5.3237, "step": 26530 }, { "epoch": 0.05083692246986227, 
"grad_norm": 1.8830771446228027, "learning_rate": 5.0821420134211045e-05, "loss": 5.2555, "step": 26540 }, { "epoch": 0.0508560773012375, "grad_norm": 1.603029727935791, "learning_rate": 5.0840574898320126e-05, "loss": 5.402, "step": 26550 }, { "epoch": 0.050875232132612726, "grad_norm": 1.772295355796814, "learning_rate": 5.08597296624292e-05, "loss": 5.5124, "step": 26560 }, { "epoch": 0.05089438696398796, "grad_norm": 1.6366784572601318, "learning_rate": 5.087888442653828e-05, "loss": 5.3714, "step": 26570 }, { "epoch": 0.05091354179536319, "grad_norm": 1.6041227579116821, "learning_rate": 5.089803919064736e-05, "loss": 5.2209, "step": 26580 }, { "epoch": 0.05093269662673842, "grad_norm": 1.6945890188217163, "learning_rate": 5.0917193954756445e-05, "loss": 5.2505, "step": 26590 }, { "epoch": 0.05095185145811365, "grad_norm": 1.759406566619873, "learning_rate": 5.093634871886552e-05, "loss": 5.22, "step": 26600 }, { "epoch": 0.050971006289488884, "grad_norm": 2.9769232273101807, "learning_rate": 5.09555034829746e-05, "loss": 5.4499, "step": 26610 }, { "epoch": 0.05099016112086411, "grad_norm": 1.5605010986328125, "learning_rate": 5.097465824708369e-05, "loss": 5.1534, "step": 26620 }, { "epoch": 0.05100931595223934, "grad_norm": 1.7942919731140137, "learning_rate": 5.099381301119276e-05, "loss": 5.2959, "step": 26630 }, { "epoch": 0.051028470783614575, "grad_norm": 1.8386893272399902, "learning_rate": 5.1012967775301844e-05, "loss": 5.291, "step": 26640 }, { "epoch": 0.0510476256149898, "grad_norm": 1.7361419200897217, "learning_rate": 5.103212253941092e-05, "loss": 5.2547, "step": 26650 }, { "epoch": 0.051066780446365034, "grad_norm": 2.5241870880126953, "learning_rate": 5.105127730352001e-05, "loss": 5.2949, "step": 26660 }, { "epoch": 0.05108593527774027, "grad_norm": 1.8772025108337402, "learning_rate": 5.107043206762908e-05, "loss": 5.2615, "step": 26670 }, { "epoch": 0.0511050901091155, "grad_norm": 1.7254043817520142, "learning_rate": 
5.108958683173816e-05, "loss": 5.2105, "step": 26680 }, { "epoch": 0.051124244940490726, "grad_norm": 1.8421275615692139, "learning_rate": 5.1108741595847244e-05, "loss": 5.2119, "step": 26690 }, { "epoch": 0.05114339977186596, "grad_norm": 1.6134898662567139, "learning_rate": 5.1127896359956325e-05, "loss": 5.3224, "step": 26700 }, { "epoch": 0.05116255460324119, "grad_norm": 1.5786479711532593, "learning_rate": 5.11470511240654e-05, "loss": 5.2359, "step": 26710 }, { "epoch": 0.05118170943461642, "grad_norm": 1.5960783958435059, "learning_rate": 5.116620588817448e-05, "loss": 5.2519, "step": 26720 }, { "epoch": 0.05120086426599165, "grad_norm": 1.515380859375, "learning_rate": 5.118536065228357e-05, "loss": 5.3128, "step": 26730 }, { "epoch": 0.051220019097366884, "grad_norm": 1.57402765750885, "learning_rate": 5.1204515416392644e-05, "loss": 5.3947, "step": 26740 }, { "epoch": 0.05123917392874211, "grad_norm": 1.6869467496871948, "learning_rate": 5.1223670180501725e-05, "loss": 5.1209, "step": 26750 }, { "epoch": 0.05125832876011734, "grad_norm": 1.7247966527938843, "learning_rate": 5.12428249446108e-05, "loss": 5.3106, "step": 26760 }, { "epoch": 0.051277483591492576, "grad_norm": 1.5386338233947754, "learning_rate": 5.126197970871989e-05, "loss": 5.3516, "step": 26770 }, { "epoch": 0.0512966384228678, "grad_norm": 1.624829649925232, "learning_rate": 5.128113447282896e-05, "loss": 5.3051, "step": 26780 }, { "epoch": 0.051315793254243035, "grad_norm": 1.7498133182525635, "learning_rate": 5.1300289236938044e-05, "loss": 5.2312, "step": 26790 }, { "epoch": 0.05133494808561827, "grad_norm": 1.568526029586792, "learning_rate": 5.131944400104712e-05, "loss": 5.0695, "step": 26800 }, { "epoch": 0.051354102916993494, "grad_norm": 1.632667064666748, "learning_rate": 5.1338598765156206e-05, "loss": 5.2575, "step": 26810 }, { "epoch": 0.05137325774836873, "grad_norm": 1.5772793292999268, "learning_rate": 5.135775352926528e-05, "loss": 5.3381, "step": 26820 }, { "epoch": 
0.05139241257974396, "grad_norm": 1.6298483610153198, "learning_rate": 5.137690829337436e-05, "loss": 5.2499, "step": 26830 }, { "epoch": 0.051411567411119186, "grad_norm": 1.5490684509277344, "learning_rate": 5.1396063057483436e-05, "loss": 5.2334, "step": 26840 }, { "epoch": 0.05143072224249442, "grad_norm": 1.753705620765686, "learning_rate": 5.1415217821592525e-05, "loss": 5.1212, "step": 26850 }, { "epoch": 0.05144987707386965, "grad_norm": 1.5729212760925293, "learning_rate": 5.1434372585701606e-05, "loss": 5.2355, "step": 26860 }, { "epoch": 0.05146903190524488, "grad_norm": 1.650329828262329, "learning_rate": 5.145352734981068e-05, "loss": 5.249, "step": 26870 }, { "epoch": 0.05148818673662011, "grad_norm": 1.531926155090332, "learning_rate": 5.147268211391977e-05, "loss": 5.2823, "step": 26880 }, { "epoch": 0.05150734156799534, "grad_norm": 1.7119964361190796, "learning_rate": 5.149183687802884e-05, "loss": 5.2, "step": 26890 }, { "epoch": 0.05152649639937057, "grad_norm": 1.5276120901107788, "learning_rate": 5.1510991642137924e-05, "loss": 5.2543, "step": 26900 }, { "epoch": 0.0515456512307458, "grad_norm": 1.7818996906280518, "learning_rate": 5.1530146406247e-05, "loss": 5.243, "step": 26910 }, { "epoch": 0.051564806062121035, "grad_norm": 1.5695329904556274, "learning_rate": 5.154930117035609e-05, "loss": 5.2805, "step": 26920 }, { "epoch": 0.05158396089349626, "grad_norm": 1.5414481163024902, "learning_rate": 5.156845593446516e-05, "loss": 5.2352, "step": 26930 }, { "epoch": 0.051603115724871494, "grad_norm": 1.6488455533981323, "learning_rate": 5.158761069857424e-05, "loss": 5.1988, "step": 26940 }, { "epoch": 0.05162227055624673, "grad_norm": 1.6029571294784546, "learning_rate": 5.160676546268332e-05, "loss": 5.3153, "step": 26950 }, { "epoch": 0.05164142538762195, "grad_norm": 1.598271131515503, "learning_rate": 5.1625920226792405e-05, "loss": 5.3617, "step": 26960 }, { "epoch": 0.051660580218997186, "grad_norm": 1.643799066543579, "learning_rate": 
5.164507499090148e-05, "loss": 5.2143, "step": 26970 }, { "epoch": 0.05167973505037242, "grad_norm": 1.6997697353363037, "learning_rate": 5.166422975501056e-05, "loss": 5.2933, "step": 26980 }, { "epoch": 0.05169888988174765, "grad_norm": 1.6629045009613037, "learning_rate": 5.1683384519119636e-05, "loss": 5.3364, "step": 26990 }, { "epoch": 0.05171804471312288, "grad_norm": 1.645259141921997, "learning_rate": 5.1702539283228724e-05, "loss": 5.0856, "step": 27000 }, { "epoch": 0.05173719954449811, "grad_norm": 1.6414172649383545, "learning_rate": 5.1721694047337805e-05, "loss": 5.2259, "step": 27010 }, { "epoch": 0.051756354375873344, "grad_norm": 1.781658411026001, "learning_rate": 5.174084881144688e-05, "loss": 5.2935, "step": 27020 }, { "epoch": 0.05177550920724857, "grad_norm": 1.5698648691177368, "learning_rate": 5.176000357555596e-05, "loss": 5.3679, "step": 27030 }, { "epoch": 0.0517946640386238, "grad_norm": 1.6174577474594116, "learning_rate": 5.177915833966504e-05, "loss": 5.3108, "step": 27040 }, { "epoch": 0.051813818869999036, "grad_norm": 1.5947152376174927, "learning_rate": 5.1798313103774124e-05, "loss": 5.3228, "step": 27050 }, { "epoch": 0.05183297370137426, "grad_norm": 1.7124559879302979, "learning_rate": 5.18174678678832e-05, "loss": 5.1699, "step": 27060 }, { "epoch": 0.051852128532749495, "grad_norm": 1.9126032590866089, "learning_rate": 5.1836622631992286e-05, "loss": 5.3968, "step": 27070 }, { "epoch": 0.05187128336412473, "grad_norm": 1.7006818056106567, "learning_rate": 5.185577739610136e-05, "loss": 5.3809, "step": 27080 }, { "epoch": 0.051890438195499954, "grad_norm": 1.5849270820617676, "learning_rate": 5.187493216021044e-05, "loss": 5.294, "step": 27090 }, { "epoch": 0.051909593026875187, "grad_norm": 1.8694977760314941, "learning_rate": 5.1894086924319517e-05, "loss": 5.2587, "step": 27100 }, { "epoch": 0.05192874785825042, "grad_norm": 1.5644950866699219, "learning_rate": 5.1913241688428605e-05, "loss": 5.2587, "step": 27110 }, { 
"epoch": 0.051947902689625645, "grad_norm": 1.5224149227142334, "learning_rate": 5.1932396452537686e-05, "loss": 5.314, "step": 27120 }, { "epoch": 0.05196705752100088, "grad_norm": 1.6553022861480713, "learning_rate": 5.195155121664676e-05, "loss": 5.1977, "step": 27130 }, { "epoch": 0.05198621235237611, "grad_norm": 1.5101995468139648, "learning_rate": 5.197070598075584e-05, "loss": 5.277, "step": 27140 }, { "epoch": 0.05200536718375134, "grad_norm": 1.5541613101959229, "learning_rate": 5.198986074486492e-05, "loss": 5.3141, "step": 27150 }, { "epoch": 0.05202452201512657, "grad_norm": 1.7451695203781128, "learning_rate": 5.2009015508974004e-05, "loss": 5.2216, "step": 27160 }, { "epoch": 0.0520436768465018, "grad_norm": 1.6275829076766968, "learning_rate": 5.202817027308308e-05, "loss": 5.354, "step": 27170 }, { "epoch": 0.05206283167787703, "grad_norm": 1.578745722770691, "learning_rate": 5.204732503719216e-05, "loss": 5.2868, "step": 27180 }, { "epoch": 0.05208198650925226, "grad_norm": 1.7614654302597046, "learning_rate": 5.206647980130124e-05, "loss": 5.2618, "step": 27190 }, { "epoch": 0.052101141340627495, "grad_norm": 1.5075814723968506, "learning_rate": 5.208563456541032e-05, "loss": 5.3503, "step": 27200 }, { "epoch": 0.05212029617200272, "grad_norm": 1.7781908512115479, "learning_rate": 5.21047893295194e-05, "loss": 5.2506, "step": 27210 }, { "epoch": 0.052139451003377954, "grad_norm": 1.6082016229629517, "learning_rate": 5.2123944093628485e-05, "loss": 5.265, "step": 27220 }, { "epoch": 0.05215860583475319, "grad_norm": 1.6933311223983765, "learning_rate": 5.214309885773757e-05, "loss": 5.3089, "step": 27230 }, { "epoch": 0.05217776066612841, "grad_norm": 1.5605071783065796, "learning_rate": 5.216225362184664e-05, "loss": 5.3068, "step": 27240 }, { "epoch": 0.052196915497503646, "grad_norm": 1.7603187561035156, "learning_rate": 5.218140838595572e-05, "loss": 5.2504, "step": 27250 }, { "epoch": 0.05221607032887888, "grad_norm": 1.6641110181808472, 
"learning_rate": 5.2200563150064804e-05, "loss": 5.4095, "step": 27260 }, { "epoch": 0.052235225160254105, "grad_norm": 1.6473232507705688, "learning_rate": 5.2219717914173885e-05, "loss": 5.2321, "step": 27270 }, { "epoch": 0.05225437999162934, "grad_norm": 1.5761820077896118, "learning_rate": 5.223887267828296e-05, "loss": 5.2031, "step": 27280 }, { "epoch": 0.05227353482300457, "grad_norm": 1.829152226448059, "learning_rate": 5.225802744239204e-05, "loss": 5.2664, "step": 27290 }, { "epoch": 0.0522926896543798, "grad_norm": 1.6258094310760498, "learning_rate": 5.227718220650112e-05, "loss": 5.4099, "step": 27300 }, { "epoch": 0.05231184448575503, "grad_norm": 2.016052007675171, "learning_rate": 5.2296336970610204e-05, "loss": 5.1043, "step": 27310 }, { "epoch": 0.05233099931713026, "grad_norm": 1.6567078828811646, "learning_rate": 5.231549173471928e-05, "loss": 5.1883, "step": 27320 }, { "epoch": 0.052350154148505496, "grad_norm": 1.5906509160995483, "learning_rate": 5.233464649882836e-05, "loss": 5.1938, "step": 27330 }, { "epoch": 0.05236930897988072, "grad_norm": 1.6565059423446655, "learning_rate": 5.235380126293745e-05, "loss": 5.1416, "step": 27340 }, { "epoch": 0.052388463811255954, "grad_norm": 1.574907898902893, "learning_rate": 5.237295602704652e-05, "loss": 5.3008, "step": 27350 }, { "epoch": 0.05240761864263119, "grad_norm": 1.536962628364563, "learning_rate": 5.2392110791155603e-05, "loss": 5.3354, "step": 27360 }, { "epoch": 0.05242677347400641, "grad_norm": 1.728819489479065, "learning_rate": 5.241126555526468e-05, "loss": 5.3539, "step": 27370 }, { "epoch": 0.052445928305381646, "grad_norm": 1.6470777988433838, "learning_rate": 5.2430420319373766e-05, "loss": 5.2042, "step": 27380 }, { "epoch": 0.05246508313675688, "grad_norm": 1.5438206195831299, "learning_rate": 5.244957508348284e-05, "loss": 5.2849, "step": 27390 }, { "epoch": 0.052484237968132105, "grad_norm": 1.756807804107666, "learning_rate": 5.246872984759192e-05, "loss": 5.3049, "step": 
27400 }, { "epoch": 0.05250339279950734, "grad_norm": 1.7530325651168823, "learning_rate": 5.2487884611701e-05, "loss": 5.2434, "step": 27410 }, { "epoch": 0.05252254763088257, "grad_norm": 1.5415613651275635, "learning_rate": 5.2507039375810084e-05, "loss": 5.4107, "step": 27420 }, { "epoch": 0.0525417024622578, "grad_norm": 1.6262761354446411, "learning_rate": 5.252619413991916e-05, "loss": 5.2318, "step": 27430 }, { "epoch": 0.05256085729363303, "grad_norm": 1.5875935554504395, "learning_rate": 5.254534890402824e-05, "loss": 5.2476, "step": 27440 }, { "epoch": 0.05258001212500826, "grad_norm": 1.5142836570739746, "learning_rate": 5.256450366813733e-05, "loss": 5.2836, "step": 27450 }, { "epoch": 0.05259916695638349, "grad_norm": 1.6693674325942993, "learning_rate": 5.25836584322464e-05, "loss": 5.3127, "step": 27460 }, { "epoch": 0.05261832178775872, "grad_norm": 1.5778459310531616, "learning_rate": 5.2602813196355484e-05, "loss": 5.2052, "step": 27470 }, { "epoch": 0.052637476619133955, "grad_norm": 1.6453200578689575, "learning_rate": 5.262196796046456e-05, "loss": 5.1609, "step": 27480 }, { "epoch": 0.05265663145050918, "grad_norm": 1.6601606607437134, "learning_rate": 5.264112272457365e-05, "loss": 5.2628, "step": 27490 }, { "epoch": 0.052675786281884414, "grad_norm": 1.6209564208984375, "learning_rate": 5.266027748868272e-05, "loss": 5.2898, "step": 27500 }, { "epoch": 0.05269494111325965, "grad_norm": 1.7965731620788574, "learning_rate": 5.26794322527918e-05, "loss": 5.346, "step": 27510 }, { "epoch": 0.05271409594463487, "grad_norm": 1.5112340450286865, "learning_rate": 5.269858701690088e-05, "loss": 5.3377, "step": 27520 }, { "epoch": 0.052733250776010106, "grad_norm": 1.6287569999694824, "learning_rate": 5.2717741781009965e-05, "loss": 5.1476, "step": 27530 }, { "epoch": 0.05275240560738534, "grad_norm": 1.6477131843566895, "learning_rate": 5.273689654511904e-05, "loss": 5.2766, "step": 27540 }, { "epoch": 0.052771560438760565, "grad_norm": 
1.6272237300872803, "learning_rate": 5.275605130922812e-05, "loss": 5.093, "step": 27550 }, { "epoch": 0.0527907152701358, "grad_norm": 1.6067848205566406, "learning_rate": 5.277520607333721e-05, "loss": 5.1408, "step": 27560 }, { "epoch": 0.05280987010151103, "grad_norm": 1.7014063596725464, "learning_rate": 5.2794360837446284e-05, "loss": 5.2507, "step": 27570 }, { "epoch": 0.05282902493288626, "grad_norm": 1.632845163345337, "learning_rate": 5.2813515601555365e-05, "loss": 5.3886, "step": 27580 }, { "epoch": 0.05284817976426149, "grad_norm": 1.688970685005188, "learning_rate": 5.283267036566444e-05, "loss": 5.3354, "step": 27590 }, { "epoch": 0.05286733459563672, "grad_norm": 1.594879150390625, "learning_rate": 5.285182512977353e-05, "loss": 5.0932, "step": 27600 }, { "epoch": 0.05288648942701195, "grad_norm": 1.564799427986145, "learning_rate": 5.28709798938826e-05, "loss": 5.335, "step": 27610 }, { "epoch": 0.05290564425838718, "grad_norm": 1.7603336572647095, "learning_rate": 5.2890134657991683e-05, "loss": 5.1283, "step": 27620 }, { "epoch": 0.052924799089762414, "grad_norm": 1.5232465267181396, "learning_rate": 5.290928942210076e-05, "loss": 5.2851, "step": 27630 }, { "epoch": 0.05294395392113764, "grad_norm": 1.5771156549453735, "learning_rate": 5.2928444186209846e-05, "loss": 5.2499, "step": 27640 }, { "epoch": 0.05296310875251287, "grad_norm": 1.6945862770080566, "learning_rate": 5.294759895031892e-05, "loss": 5.2611, "step": 27650 }, { "epoch": 0.052982263583888106, "grad_norm": 1.6362049579620361, "learning_rate": 5.2966753714428e-05, "loss": 5.2251, "step": 27660 }, { "epoch": 0.05300141841526334, "grad_norm": 1.7093935012817383, "learning_rate": 5.2985908478537076e-05, "loss": 5.2432, "step": 27670 }, { "epoch": 0.053020573246638565, "grad_norm": 1.5664838552474976, "learning_rate": 5.3005063242646165e-05, "loss": 5.3018, "step": 27680 }, { "epoch": 0.0530397280780138, "grad_norm": 1.5954375267028809, "learning_rate": 5.302421800675524e-05, "loss": 
5.2342, "step": 27690 }, { "epoch": 0.05305888290938903, "grad_norm": 1.583404779434204, "learning_rate": 5.304337277086432e-05, "loss": 5.2874, "step": 27700 }, { "epoch": 0.05307803774076426, "grad_norm": 1.5229276418685913, "learning_rate": 5.3062527534973395e-05, "loss": 5.1777, "step": 27710 }, { "epoch": 0.05309719257213949, "grad_norm": 1.6584757566452026, "learning_rate": 5.308168229908248e-05, "loss": 5.2388, "step": 27720 }, { "epoch": 0.05311634740351472, "grad_norm": 1.5129774808883667, "learning_rate": 5.3100837063191564e-05, "loss": 5.4109, "step": 27730 }, { "epoch": 0.05313550223488995, "grad_norm": 1.7254836559295654, "learning_rate": 5.311999182730064e-05, "loss": 5.2563, "step": 27740 }, { "epoch": 0.05315465706626518, "grad_norm": 1.5575729608535767, "learning_rate": 5.313914659140973e-05, "loss": 5.081, "step": 27750 }, { "epoch": 0.053173811897640415, "grad_norm": 1.5008827447891235, "learning_rate": 5.31583013555188e-05, "loss": 5.2867, "step": 27760 }, { "epoch": 0.05319296672901564, "grad_norm": 1.5883028507232666, "learning_rate": 5.317745611962788e-05, "loss": 5.3096, "step": 27770 }, { "epoch": 0.053212121560390874, "grad_norm": 1.749805212020874, "learning_rate": 5.319661088373696e-05, "loss": 5.1904, "step": 27780 }, { "epoch": 0.05323127639176611, "grad_norm": 1.5545743703842163, "learning_rate": 5.3215765647846045e-05, "loss": 5.3451, "step": 27790 }, { "epoch": 0.05325043122314133, "grad_norm": 1.6658574342727661, "learning_rate": 5.323492041195512e-05, "loss": 5.1521, "step": 27800 }, { "epoch": 0.053269586054516566, "grad_norm": 1.4912296533584595, "learning_rate": 5.32540751760642e-05, "loss": 5.3548, "step": 27810 }, { "epoch": 0.0532887408858918, "grad_norm": 1.5924994945526123, "learning_rate": 5.3273229940173276e-05, "loss": 5.3255, "step": 27820 }, { "epoch": 0.053307895717267025, "grad_norm": 1.617031216621399, "learning_rate": 5.3292384704282364e-05, "loss": 5.1919, "step": 27830 }, { "epoch": 0.05332705054864226, 
"grad_norm": 1.586232304573059, "learning_rate": 5.3311539468391445e-05, "loss": 5.2459, "step": 27840 }, { "epoch": 0.05334620538001749, "grad_norm": 1.6336169242858887, "learning_rate": 5.333069423250052e-05, "loss": 5.2731, "step": 27850 }, { "epoch": 0.053365360211392716, "grad_norm": 1.5390137434005737, "learning_rate": 5.33498489966096e-05, "loss": 5.2622, "step": 27860 }, { "epoch": 0.05338451504276795, "grad_norm": 1.7668592929840088, "learning_rate": 5.336900376071868e-05, "loss": 5.3032, "step": 27870 }, { "epoch": 0.05340366987414318, "grad_norm": 1.6311496496200562, "learning_rate": 5.3388158524827764e-05, "loss": 5.1891, "step": 27880 }, { "epoch": 0.05342282470551841, "grad_norm": 1.6983364820480347, "learning_rate": 5.340731328893684e-05, "loss": 5.2503, "step": 27890 }, { "epoch": 0.05344197953689364, "grad_norm": 1.6993454694747925, "learning_rate": 5.3426468053045926e-05, "loss": 5.3247, "step": 27900 }, { "epoch": 0.053461134368268874, "grad_norm": 1.6230076551437378, "learning_rate": 5.3445622817155e-05, "loss": 5.1787, "step": 27910 }, { "epoch": 0.0534802891996441, "grad_norm": 1.7176064252853394, "learning_rate": 5.346477758126408e-05, "loss": 5.3197, "step": 27920 }, { "epoch": 0.05349944403101933, "grad_norm": 1.5922062397003174, "learning_rate": 5.3483932345373157e-05, "loss": 5.2723, "step": 27930 }, { "epoch": 0.053518598862394566, "grad_norm": 1.7277863025665283, "learning_rate": 5.3503087109482245e-05, "loss": 5.2443, "step": 27940 }, { "epoch": 0.05353775369376979, "grad_norm": 1.6362831592559814, "learning_rate": 5.3522241873591326e-05, "loss": 5.3048, "step": 27950 }, { "epoch": 0.053556908525145025, "grad_norm": 1.8865413665771484, "learning_rate": 5.35413966377004e-05, "loss": 5.3022, "step": 27960 }, { "epoch": 0.05357606335652026, "grad_norm": 1.608567714691162, "learning_rate": 5.356055140180948e-05, "loss": 5.326, "step": 27970 }, { "epoch": 0.05359521818789549, "grad_norm": 1.7444815635681152, "learning_rate": 
5.357970616591856e-05, "loss": 5.1468, "step": 27980 }, { "epoch": 0.05361437301927072, "grad_norm": 2.036670207977295, "learning_rate": 5.3598860930027644e-05, "loss": 5.0579, "step": 27990 }, { "epoch": 0.05363352785064595, "grad_norm": 1.6785286664962769, "learning_rate": 5.361801569413672e-05, "loss": 5.1575, "step": 28000 }, { "epoch": 0.05365268268202118, "grad_norm": 1.5906010866165161, "learning_rate": 5.36371704582458e-05, "loss": 5.3324, "step": 28010 }, { "epoch": 0.05367183751339641, "grad_norm": 1.5853835344314575, "learning_rate": 5.365632522235488e-05, "loss": 5.2641, "step": 28020 }, { "epoch": 0.05369099234477164, "grad_norm": 1.6932857036590576, "learning_rate": 5.367547998646396e-05, "loss": 5.1452, "step": 28030 }, { "epoch": 0.053710147176146875, "grad_norm": 1.492512822151184, "learning_rate": 5.369463475057304e-05, "loss": 5.2115, "step": 28040 }, { "epoch": 0.0537293020075221, "grad_norm": 1.4999703168869019, "learning_rate": 5.371378951468212e-05, "loss": 5.1425, "step": 28050 }, { "epoch": 0.053748456838897334, "grad_norm": 1.5529594421386719, "learning_rate": 5.373294427879121e-05, "loss": 5.1947, "step": 28060 }, { "epoch": 0.053767611670272566, "grad_norm": 1.9207032918930054, "learning_rate": 5.375209904290028e-05, "loss": 5.1288, "step": 28070 }, { "epoch": 0.05378676650164779, "grad_norm": 1.8160150051116943, "learning_rate": 5.377125380700936e-05, "loss": 5.1533, "step": 28080 }, { "epoch": 0.053805921333023025, "grad_norm": 1.626401424407959, "learning_rate": 5.3790408571118444e-05, "loss": 5.2203, "step": 28090 }, { "epoch": 0.05382507616439826, "grad_norm": 1.5915396213531494, "learning_rate": 5.3809563335227525e-05, "loss": 5.2403, "step": 28100 }, { "epoch": 0.053844230995773484, "grad_norm": 1.6091768741607666, "learning_rate": 5.38287180993366e-05, "loss": 5.1875, "step": 28110 }, { "epoch": 0.05386338582714872, "grad_norm": 1.5747309923171997, "learning_rate": 5.384787286344568e-05, "loss": 5.2108, "step": 28120 }, { 
"epoch": 0.05388254065852395, "grad_norm": 1.806433916091919, "learning_rate": 5.386702762755476e-05, "loss": 5.2197, "step": 28130 }, { "epoch": 0.053901695489899176, "grad_norm": 1.8561497926712036, "learning_rate": 5.3886182391663844e-05, "loss": 5.2046, "step": 28140 }, { "epoch": 0.05392085032127441, "grad_norm": 1.613569974899292, "learning_rate": 5.390533715577292e-05, "loss": 5.3703, "step": 28150 }, { "epoch": 0.05394000515264964, "grad_norm": 2.12685227394104, "learning_rate": 5.3924491919882e-05, "loss": 5.3722, "step": 28160 }, { "epoch": 0.05395915998402487, "grad_norm": 1.559173822402954, "learning_rate": 5.394364668399109e-05, "loss": 5.407, "step": 28170 }, { "epoch": 0.0539783148154001, "grad_norm": 1.674013376235962, "learning_rate": 5.396280144810016e-05, "loss": 5.1678, "step": 28180 }, { "epoch": 0.053997469646775334, "grad_norm": 1.6171252727508545, "learning_rate": 5.3981956212209243e-05, "loss": 5.1366, "step": 28190 }, { "epoch": 0.05401662447815056, "grad_norm": 1.6498677730560303, "learning_rate": 5.400111097631832e-05, "loss": 5.139, "step": 28200 }, { "epoch": 0.05403577930952579, "grad_norm": 1.7086750268936157, "learning_rate": 5.4020265740427406e-05, "loss": 5.0886, "step": 28210 }, { "epoch": 0.054054934140901026, "grad_norm": 1.5133352279663086, "learning_rate": 5.403942050453648e-05, "loss": 5.1905, "step": 28220 }, { "epoch": 0.05407408897227625, "grad_norm": 1.5365386009216309, "learning_rate": 5.405857526864556e-05, "loss": 5.3443, "step": 28230 }, { "epoch": 0.054093243803651485, "grad_norm": 1.6556189060211182, "learning_rate": 5.407773003275464e-05, "loss": 5.3093, "step": 28240 }, { "epoch": 0.05411239863502672, "grad_norm": 1.665832757949829, "learning_rate": 5.4096884796863724e-05, "loss": 5.193, "step": 28250 }, { "epoch": 0.054131553466401944, "grad_norm": 1.5100797414779663, "learning_rate": 5.41160395609728e-05, "loss": 5.3125, "step": 28260 }, { "epoch": 0.05415070829777718, "grad_norm": 1.7448360919952393, 
"learning_rate": 5.413519432508188e-05, "loss": 5.2179, "step": 28270 }, { "epoch": 0.05416986312915241, "grad_norm": 1.8637418746948242, "learning_rate": 5.415434908919097e-05, "loss": 5.1952, "step": 28280 }, { "epoch": 0.054189017960527636, "grad_norm": 1.5586574077606201, "learning_rate": 5.417350385330004e-05, "loss": 5.168, "step": 28290 }, { "epoch": 0.05420817279190287, "grad_norm": 1.5685440301895142, "learning_rate": 5.4192658617409124e-05, "loss": 5.2827, "step": 28300 }, { "epoch": 0.0542273276232781, "grad_norm": 1.5772043466567993, "learning_rate": 5.42118133815182e-05, "loss": 5.2014, "step": 28310 }, { "epoch": 0.054246482454653334, "grad_norm": 1.5284548997879028, "learning_rate": 5.423096814562729e-05, "loss": 5.2745, "step": 28320 }, { "epoch": 0.05426563728602856, "grad_norm": 1.6444835662841797, "learning_rate": 5.425012290973636e-05, "loss": 5.2175, "step": 28330 }, { "epoch": 0.05428479211740379, "grad_norm": 1.710015058517456, "learning_rate": 5.426927767384544e-05, "loss": 5.2484, "step": 28340 }, { "epoch": 0.054303946948779026, "grad_norm": 1.580512523651123, "learning_rate": 5.428843243795452e-05, "loss": 5.1832, "step": 28350 }, { "epoch": 0.05432310178015425, "grad_norm": 1.566632866859436, "learning_rate": 5.4307587202063605e-05, "loss": 5.2701, "step": 28360 }, { "epoch": 0.054342256611529485, "grad_norm": 1.7026203870773315, "learning_rate": 5.432674196617268e-05, "loss": 5.1851, "step": 28370 }, { "epoch": 0.05436141144290472, "grad_norm": 1.597285509109497, "learning_rate": 5.434589673028176e-05, "loss": 5.2454, "step": 28380 }, { "epoch": 0.054380566274279944, "grad_norm": 1.6223297119140625, "learning_rate": 5.4365051494390836e-05, "loss": 5.1577, "step": 28390 }, { "epoch": 0.05439972110565518, "grad_norm": 1.631320595741272, "learning_rate": 5.4384206258499924e-05, "loss": 5.1929, "step": 28400 }, { "epoch": 0.05441887593703041, "grad_norm": 1.6741092205047607, "learning_rate": 5.4403361022609005e-05, "loss": 5.1894, "step": 
28410 }, { "epoch": 0.054438030768405636, "grad_norm": 1.669303297996521, "learning_rate": 5.442251578671808e-05, "loss": 5.2525, "step": 28420 }, { "epoch": 0.05445718559978087, "grad_norm": 1.7204358577728271, "learning_rate": 5.444167055082717e-05, "loss": 5.2795, "step": 28430 }, { "epoch": 0.0544763404311561, "grad_norm": 1.7675144672393799, "learning_rate": 5.446082531493624e-05, "loss": 5.2108, "step": 28440 }, { "epoch": 0.05449549526253133, "grad_norm": 1.6754313707351685, "learning_rate": 5.4479980079045323e-05, "loss": 5.3035, "step": 28450 }, { "epoch": 0.05451465009390656, "grad_norm": 1.864698886871338, "learning_rate": 5.44991348431544e-05, "loss": 5.1592, "step": 28460 }, { "epoch": 0.054533804925281794, "grad_norm": 1.6701242923736572, "learning_rate": 5.4518289607263486e-05, "loss": 5.0901, "step": 28470 }, { "epoch": 0.05455295975665702, "grad_norm": 1.5962122678756714, "learning_rate": 5.453744437137256e-05, "loss": 5.2455, "step": 28480 }, { "epoch": 0.05457211458803225, "grad_norm": 1.5712816715240479, "learning_rate": 5.455659913548164e-05, "loss": 5.3171, "step": 28490 }, { "epoch": 0.054591269419407486, "grad_norm": 1.830950140953064, "learning_rate": 5.4575753899590716e-05, "loss": 5.2632, "step": 28500 }, { "epoch": 0.05461042425078271, "grad_norm": 1.5900624990463257, "learning_rate": 5.4594908663699805e-05, "loss": 5.2363, "step": 28510 }, { "epoch": 0.054629579082157945, "grad_norm": 1.5898892879486084, "learning_rate": 5.461406342780888e-05, "loss": 5.2984, "step": 28520 }, { "epoch": 0.05464873391353318, "grad_norm": 1.6444445848464966, "learning_rate": 5.463321819191796e-05, "loss": 5.1595, "step": 28530 }, { "epoch": 0.054667888744908404, "grad_norm": 1.6256345510482788, "learning_rate": 5.4652372956027035e-05, "loss": 5.1616, "step": 28540 }, { "epoch": 0.05468704357628364, "grad_norm": 1.6735590696334839, "learning_rate": 5.467152772013612e-05, "loss": 5.2869, "step": 28550 }, { "epoch": 0.05470619840765887, "grad_norm": 
1.6533938646316528, "learning_rate": 5.4690682484245204e-05, "loss": 5.1224, "step": 28560 }, { "epoch": 0.054725353239034095, "grad_norm": 1.5456234216690063, "learning_rate": 5.470983724835428e-05, "loss": 5.2288, "step": 28570 }, { "epoch": 0.05474450807040933, "grad_norm": 1.5878351926803589, "learning_rate": 5.472899201246337e-05, "loss": 5.2512, "step": 28580 }, { "epoch": 0.05476366290178456, "grad_norm": 1.620229721069336, "learning_rate": 5.474814677657244e-05, "loss": 5.3298, "step": 28590 }, { "epoch": 0.05478281773315979, "grad_norm": 1.7960823774337769, "learning_rate": 5.476730154068152e-05, "loss": 5.2335, "step": 28600 }, { "epoch": 0.05480197256453502, "grad_norm": 1.533593773841858, "learning_rate": 5.47864563047906e-05, "loss": 5.1906, "step": 28610 }, { "epoch": 0.05482112739591025, "grad_norm": 1.6044111251831055, "learning_rate": 5.4805611068899685e-05, "loss": 5.2162, "step": 28620 }, { "epoch": 0.05484028222728548, "grad_norm": 1.7458677291870117, "learning_rate": 5.482476583300876e-05, "loss": 5.1659, "step": 28630 }, { "epoch": 0.05485943705866071, "grad_norm": 1.6192195415496826, "learning_rate": 5.484392059711784e-05, "loss": 5.2079, "step": 28640 }, { "epoch": 0.054878591890035945, "grad_norm": 1.6050891876220703, "learning_rate": 5.4863075361226916e-05, "loss": 5.3293, "step": 28650 }, { "epoch": 0.05489774672141118, "grad_norm": 1.5595825910568237, "learning_rate": 5.4882230125336004e-05, "loss": 5.1179, "step": 28660 }, { "epoch": 0.054916901552786404, "grad_norm": 1.6206591129302979, "learning_rate": 5.4901384889445085e-05, "loss": 5.2162, "step": 28670 }, { "epoch": 0.05493605638416164, "grad_norm": 1.5155822038650513, "learning_rate": 5.492053965355416e-05, "loss": 5.157, "step": 28680 }, { "epoch": 0.05495521121553687, "grad_norm": 1.6387425661087036, "learning_rate": 5.493969441766324e-05, "loss": 5.1896, "step": 28690 }, { "epoch": 0.054974366046912096, "grad_norm": 1.5441869497299194, "learning_rate": 5.495884918177232e-05, 
"loss": 5.2712, "step": 28700 }, { "epoch": 0.05499352087828733, "grad_norm": 1.5461379289627075, "learning_rate": 5.4978003945881404e-05, "loss": 5.121, "step": 28710 }, { "epoch": 0.05501267570966256, "grad_norm": 1.600038766860962, "learning_rate": 5.499715870999048e-05, "loss": 5.2108, "step": 28720 }, { "epoch": 0.05503183054103779, "grad_norm": 1.535599946975708, "learning_rate": 5.501631347409956e-05, "loss": 5.1365, "step": 28730 }, { "epoch": 0.05505098537241302, "grad_norm": 1.6054009199142456, "learning_rate": 5.503546823820864e-05, "loss": 5.1754, "step": 28740 }, { "epoch": 0.055070140203788254, "grad_norm": 1.497539758682251, "learning_rate": 5.505462300231772e-05, "loss": 5.2248, "step": 28750 }, { "epoch": 0.05508929503516348, "grad_norm": 1.7244789600372314, "learning_rate": 5.5073777766426797e-05, "loss": 5.2396, "step": 28760 }, { "epoch": 0.05510844986653871, "grad_norm": 1.5534101724624634, "learning_rate": 5.5092932530535885e-05, "loss": 5.1863, "step": 28770 }, { "epoch": 0.055127604697913946, "grad_norm": 1.6463775634765625, "learning_rate": 5.5112087294644966e-05, "loss": 5.2108, "step": 28780 }, { "epoch": 0.05514675952928917, "grad_norm": 1.685509204864502, "learning_rate": 5.513124205875404e-05, "loss": 5.149, "step": 28790 }, { "epoch": 0.055165914360664405, "grad_norm": 1.6085492372512817, "learning_rate": 5.515039682286312e-05, "loss": 5.075, "step": 28800 }, { "epoch": 0.05518506919203964, "grad_norm": 1.6208009719848633, "learning_rate": 5.51695515869722e-05, "loss": 5.1756, "step": 28810 }, { "epoch": 0.05520422402341486, "grad_norm": 1.542258620262146, "learning_rate": 5.5188706351081284e-05, "loss": 5.1653, "step": 28820 }, { "epoch": 0.055223378854790096, "grad_norm": 1.5960735082626343, "learning_rate": 5.520786111519036e-05, "loss": 5.1934, "step": 28830 }, { "epoch": 0.05524253368616533, "grad_norm": 1.581479549407959, "learning_rate": 5.522701587929944e-05, "loss": 5.3216, "step": 28840 }, { "epoch": 0.055261688517540555, 
"grad_norm": 1.5638818740844727, "learning_rate": 5.524617064340852e-05, "loss": 5.316, "step": 28850 }, { "epoch": 0.05528084334891579, "grad_norm": 1.580108642578125, "learning_rate": 5.52653254075176e-05, "loss": 5.0516, "step": 28860 }, { "epoch": 0.05529999818029102, "grad_norm": 1.523224115371704, "learning_rate": 5.528448017162668e-05, "loss": 5.362, "step": 28870 }, { "epoch": 0.05531915301166625, "grad_norm": 1.5506012439727783, "learning_rate": 5.530363493573576e-05, "loss": 5.175, "step": 28880 }, { "epoch": 0.05533830784304148, "grad_norm": 1.6350579261779785, "learning_rate": 5.532278969984485e-05, "loss": 5.2694, "step": 28890 }, { "epoch": 0.05535746267441671, "grad_norm": 1.6447395086288452, "learning_rate": 5.534194446395392e-05, "loss": 5.2397, "step": 28900 }, { "epoch": 0.05537661750579194, "grad_norm": 1.7092820405960083, "learning_rate": 5.5361099228063e-05, "loss": 5.2093, "step": 28910 }, { "epoch": 0.05539577233716717, "grad_norm": 1.499101161956787, "learning_rate": 5.5380253992172084e-05, "loss": 5.3612, "step": 28920 }, { "epoch": 0.055414927168542405, "grad_norm": 1.7086865901947021, "learning_rate": 5.5399408756281165e-05, "loss": 5.1265, "step": 28930 }, { "epoch": 0.05543408199991763, "grad_norm": 1.5533089637756348, "learning_rate": 5.541856352039024e-05, "loss": 5.178, "step": 28940 }, { "epoch": 0.055453236831292864, "grad_norm": 1.5636110305786133, "learning_rate": 5.543771828449932e-05, "loss": 5.2676, "step": 28950 }, { "epoch": 0.0554723916626681, "grad_norm": 1.5468496084213257, "learning_rate": 5.54568730486084e-05, "loss": 5.2616, "step": 28960 }, { "epoch": 0.05549154649404332, "grad_norm": 1.5956534147262573, "learning_rate": 5.5476027812717484e-05, "loss": 5.2047, "step": 28970 }, { "epoch": 0.055510701325418556, "grad_norm": 1.6196707487106323, "learning_rate": 5.549518257682656e-05, "loss": 5.1705, "step": 28980 }, { "epoch": 0.05552985615679379, "grad_norm": 1.5327930450439453, "learning_rate": 5.551433734093564e-05, 
"loss": 5.1906, "step": 28990 }, { "epoch": 0.05554901098816902, "grad_norm": 1.5734745264053345, "learning_rate": 5.553349210504473e-05, "loss": 5.2546, "step": 29000 }, { "epoch": 0.05556816581954425, "grad_norm": 1.8638362884521484, "learning_rate": 5.55526468691538e-05, "loss": 5.2793, "step": 29010 }, { "epoch": 0.05558732065091948, "grad_norm": 1.6205458641052246, "learning_rate": 5.557180163326288e-05, "loss": 5.1375, "step": 29020 }, { "epoch": 0.055606475482294714, "grad_norm": 1.515457272529602, "learning_rate": 5.559095639737196e-05, "loss": 5.2061, "step": 29030 }, { "epoch": 0.05562563031366994, "grad_norm": 1.5753368139266968, "learning_rate": 5.5610111161481046e-05, "loss": 5.2062, "step": 29040 }, { "epoch": 0.05564478514504517, "grad_norm": 1.524827480316162, "learning_rate": 5.562926592559012e-05, "loss": 5.2782, "step": 29050 }, { "epoch": 0.055663939976420405, "grad_norm": 1.573127269744873, "learning_rate": 5.56484206896992e-05, "loss": 5.1932, "step": 29060 }, { "epoch": 0.05568309480779563, "grad_norm": 1.6147172451019287, "learning_rate": 5.5667575453808276e-05, "loss": 5.1826, "step": 29070 }, { "epoch": 0.055702249639170864, "grad_norm": 1.6576112508773804, "learning_rate": 5.5686730217917364e-05, "loss": 5.1109, "step": 29080 }, { "epoch": 0.0557214044705461, "grad_norm": 1.513567566871643, "learning_rate": 5.570588498202644e-05, "loss": 5.1146, "step": 29090 }, { "epoch": 0.05574055930192132, "grad_norm": 1.4679033756256104, "learning_rate": 5.572503974613552e-05, "loss": 5.208, "step": 29100 }, { "epoch": 0.055759714133296556, "grad_norm": 1.5399513244628906, "learning_rate": 5.574419451024461e-05, "loss": 5.3605, "step": 29110 }, { "epoch": 0.05577886896467179, "grad_norm": 1.6728544235229492, "learning_rate": 5.576334927435368e-05, "loss": 5.2281, "step": 29120 }, { "epoch": 0.055798023796047015, "grad_norm": 1.59255850315094, "learning_rate": 5.5782504038462764e-05, "loss": 5.1101, "step": 29130 }, { "epoch": 0.05581717862742225, 
"grad_norm": 1.5474495887756348, "learning_rate": 5.580165880257184e-05, "loss": 5.2629, "step": 29140 }, { "epoch": 0.05583633345879748, "grad_norm": 1.6219263076782227, "learning_rate": 5.582081356668093e-05, "loss": 5.2654, "step": 29150 }, { "epoch": 0.05585548829017271, "grad_norm": 1.644882082939148, "learning_rate": 5.583996833079e-05, "loss": 5.2043, "step": 29160 }, { "epoch": 0.05587464312154794, "grad_norm": 1.6672476530075073, "learning_rate": 5.585912309489908e-05, "loss": 5.2001, "step": 29170 }, { "epoch": 0.05589379795292317, "grad_norm": 1.4831738471984863, "learning_rate": 5.587827785900816e-05, "loss": 5.2669, "step": 29180 }, { "epoch": 0.0559129527842984, "grad_norm": 1.6505446434020996, "learning_rate": 5.5897432623117245e-05, "loss": 5.1857, "step": 29190 }, { "epoch": 0.05593210761567363, "grad_norm": 1.9486427307128906, "learning_rate": 5.591658738722632e-05, "loss": 5.1439, "step": 29200 }, { "epoch": 0.055951262447048865, "grad_norm": 1.689156413078308, "learning_rate": 5.59357421513354e-05, "loss": 5.1173, "step": 29210 }, { "epoch": 0.05597041727842409, "grad_norm": 1.7149089574813843, "learning_rate": 5.5954896915444476e-05, "loss": 5.0457, "step": 29220 }, { "epoch": 0.055989572109799324, "grad_norm": 1.5738986730575562, "learning_rate": 5.5974051679553564e-05, "loss": 5.2965, "step": 29230 }, { "epoch": 0.05600872694117456, "grad_norm": 1.5055607557296753, "learning_rate": 5.599320644366264e-05, "loss": 5.2095, "step": 29240 }, { "epoch": 0.05602788177254978, "grad_norm": 1.74594247341156, "learning_rate": 5.601236120777172e-05, "loss": 5.1758, "step": 29250 }, { "epoch": 0.056047036603925016, "grad_norm": 1.6946159601211548, "learning_rate": 5.603151597188081e-05, "loss": 5.2206, "step": 29260 }, { "epoch": 0.05606619143530025, "grad_norm": 1.5836870670318604, "learning_rate": 5.605067073598988e-05, "loss": 5.0717, "step": 29270 }, { "epoch": 0.056085346266675475, "grad_norm": 1.5425399541854858, "learning_rate": 
5.6069825500098963e-05, "loss": 5.2267, "step": 29280 }, { "epoch": 0.05610450109805071, "grad_norm": 1.5774387121200562, "learning_rate": 5.608898026420804e-05, "loss": 5.1174, "step": 29290 }, { "epoch": 0.05612365592942594, "grad_norm": 1.4550502300262451, "learning_rate": 5.6108135028317126e-05, "loss": 5.2795, "step": 29300 }, { "epoch": 0.05614281076080117, "grad_norm": 1.5098215341567993, "learning_rate": 5.61272897924262e-05, "loss": 5.3529, "step": 29310 }, { "epoch": 0.0561619655921764, "grad_norm": 1.640588402748108, "learning_rate": 5.614644455653528e-05, "loss": 5.0167, "step": 29320 }, { "epoch": 0.05618112042355163, "grad_norm": 1.7059695720672607, "learning_rate": 5.6165599320644356e-05, "loss": 5.0796, "step": 29330 }, { "epoch": 0.056200275254926865, "grad_norm": 1.525557041168213, "learning_rate": 5.6184754084753444e-05, "loss": 5.2291, "step": 29340 }, { "epoch": 0.05621943008630209, "grad_norm": 1.6292020082473755, "learning_rate": 5.620390884886252e-05, "loss": 5.0313, "step": 29350 }, { "epoch": 0.056238584917677324, "grad_norm": 1.4896653890609741, "learning_rate": 5.62230636129716e-05, "loss": 5.2007, "step": 29360 }, { "epoch": 0.05625773974905256, "grad_norm": 1.5101885795593262, "learning_rate": 5.6242218377080675e-05, "loss": 5.1503, "step": 29370 }, { "epoch": 0.05627689458042778, "grad_norm": 1.5018879175186157, "learning_rate": 5.626137314118976e-05, "loss": 5.153, "step": 29380 }, { "epoch": 0.056296049411803016, "grad_norm": 1.6821807622909546, "learning_rate": 5.6280527905298844e-05, "loss": 5.1226, "step": 29390 }, { "epoch": 0.05631520424317825, "grad_norm": 1.7102593183517456, "learning_rate": 5.629968266940792e-05, "loss": 5.1332, "step": 29400 }, { "epoch": 0.056334359074553475, "grad_norm": 1.567392110824585, "learning_rate": 5.6318837433517e-05, "loss": 5.1047, "step": 29410 }, { "epoch": 0.05635351390592871, "grad_norm": 1.577846646308899, "learning_rate": 5.633799219762608e-05, "loss": 5.1494, "step": 29420 }, { "epoch": 
0.05637266873730394, "grad_norm": 1.578161358833313, "learning_rate": 5.635714696173516e-05, "loss": 5.2439, "step": 29430 }, { "epoch": 0.05639182356867917, "grad_norm": 1.524781346321106, "learning_rate": 5.637630172584424e-05, "loss": 5.216, "step": 29440 }, { "epoch": 0.0564109784000544, "grad_norm": 1.6648316383361816, "learning_rate": 5.6395456489953325e-05, "loss": 5.078, "step": 29450 }, { "epoch": 0.05643013323142963, "grad_norm": 1.6121774911880493, "learning_rate": 5.64146112540624e-05, "loss": 5.1357, "step": 29460 }, { "epoch": 0.05644928806280486, "grad_norm": 1.72437584400177, "learning_rate": 5.643376601817148e-05, "loss": 5.3063, "step": 29470 }, { "epoch": 0.05646844289418009, "grad_norm": 1.5466097593307495, "learning_rate": 5.6452920782280556e-05, "loss": 5.2934, "step": 29480 }, { "epoch": 0.056487597725555325, "grad_norm": 1.6173876523971558, "learning_rate": 5.6472075546389644e-05, "loss": 5.2851, "step": 29490 }, { "epoch": 0.05650675255693055, "grad_norm": 1.6201993227005005, "learning_rate": 5.6491230310498725e-05, "loss": 5.1915, "step": 29500 }, { "epoch": 0.056525907388305784, "grad_norm": 1.5993149280548096, "learning_rate": 5.65103850746078e-05, "loss": 5.1306, "step": 29510 }, { "epoch": 0.05654506221968102, "grad_norm": 1.5913598537445068, "learning_rate": 5.652953983871688e-05, "loss": 5.2055, "step": 29520 }, { "epoch": 0.05656421705105624, "grad_norm": 1.7056312561035156, "learning_rate": 5.654869460282596e-05, "loss": 5.1346, "step": 29530 }, { "epoch": 0.056583371882431475, "grad_norm": 1.5861247777938843, "learning_rate": 5.6567849366935044e-05, "loss": 5.2589, "step": 29540 }, { "epoch": 0.05660252671380671, "grad_norm": 1.5806504487991333, "learning_rate": 5.658700413104412e-05, "loss": 5.186, "step": 29550 }, { "epoch": 0.056621681545181934, "grad_norm": 1.5720844268798828, "learning_rate": 5.66061588951532e-05, "loss": 5.0372, "step": 29560 }, { "epoch": 0.05664083637655717, "grad_norm": 1.5749709606170654, 
"learning_rate": 5.662531365926228e-05, "loss": 5.2124, "step": 29570 }, { "epoch": 0.0566599912079324, "grad_norm": 1.542215347290039, "learning_rate": 5.664446842337136e-05, "loss": 5.1211, "step": 29580 }, { "epoch": 0.056679146039307626, "grad_norm": 1.6107527017593384, "learning_rate": 5.6663623187480436e-05, "loss": 5.114, "step": 29590 }, { "epoch": 0.05669830087068286, "grad_norm": 1.6841249465942383, "learning_rate": 5.6682777951589525e-05, "loss": 5.3297, "step": 29600 }, { "epoch": 0.05671745570205809, "grad_norm": 1.5554178953170776, "learning_rate": 5.6701932715698606e-05, "loss": 5.1173, "step": 29610 }, { "epoch": 0.05673661053343332, "grad_norm": 1.6531431674957275, "learning_rate": 5.672108747980768e-05, "loss": 5.1434, "step": 29620 }, { "epoch": 0.05675576536480855, "grad_norm": 1.578202247619629, "learning_rate": 5.674024224391676e-05, "loss": 5.3243, "step": 29630 }, { "epoch": 0.056774920196183784, "grad_norm": 1.6768862009048462, "learning_rate": 5.675939700802584e-05, "loss": 5.1425, "step": 29640 }, { "epoch": 0.05679407502755902, "grad_norm": 1.6666489839553833, "learning_rate": 5.677663629572401e-05, "loss": 5.1259, "step": 29650 }, { "epoch": 0.05681322985893424, "grad_norm": 1.5167570114135742, "learning_rate": 5.679579105983309e-05, "loss": 5.1725, "step": 29660 }, { "epoch": 0.056832384690309476, "grad_norm": 1.609365701675415, "learning_rate": 5.681494582394218e-05, "loss": 5.2322, "step": 29670 }, { "epoch": 0.05685153952168471, "grad_norm": 1.4825048446655273, "learning_rate": 5.683410058805125e-05, "loss": 5.317, "step": 29680 }, { "epoch": 0.056870694353059935, "grad_norm": 1.5940628051757812, "learning_rate": 5.6853255352160335e-05, "loss": 5.2608, "step": 29690 }, { "epoch": 0.05688984918443517, "grad_norm": 1.5505249500274658, "learning_rate": 5.687241011626941e-05, "loss": 5.2308, "step": 29700 }, { "epoch": 0.0569090040158104, "grad_norm": 1.670703411102295, "learning_rate": 5.68915648803785e-05, "loss": 5.1436, "step": 
29710 }, { "epoch": 0.05692815884718563, "grad_norm": 2.7139039039611816, "learning_rate": 5.691071964448757e-05, "loss": 4.9938, "step": 29720 }, { "epoch": 0.05694731367856086, "grad_norm": 1.5226013660430908, "learning_rate": 5.692987440859665e-05, "loss": 5.2832, "step": 29730 }, { "epoch": 0.05696646850993609, "grad_norm": 1.5403132438659668, "learning_rate": 5.694902917270573e-05, "loss": 5.1816, "step": 29740 }, { "epoch": 0.05698562334131132, "grad_norm": 1.7572871446609497, "learning_rate": 5.6968183936814816e-05, "loss": 5.1006, "step": 29750 }, { "epoch": 0.05700477817268655, "grad_norm": 1.5761421918869019, "learning_rate": 5.698733870092389e-05, "loss": 5.1096, "step": 29760 }, { "epoch": 0.057023933004061785, "grad_norm": 1.8758268356323242, "learning_rate": 5.700649346503297e-05, "loss": 5.1755, "step": 29770 }, { "epoch": 0.05704308783543701, "grad_norm": 1.635252594947815, "learning_rate": 5.702564822914206e-05, "loss": 5.1225, "step": 29780 }, { "epoch": 0.05706224266681224, "grad_norm": 1.7034019231796265, "learning_rate": 5.7044802993251134e-05, "loss": 5.2096, "step": 29790 }, { "epoch": 0.057081397498187476, "grad_norm": 1.606664776802063, "learning_rate": 5.7063957757360215e-05, "loss": 5.1119, "step": 29800 }, { "epoch": 0.0571005523295627, "grad_norm": 1.6218595504760742, "learning_rate": 5.708311252146929e-05, "loss": 5.1522, "step": 29810 }, { "epoch": 0.057119707160937935, "grad_norm": 1.6684993505477905, "learning_rate": 5.710226728557838e-05, "loss": 5.2077, "step": 29820 }, { "epoch": 0.05713886199231317, "grad_norm": 1.872859001159668, "learning_rate": 5.712142204968745e-05, "loss": 5.1422, "step": 29830 }, { "epoch": 0.057158016823688394, "grad_norm": 1.9277209043502808, "learning_rate": 5.7140576813796534e-05, "loss": 5.1902, "step": 29840 }, { "epoch": 0.05717717165506363, "grad_norm": 1.6655203104019165, "learning_rate": 5.715973157790561e-05, "loss": 5.1512, "step": 29850 }, { "epoch": 0.05719632648643886, "grad_norm": 
1.6854325532913208, "learning_rate": 5.7178886342014697e-05, "loss": 5.0859, "step": 29860 }, { "epoch": 0.057215481317814086, "grad_norm": 1.6536204814910889, "learning_rate": 5.719804110612377e-05, "loss": 5.3402, "step": 29870 }, { "epoch": 0.05723463614918932, "grad_norm": 1.5887433290481567, "learning_rate": 5.721719587023285e-05, "loss": 5.0701, "step": 29880 }, { "epoch": 0.05725379098056455, "grad_norm": 1.647698998451233, "learning_rate": 5.723635063434193e-05, "loss": 5.1242, "step": 29890 }, { "epoch": 0.05727294581193978, "grad_norm": 1.602310299873352, "learning_rate": 5.7255505398451015e-05, "loss": 5.0217, "step": 29900 }, { "epoch": 0.05729210064331501, "grad_norm": 1.712659239768982, "learning_rate": 5.7274660162560096e-05, "loss": 5.1146, "step": 29910 }, { "epoch": 0.057311255474690244, "grad_norm": 1.729272484779358, "learning_rate": 5.729381492666917e-05, "loss": 5.1184, "step": 29920 }, { "epoch": 0.05733041030606547, "grad_norm": 1.5466341972351074, "learning_rate": 5.731296969077826e-05, "loss": 5.0511, "step": 29930 }, { "epoch": 0.0573495651374407, "grad_norm": 1.616219401359558, "learning_rate": 5.7332124454887333e-05, "loss": 5.1736, "step": 29940 }, { "epoch": 0.057368719968815936, "grad_norm": 1.6727391481399536, "learning_rate": 5.7351279218996415e-05, "loss": 5.1769, "step": 29950 }, { "epoch": 0.05738787480019116, "grad_norm": 1.4766101837158203, "learning_rate": 5.737043398310549e-05, "loss": 5.2694, "step": 29960 }, { "epoch": 0.057407029631566395, "grad_norm": 1.6916784048080444, "learning_rate": 5.738958874721458e-05, "loss": 5.1716, "step": 29970 }, { "epoch": 0.05742618446294163, "grad_norm": 1.5895365476608276, "learning_rate": 5.740874351132365e-05, "loss": 5.1692, "step": 29980 }, { "epoch": 0.05744533929431686, "grad_norm": 1.5330842733383179, "learning_rate": 5.742789827543273e-05, "loss": 5.1011, "step": 29990 }, { "epoch": 0.05746449412569209, "grad_norm": 1.558606743812561, "learning_rate": 5.744705303954181e-05, 
"loss": 5.1488, "step": 30000 }, { "epoch": 0.05748364895706732, "grad_norm": 1.6441594362258911, "learning_rate": 5.7466207803650896e-05, "loss": 5.2084, "step": 30010 }, { "epoch": 0.05750280378844255, "grad_norm": 1.673669695854187, "learning_rate": 5.748536256775998e-05, "loss": 5.1471, "step": 30020 }, { "epoch": 0.05752195861981778, "grad_norm": 1.5646129846572876, "learning_rate": 5.750451733186905e-05, "loss": 5.1285, "step": 30030 }, { "epoch": 0.05754111345119301, "grad_norm": 1.5121854543685913, "learning_rate": 5.7523672095978126e-05, "loss": 5.233, "step": 30040 }, { "epoch": 0.057560268282568244, "grad_norm": 1.508813738822937, "learning_rate": 5.7542826860087214e-05, "loss": 5.158, "step": 30050 }, { "epoch": 0.05757942311394347, "grad_norm": 1.547363042831421, "learning_rate": 5.7561981624196296e-05, "loss": 5.2588, "step": 30060 }, { "epoch": 0.0575985779453187, "grad_norm": 1.626813530921936, "learning_rate": 5.758113638830537e-05, "loss": 5.1456, "step": 30070 }, { "epoch": 0.057617732776693936, "grad_norm": 1.601984977722168, "learning_rate": 5.760029115241445e-05, "loss": 5.2041, "step": 30080 }, { "epoch": 0.05763688760806916, "grad_norm": 1.6291871070861816, "learning_rate": 5.761944591652353e-05, "loss": 5.2966, "step": 30090 }, { "epoch": 0.057656042439444395, "grad_norm": 1.4984114170074463, "learning_rate": 5.7638600680632614e-05, "loss": 5.1838, "step": 30100 }, { "epoch": 0.05767519727081963, "grad_norm": 1.6804307699203491, "learning_rate": 5.765775544474169e-05, "loss": 5.0972, "step": 30110 }, { "epoch": 0.057694352102194854, "grad_norm": 1.5933704376220703, "learning_rate": 5.7676910208850777e-05, "loss": 5.1949, "step": 30120 }, { "epoch": 0.05771350693357009, "grad_norm": 1.838984727859497, "learning_rate": 5.769606497295986e-05, "loss": 5.0333, "step": 30130 }, { "epoch": 0.05773266176494532, "grad_norm": 1.588189959526062, "learning_rate": 5.771521973706893e-05, "loss": 5.0201, "step": 30140 }, { "epoch": 0.057751816596320546, 
"grad_norm": 1.6231480836868286, "learning_rate": 5.773437450117801e-05, "loss": 5.1578, "step": 30150 }, { "epoch": 0.05777097142769578, "grad_norm": 1.4951012134552002, "learning_rate": 5.7753529265287095e-05, "loss": 5.245, "step": 30160 }, { "epoch": 0.05779012625907101, "grad_norm": 1.5736068487167358, "learning_rate": 5.7772684029396176e-05, "loss": 5.2499, "step": 30170 }, { "epoch": 0.05780928109044624, "grad_norm": 1.6138008832931519, "learning_rate": 5.779183879350525e-05, "loss": 5.1262, "step": 30180 }, { "epoch": 0.05782843592182147, "grad_norm": 1.5200350284576416, "learning_rate": 5.781099355761433e-05, "loss": 5.0401, "step": 30190 }, { "epoch": 0.057847590753196704, "grad_norm": 1.5797629356384277, "learning_rate": 5.7830148321723413e-05, "loss": 5.2092, "step": 30200 }, { "epoch": 0.05786674558457193, "grad_norm": 1.6653852462768555, "learning_rate": 5.7849303085832495e-05, "loss": 5.0795, "step": 30210 }, { "epoch": 0.05788590041594716, "grad_norm": 1.6055235862731934, "learning_rate": 5.786845784994157e-05, "loss": 5.1636, "step": 30220 }, { "epoch": 0.057905055247322396, "grad_norm": 1.4726191759109497, "learning_rate": 5.788761261405065e-05, "loss": 5.1786, "step": 30230 }, { "epoch": 0.05792421007869762, "grad_norm": 1.6326618194580078, "learning_rate": 5.790676737815973e-05, "loss": 5.1464, "step": 30240 }, { "epoch": 0.057943364910072855, "grad_norm": 1.5771572589874268, "learning_rate": 5.792592214226881e-05, "loss": 5.118, "step": 30250 }, { "epoch": 0.05796251974144809, "grad_norm": 1.6182860136032104, "learning_rate": 5.794507690637789e-05, "loss": 5.2032, "step": 30260 }, { "epoch": 0.057981674572823313, "grad_norm": 1.540498971939087, "learning_rate": 5.7964231670486976e-05, "loss": 5.2541, "step": 30270 }, { "epoch": 0.058000829404198546, "grad_norm": 1.5909003019332886, "learning_rate": 5.798338643459606e-05, "loss": 5.1018, "step": 30280 }, { "epoch": 0.05801998423557378, "grad_norm": 1.5557094812393188, "learning_rate": 
5.800254119870513e-05, "loss": 5.016, "step": 30290 }, { "epoch": 0.05803913906694901, "grad_norm": 1.5497602224349976, "learning_rate": 5.802169596281421e-05, "loss": 5.1769, "step": 30300 }, { "epoch": 0.05805829389832424, "grad_norm": 1.5561869144439697, "learning_rate": 5.8040850726923294e-05, "loss": 5.2329, "step": 30310 }, { "epoch": 0.05807744872969947, "grad_norm": 1.5457953214645386, "learning_rate": 5.8060005491032376e-05, "loss": 5.293, "step": 30320 }, { "epoch": 0.058096603561074704, "grad_norm": 1.5005265474319458, "learning_rate": 5.807916025514145e-05, "loss": 5.2044, "step": 30330 }, { "epoch": 0.05811575839244993, "grad_norm": 1.5191751718521118, "learning_rate": 5.809831501925053e-05, "loss": 5.1072, "step": 30340 }, { "epoch": 0.05813491322382516, "grad_norm": 1.6009718179702759, "learning_rate": 5.811746978335961e-05, "loss": 5.1759, "step": 30350 }, { "epoch": 0.058154068055200396, "grad_norm": 1.6856999397277832, "learning_rate": 5.8136624547468694e-05, "loss": 5.1349, "step": 30360 }, { "epoch": 0.05817322288657562, "grad_norm": 1.6305352449417114, "learning_rate": 5.815577931157777e-05, "loss": 4.9494, "step": 30370 }, { "epoch": 0.058192377717950855, "grad_norm": 1.63131582736969, "learning_rate": 5.817493407568685e-05, "loss": 5.1726, "step": 30380 }, { "epoch": 0.05821153254932609, "grad_norm": 1.6382802724838257, "learning_rate": 5.819408883979594e-05, "loss": 5.1633, "step": 30390 }, { "epoch": 0.058230687380701314, "grad_norm": 1.6063880920410156, "learning_rate": 5.821324360390501e-05, "loss": 5.1883, "step": 30400 }, { "epoch": 0.05824984221207655, "grad_norm": 1.6903306245803833, "learning_rate": 5.8232398368014094e-05, "loss": 5.1808, "step": 30410 }, { "epoch": 0.05826899704345178, "grad_norm": 1.8027069568634033, "learning_rate": 5.825155313212317e-05, "loss": 5.2731, "step": 30420 }, { "epoch": 0.058288151874827006, "grad_norm": 1.5361229181289673, "learning_rate": 5.8270707896232256e-05, "loss": 5.1291, "step": 30430 }, { 
"epoch": 0.05830730670620224, "grad_norm": 2.437748432159424, "learning_rate": 5.828986266034133e-05, "loss": 5.0711, "step": 30440 }, { "epoch": 0.05832646153757747, "grad_norm": 1.5912587642669678, "learning_rate": 5.830901742445041e-05, "loss": 5.1268, "step": 30450 }, { "epoch": 0.0583456163689527, "grad_norm": 1.535268783569336, "learning_rate": 5.8328172188559494e-05, "loss": 5.1182, "step": 30460 }, { "epoch": 0.05836477120032793, "grad_norm": 1.635360598564148, "learning_rate": 5.8347326952668575e-05, "loss": 5.1638, "step": 30470 }, { "epoch": 0.058383926031703164, "grad_norm": 1.7707701921463013, "learning_rate": 5.836648171677765e-05, "loss": 5.0786, "step": 30480 }, { "epoch": 0.05840308086307839, "grad_norm": 1.579753041267395, "learning_rate": 5.838563648088673e-05, "loss": 5.2022, "step": 30490 }, { "epoch": 0.05842223569445362, "grad_norm": 1.677078366279602, "learning_rate": 5.840479124499582e-05, "loss": 5.1048, "step": 30500 }, { "epoch": 0.058441390525828855, "grad_norm": 1.5528672933578491, "learning_rate": 5.842394600910489e-05, "loss": 5.1935, "step": 30510 }, { "epoch": 0.05846054535720408, "grad_norm": 1.5187404155731201, "learning_rate": 5.8443100773213975e-05, "loss": 5.0739, "step": 30520 }, { "epoch": 0.058479700188579314, "grad_norm": 1.657562017440796, "learning_rate": 5.846225553732305e-05, "loss": 5.2935, "step": 30530 }, { "epoch": 0.05849885501995455, "grad_norm": 1.8716689348220825, "learning_rate": 5.848141030143214e-05, "loss": 5.2816, "step": 30540 }, { "epoch": 0.05851800985132977, "grad_norm": 1.549515724182129, "learning_rate": 5.850056506554121e-05, "loss": 5.2599, "step": 30550 }, { "epoch": 0.058537164682705006, "grad_norm": 1.573230504989624, "learning_rate": 5.851971982965029e-05, "loss": 5.0698, "step": 30560 }, { "epoch": 0.05855631951408024, "grad_norm": 1.5349501371383667, "learning_rate": 5.853887459375937e-05, "loss": 5.169, "step": 30570 }, { "epoch": 0.058575474345455465, "grad_norm": 1.6379693746566772, 
"learning_rate": 5.8558029357868456e-05, "loss": 5.0566, "step": 30580 }, { "epoch": 0.0585946291768307, "grad_norm": 1.5377370119094849, "learning_rate": 5.857718412197753e-05, "loss": 5.1481, "step": 30590 }, { "epoch": 0.05861378400820593, "grad_norm": 1.617548942565918, "learning_rate": 5.859633888608661e-05, "loss": 5.0542, "step": 30600 }, { "epoch": 0.05863293883958116, "grad_norm": 1.6785098314285278, "learning_rate": 5.86154936501957e-05, "loss": 5.2409, "step": 30610 }, { "epoch": 0.05865209367095639, "grad_norm": 1.6624655723571777, "learning_rate": 5.8634648414304774e-05, "loss": 5.1784, "step": 30620 }, { "epoch": 0.05867124850233162, "grad_norm": 1.7205135822296143, "learning_rate": 5.8653803178413855e-05, "loss": 4.9948, "step": 30630 }, { "epoch": 0.058690403333706856, "grad_norm": 1.511366844177246, "learning_rate": 5.867295794252293e-05, "loss": 5.071, "step": 30640 }, { "epoch": 0.05870955816508208, "grad_norm": 1.7495198249816895, "learning_rate": 5.869211270663202e-05, "loss": 5.0498, "step": 30650 }, { "epoch": 0.058728712996457315, "grad_norm": 1.6307659149169922, "learning_rate": 5.871126747074109e-05, "loss": 5.1821, "step": 30660 }, { "epoch": 0.05874786782783255, "grad_norm": 1.7400892972946167, "learning_rate": 5.8730422234850174e-05, "loss": 4.9932, "step": 30670 }, { "epoch": 0.058767022659207774, "grad_norm": 1.6559568643569946, "learning_rate": 5.874957699895925e-05, "loss": 5.1453, "step": 30680 }, { "epoch": 0.05878617749058301, "grad_norm": 1.5595948696136475, "learning_rate": 5.8768731763068336e-05, "loss": 5.1341, "step": 30690 }, { "epoch": 0.05880533232195824, "grad_norm": 1.7331846952438354, "learning_rate": 5.878788652717741e-05, "loss": 5.049, "step": 30700 }, { "epoch": 0.058824487153333466, "grad_norm": 1.6106361150741577, "learning_rate": 5.880704129128649e-05, "loss": 5.2401, "step": 30710 }, { "epoch": 0.0588436419847087, "grad_norm": 1.6126761436462402, "learning_rate": 5.882619605539557e-05, "loss": 5.1929, "step": 
30720 }, { "epoch": 0.05886279681608393, "grad_norm": 1.5172028541564941, "learning_rate": 5.8845350819504655e-05, "loss": 5.1414, "step": 30730 }, { "epoch": 0.05888195164745916, "grad_norm": 1.6956963539123535, "learning_rate": 5.8864505583613736e-05, "loss": 5.2882, "step": 30740 }, { "epoch": 0.05890110647883439, "grad_norm": 1.6175944805145264, "learning_rate": 5.888366034772281e-05, "loss": 5.0777, "step": 30750 }, { "epoch": 0.05892026131020962, "grad_norm": 1.6435725688934326, "learning_rate": 5.8902815111831885e-05, "loss": 5.263, "step": 30760 }, { "epoch": 0.05893941614158485, "grad_norm": 1.6130056381225586, "learning_rate": 5.892196987594097e-05, "loss": 5.1087, "step": 30770 }, { "epoch": 0.05895857097296008, "grad_norm": 1.515133023262024, "learning_rate": 5.8941124640050055e-05, "loss": 5.1257, "step": 30780 }, { "epoch": 0.058977725804335315, "grad_norm": 1.8034874200820923, "learning_rate": 5.896027940415913e-05, "loss": 5.1099, "step": 30790 }, { "epoch": 0.05899688063571054, "grad_norm": 1.5631088018417358, "learning_rate": 5.897943416826822e-05, "loss": 5.2588, "step": 30800 }, { "epoch": 0.059016035467085774, "grad_norm": 1.6331753730773926, "learning_rate": 5.899858893237729e-05, "loss": 5.0274, "step": 30810 }, { "epoch": 0.05903519029846101, "grad_norm": 1.5623440742492676, "learning_rate": 5.901774369648637e-05, "loss": 5.1343, "step": 30820 }, { "epoch": 0.05905434512983623, "grad_norm": 1.4741419553756714, "learning_rate": 5.903689846059545e-05, "loss": 5.164, "step": 30830 }, { "epoch": 0.059073499961211466, "grad_norm": 1.5958287715911865, "learning_rate": 5.9056053224704536e-05, "loss": 5.0495, "step": 30840 }, { "epoch": 0.0590926547925867, "grad_norm": 1.5845046043395996, "learning_rate": 5.907520798881362e-05, "loss": 5.1257, "step": 30850 }, { "epoch": 0.059111809623961925, "grad_norm": 1.6187162399291992, "learning_rate": 5.909436275292269e-05, "loss": 5.0318, "step": 30860 }, { "epoch": 0.05913096445533716, "grad_norm": 
1.5724785327911377, "learning_rate": 5.9113517517031766e-05, "loss": 5.1558, "step": 30870 }, { "epoch": 0.05915011928671239, "grad_norm": 1.613213062286377, "learning_rate": 5.9132672281140854e-05, "loss": 5.1125, "step": 30880 }, { "epoch": 0.05916927411808762, "grad_norm": 1.576462984085083, "learning_rate": 5.9151827045249936e-05, "loss": 5.0718, "step": 30890 }, { "epoch": 0.05918842894946285, "grad_norm": 1.5235124826431274, "learning_rate": 5.917098180935901e-05, "loss": 5.222, "step": 30900 }, { "epoch": 0.05920758378083808, "grad_norm": 1.5327293872833252, "learning_rate": 5.919013657346809e-05, "loss": 5.061, "step": 30910 }, { "epoch": 0.05922673861221331, "grad_norm": 1.4862054586410522, "learning_rate": 5.920929133757717e-05, "loss": 5.0894, "step": 30920 }, { "epoch": 0.05924589344358854, "grad_norm": 1.5336207151412964, "learning_rate": 5.9228446101686254e-05, "loss": 5.146, "step": 30930 }, { "epoch": 0.059265048274963775, "grad_norm": 1.5616307258605957, "learning_rate": 5.924760086579533e-05, "loss": 5.2709, "step": 30940 }, { "epoch": 0.059284203106339, "grad_norm": 1.53754723072052, "learning_rate": 5.9266755629904417e-05, "loss": 5.0327, "step": 30950 }, { "epoch": 0.059303357937714234, "grad_norm": 1.5916916131973267, "learning_rate": 5.928591039401349e-05, "loss": 5.1822, "step": 30960 }, { "epoch": 0.05932251276908947, "grad_norm": 1.5030755996704102, "learning_rate": 5.930506515812257e-05, "loss": 5.1972, "step": 30970 }, { "epoch": 0.0593416676004647, "grad_norm": 1.6479010581970215, "learning_rate": 5.932421992223165e-05, "loss": 5.2694, "step": 30980 }, { "epoch": 0.059360822431839926, "grad_norm": 1.6202963590621948, "learning_rate": 5.9343374686340735e-05, "loss": 5.0148, "step": 30990 }, { "epoch": 0.05937997726321516, "grad_norm": 1.4692970514297485, "learning_rate": 5.9362529450449816e-05, "loss": 5.0516, "step": 31000 }, { "epoch": 0.05939913209459039, "grad_norm": 1.882033348083496, "learning_rate": 5.938168421455889e-05, "loss": 
5.1484, "step": 31010 }, { "epoch": 0.05941828692596562, "grad_norm": 1.473160743713379, "learning_rate": 5.940083897866797e-05, "loss": 5.2837, "step": 31020 }, { "epoch": 0.05943744175734085, "grad_norm": 1.681246280670166, "learning_rate": 5.9419993742777053e-05, "loss": 5.1922, "step": 31030 }, { "epoch": 0.05945659658871608, "grad_norm": 1.547658920288086, "learning_rate": 5.9439148506886135e-05, "loss": 5.1225, "step": 31040 }, { "epoch": 0.05947575142009131, "grad_norm": 1.6169931888580322, "learning_rate": 5.945830327099521e-05, "loss": 5.075, "step": 31050 }, { "epoch": 0.05949490625146654, "grad_norm": 1.5389069318771362, "learning_rate": 5.947745803510429e-05, "loss": 5.0163, "step": 31060 }, { "epoch": 0.059514061082841775, "grad_norm": 1.538296103477478, "learning_rate": 5.949661279921337e-05, "loss": 5.1398, "step": 31070 }, { "epoch": 0.059533215914217, "grad_norm": 1.6468786001205444, "learning_rate": 5.951576756332245e-05, "loss": 5.2834, "step": 31080 }, { "epoch": 0.059552370745592234, "grad_norm": 1.6018472909927368, "learning_rate": 5.953492232743153e-05, "loss": 5.3024, "step": 31090 }, { "epoch": 0.05957152557696747, "grad_norm": 1.6124184131622314, "learning_rate": 5.955407709154061e-05, "loss": 5.0706, "step": 31100 }, { "epoch": 0.05959068040834269, "grad_norm": 1.6811498403549194, "learning_rate": 5.95732318556497e-05, "loss": 5.2236, "step": 31110 }, { "epoch": 0.059609835239717926, "grad_norm": 1.5630425214767456, "learning_rate": 5.959238661975877e-05, "loss": 5.2093, "step": 31120 }, { "epoch": 0.05962899007109316, "grad_norm": 1.5421475172042847, "learning_rate": 5.961154138386785e-05, "loss": 5.1403, "step": 31130 }, { "epoch": 0.059648144902468385, "grad_norm": 1.4726544618606567, "learning_rate": 5.9630696147976934e-05, "loss": 5.0877, "step": 31140 }, { "epoch": 0.05966729973384362, "grad_norm": 1.616503357887268, "learning_rate": 5.9649850912086016e-05, "loss": 5.0294, "step": 31150 }, { "epoch": 0.05968645456521885, 
"grad_norm": 1.5183686017990112, "learning_rate": 5.966900567619509e-05, "loss": 5.0466, "step": 31160 }, { "epoch": 0.05970560939659408, "grad_norm": 1.5571718215942383, "learning_rate": 5.968816044030417e-05, "loss": 5.2437, "step": 31170 }, { "epoch": 0.05972476422796931, "grad_norm": 1.6306977272033691, "learning_rate": 5.970731520441325e-05, "loss": 5.2672, "step": 31180 }, { "epoch": 0.05974391905934454, "grad_norm": 1.6147116422653198, "learning_rate": 5.9726469968522334e-05, "loss": 5.1304, "step": 31190 }, { "epoch": 0.05976307389071977, "grad_norm": 1.7632160186767578, "learning_rate": 5.974562473263141e-05, "loss": 4.9423, "step": 31200 }, { "epoch": 0.059782228722095, "grad_norm": 1.641034483909607, "learning_rate": 5.976477949674049e-05, "loss": 5.2133, "step": 31210 }, { "epoch": 0.059801383553470235, "grad_norm": 1.5078318119049072, "learning_rate": 5.978393426084958e-05, "loss": 5.1813, "step": 31220 }, { "epoch": 0.05982053838484546, "grad_norm": 1.654366374015808, "learning_rate": 5.980308902495865e-05, "loss": 5.2096, "step": 31230 }, { "epoch": 0.059839693216220693, "grad_norm": 1.5311553478240967, "learning_rate": 5.9822243789067734e-05, "loss": 5.0913, "step": 31240 }, { "epoch": 0.059858848047595926, "grad_norm": 1.825936198234558, "learning_rate": 5.984139855317681e-05, "loss": 5.071, "step": 31250 }, { "epoch": 0.05987800287897115, "grad_norm": 1.6459693908691406, "learning_rate": 5.9860553317285896e-05, "loss": 5.154, "step": 31260 }, { "epoch": 0.059897157710346385, "grad_norm": 1.5008361339569092, "learning_rate": 5.987970808139497e-05, "loss": 5.2456, "step": 31270 }, { "epoch": 0.05991631254172162, "grad_norm": 1.506255030632019, "learning_rate": 5.989886284550405e-05, "loss": 5.0899, "step": 31280 }, { "epoch": 0.05993546737309685, "grad_norm": 1.5535000562667847, "learning_rate": 5.9918017609613134e-05, "loss": 5.1305, "step": 31290 }, { "epoch": 0.05995462220447208, "grad_norm": 1.6941356658935547, "learning_rate": 
5.9937172373722215e-05, "loss": 5.1506, "step": 31300 }, { "epoch": 0.05997377703584731, "grad_norm": 1.8082923889160156, "learning_rate": 5.995632713783129e-05, "loss": 5.1026, "step": 31310 }, { "epoch": 0.05999293186722254, "grad_norm": 1.5803650617599487, "learning_rate": 5.997548190194037e-05, "loss": 5.076, "step": 31320 }, { "epoch": 0.06001208669859777, "grad_norm": 1.4837151765823364, "learning_rate": 5.999463666604946e-05, "loss": 5.2149, "step": 31330 }, { "epoch": 0.060031241529973, "grad_norm": 1.6303778886795044, "learning_rate": 6.001379143015853e-05, "loss": 5.114, "step": 31340 }, { "epoch": 0.060050396361348235, "grad_norm": 1.8446563482284546, "learning_rate": 6.0032946194267615e-05, "loss": 5.0661, "step": 31350 }, { "epoch": 0.06006955119272346, "grad_norm": 1.6419750452041626, "learning_rate": 6.005210095837669e-05, "loss": 5.1475, "step": 31360 }, { "epoch": 0.060088706024098694, "grad_norm": 1.6219600439071655, "learning_rate": 6.007125572248578e-05, "loss": 5.1176, "step": 31370 }, { "epoch": 0.06010786085547393, "grad_norm": 1.5290254354476929, "learning_rate": 6.009041048659485e-05, "loss": 5.1263, "step": 31380 }, { "epoch": 0.06012701568684915, "grad_norm": 1.574873685836792, "learning_rate": 6.010956525070393e-05, "loss": 4.9848, "step": 31390 }, { "epoch": 0.060146170518224386, "grad_norm": 1.6806366443634033, "learning_rate": 6.012872001481301e-05, "loss": 5.25, "step": 31400 }, { "epoch": 0.06016532534959962, "grad_norm": 1.5348418951034546, "learning_rate": 6.0147874778922096e-05, "loss": 5.109, "step": 31410 }, { "epoch": 0.060184480180974845, "grad_norm": 1.5413434505462646, "learning_rate": 6.016702954303117e-05, "loss": 5.2117, "step": 31420 }, { "epoch": 0.06020363501235008, "grad_norm": 1.710343360900879, "learning_rate": 6.018618430714025e-05, "loss": 5.0855, "step": 31430 }, { "epoch": 0.06022278984372531, "grad_norm": 1.5288317203521729, "learning_rate": 6.0205339071249326e-05, "loss": 5.1297, "step": 31440 }, { "epoch": 
0.06024194467510054, "grad_norm": 1.5018608570098877, "learning_rate": 6.0224493835358414e-05, "loss": 5.0448, "step": 31450 }, { "epoch": 0.06026109950647577, "grad_norm": 1.6646959781646729, "learning_rate": 6.0243648599467495e-05, "loss": 5.1474, "step": 31460 }, { "epoch": 0.060280254337851, "grad_norm": 1.5623500347137451, "learning_rate": 6.026280336357657e-05, "loss": 5.1054, "step": 31470 }, { "epoch": 0.06029940916922623, "grad_norm": 1.5914860963821411, "learning_rate": 6.028195812768566e-05, "loss": 5.2019, "step": 31480 }, { "epoch": 0.06031856400060146, "grad_norm": 1.6199642419815063, "learning_rate": 6.030111289179473e-05, "loss": 4.9776, "step": 31490 }, { "epoch": 0.060337718831976694, "grad_norm": 1.599808692932129, "learning_rate": 6.0320267655903814e-05, "loss": 4.9834, "step": 31500 }, { "epoch": 0.06035687366335192, "grad_norm": 1.4866479635238647, "learning_rate": 6.033942242001289e-05, "loss": 5.0298, "step": 31510 }, { "epoch": 0.06037602849472715, "grad_norm": 1.6284810304641724, "learning_rate": 6.0358577184121976e-05, "loss": 5.0338, "step": 31520 }, { "epoch": 0.060395183326102386, "grad_norm": 1.5576857328414917, "learning_rate": 6.037773194823105e-05, "loss": 4.9843, "step": 31530 }, { "epoch": 0.06041433815747761, "grad_norm": 1.5815051794052124, "learning_rate": 6.039688671234013e-05, "loss": 5.0999, "step": 31540 }, { "epoch": 0.060433492988852845, "grad_norm": 1.5713634490966797, "learning_rate": 6.041604147644921e-05, "loss": 5.124, "step": 31550 }, { "epoch": 0.06045264782022808, "grad_norm": 1.5406992435455322, "learning_rate": 6.0435196240558295e-05, "loss": 5.2357, "step": 31560 }, { "epoch": 0.060471802651603304, "grad_norm": 1.5401172637939453, "learning_rate": 6.0454351004667376e-05, "loss": 5.0855, "step": 31570 }, { "epoch": 0.06049095748297854, "grad_norm": 1.519096851348877, "learning_rate": 6.047350576877645e-05, "loss": 5.1425, "step": 31580 }, { "epoch": 0.06051011231435377, "grad_norm": 1.5390933752059937, 
"learning_rate": 6.0492660532885525e-05, "loss": 5.0523, "step": 31590 }, { "epoch": 0.060529267145728996, "grad_norm": 1.4887444972991943, "learning_rate": 6.051181529699461e-05, "loss": 5.1878, "step": 31600 }, { "epoch": 0.06054842197710423, "grad_norm": 1.5660170316696167, "learning_rate": 6.0530970061103695e-05, "loss": 5.0217, "step": 31610 }, { "epoch": 0.06056757680847946, "grad_norm": 1.686819076538086, "learning_rate": 6.055012482521277e-05, "loss": 5.1308, "step": 31620 }, { "epoch": 0.060586731639854695, "grad_norm": 1.5116323232650757, "learning_rate": 6.056927958932186e-05, "loss": 5.0446, "step": 31630 }, { "epoch": 0.06060588647122992, "grad_norm": 1.7678014039993286, "learning_rate": 6.058843435343093e-05, "loss": 5.0927, "step": 31640 }, { "epoch": 0.060625041302605154, "grad_norm": 1.6741523742675781, "learning_rate": 6.060758911754001e-05, "loss": 4.9084, "step": 31650 }, { "epoch": 0.06064419613398039, "grad_norm": 1.5567141771316528, "learning_rate": 6.062674388164909e-05, "loss": 5.1581, "step": 31660 }, { "epoch": 0.06066335096535561, "grad_norm": 1.4166022539138794, "learning_rate": 6.0645898645758176e-05, "loss": 5.121, "step": 31670 }, { "epoch": 0.060682505796730846, "grad_norm": 1.6193358898162842, "learning_rate": 6.066505340986725e-05, "loss": 5.1688, "step": 31680 }, { "epoch": 0.06070166062810608, "grad_norm": 1.5061988830566406, "learning_rate": 6.068420817397633e-05, "loss": 5.2536, "step": 31690 }, { "epoch": 0.060720815459481305, "grad_norm": 1.5600227117538452, "learning_rate": 6.0703362938085406e-05, "loss": 5.0522, "step": 31700 }, { "epoch": 0.06073997029085654, "grad_norm": 1.6417860984802246, "learning_rate": 6.0722517702194494e-05, "loss": 5.033, "step": 31710 }, { "epoch": 0.06075912512223177, "grad_norm": 1.4814211130142212, "learning_rate": 6.0741672466303575e-05, "loss": 5.2313, "step": 31720 }, { "epoch": 0.060778279953606996, "grad_norm": 1.5535600185394287, "learning_rate": 6.076082723041265e-05, "loss": 5.158, 
"step": 31730 }, { "epoch": 0.06079743478498223, "grad_norm": 1.6173304319381714, "learning_rate": 6.077998199452173e-05, "loss": 5.189, "step": 31740 }, { "epoch": 0.06081658961635746, "grad_norm": 1.5612152814865112, "learning_rate": 6.079913675863081e-05, "loss": 5.2527, "step": 31750 }, { "epoch": 0.06083574444773269, "grad_norm": 1.6923550367355347, "learning_rate": 6.0818291522739894e-05, "loss": 5.2301, "step": 31760 }, { "epoch": 0.06085489927910792, "grad_norm": 1.5951333045959473, "learning_rate": 6.083744628684897e-05, "loss": 5.0618, "step": 31770 }, { "epoch": 0.060874054110483154, "grad_norm": 2.1942873001098633, "learning_rate": 6.085660105095805e-05, "loss": 5.0194, "step": 31780 }, { "epoch": 0.06089320894185838, "grad_norm": 1.598414421081543, "learning_rate": 6.087575581506713e-05, "loss": 5.142, "step": 31790 }, { "epoch": 0.06091236377323361, "grad_norm": 1.5154125690460205, "learning_rate": 6.089491057917621e-05, "loss": 5.0034, "step": 31800 }, { "epoch": 0.060931518604608846, "grad_norm": 1.5580319166183472, "learning_rate": 6.091406534328529e-05, "loss": 5.0467, "step": 31810 }, { "epoch": 0.06095067343598407, "grad_norm": 1.583156943321228, "learning_rate": 6.0933220107394375e-05, "loss": 5.1943, "step": 31820 }, { "epoch": 0.060969828267359305, "grad_norm": 1.6283527612686157, "learning_rate": 6.0952374871503456e-05, "loss": 4.9938, "step": 31830 }, { "epoch": 0.06098898309873454, "grad_norm": 1.5628360509872437, "learning_rate": 6.097152963561253e-05, "loss": 5.1901, "step": 31840 }, { "epoch": 0.061008137930109764, "grad_norm": 1.6568769216537476, "learning_rate": 6.099068439972161e-05, "loss": 5.1974, "step": 31850 }, { "epoch": 0.061027292761485, "grad_norm": 1.5897784233093262, "learning_rate": 6.1009839163830693e-05, "loss": 5.0471, "step": 31860 }, { "epoch": 0.06104644759286023, "grad_norm": 1.5718777179718018, "learning_rate": 6.1028993927939775e-05, "loss": 5.1704, "step": 31870 }, { "epoch": 0.061065602424235456, "grad_norm": 
1.5486314296722412, "learning_rate": 6.104814869204886e-05, "loss": 5.2059, "step": 31880 }, { "epoch": 0.06108475725561069, "grad_norm": 1.7191275358200073, "learning_rate": 6.106730345615792e-05, "loss": 5.1415, "step": 31890 }, { "epoch": 0.06110391208698592, "grad_norm": 1.4914480447769165, "learning_rate": 6.108645822026702e-05, "loss": 5.1293, "step": 31900 }, { "epoch": 0.06112306691836115, "grad_norm": 1.45182466506958, "learning_rate": 6.110561298437609e-05, "loss": 5.0215, "step": 31910 }, { "epoch": 0.06114222174973638, "grad_norm": 1.5996001958847046, "learning_rate": 6.112476774848517e-05, "loss": 5.1156, "step": 31920 }, { "epoch": 0.061161376581111614, "grad_norm": 1.5594377517700195, "learning_rate": 6.114392251259425e-05, "loss": 5.1257, "step": 31930 }, { "epoch": 0.06118053141248684, "grad_norm": 1.5137416124343872, "learning_rate": 6.116307727670333e-05, "loss": 5.0549, "step": 31940 }, { "epoch": 0.06119968624386207, "grad_norm": 1.4678953886032104, "learning_rate": 6.118223204081241e-05, "loss": 5.0424, "step": 31950 }, { "epoch": 0.061218841075237306, "grad_norm": 1.525862455368042, "learning_rate": 6.120138680492149e-05, "loss": 5.1522, "step": 31960 }, { "epoch": 0.06123799590661254, "grad_norm": 1.4549111127853394, "learning_rate": 6.122054156903057e-05, "loss": 4.995, "step": 31970 }, { "epoch": 0.061257150737987764, "grad_norm": 1.6294718980789185, "learning_rate": 6.123969633313966e-05, "loss": 5.0313, "step": 31980 }, { "epoch": 0.061276305569363, "grad_norm": 1.7646700143814087, "learning_rate": 6.125885109724874e-05, "loss": 5.0481, "step": 31990 }, { "epoch": 0.06129546040073823, "grad_norm": 1.5689451694488525, "learning_rate": 6.12780058613578e-05, "loss": 5.1712, "step": 32000 }, { "epoch": 0.061314615232113456, "grad_norm": 1.5558546781539917, "learning_rate": 6.12971606254669e-05, "loss": 4.9906, "step": 32010 }, { "epoch": 0.06133377006348869, "grad_norm": 1.6228755712509155, "learning_rate": 6.131631538957597e-05, "loss": 
4.9187, "step": 32020 }, { "epoch": 0.06135292489486392, "grad_norm": 1.5791163444519043, "learning_rate": 6.133547015368505e-05, "loss": 5.1345, "step": 32030 }, { "epoch": 0.06137207972623915, "grad_norm": 1.4306622743606567, "learning_rate": 6.135462491779413e-05, "loss": 5.0516, "step": 32040 }, { "epoch": 0.06139123455761438, "grad_norm": 1.4960200786590576, "learning_rate": 6.137377968190321e-05, "loss": 5.1156, "step": 32050 }, { "epoch": 0.061410389388989614, "grad_norm": 1.5270531177520752, "learning_rate": 6.139293444601229e-05, "loss": 5.05, "step": 32060 }, { "epoch": 0.06142954422036484, "grad_norm": 1.5345309972763062, "learning_rate": 6.141208921012137e-05, "loss": 5.1728, "step": 32070 }, { "epoch": 0.06144869905174007, "grad_norm": 1.567118525505066, "learning_rate": 6.143124397423046e-05, "loss": 5.1357, "step": 32080 }, { "epoch": 0.061467853883115306, "grad_norm": 1.507182240486145, "learning_rate": 6.145039873833954e-05, "loss": 5.201, "step": 32090 }, { "epoch": 0.06148700871449053, "grad_norm": 1.6124627590179443, "learning_rate": 6.146955350244862e-05, "loss": 5.0723, "step": 32100 }, { "epoch": 0.061506163545865765, "grad_norm": 1.5328558683395386, "learning_rate": 6.148870826655769e-05, "loss": 5.0919, "step": 32110 }, { "epoch": 0.061525318377241, "grad_norm": 1.5142502784729004, "learning_rate": 6.150786303066677e-05, "loss": 5.035, "step": 32120 }, { "epoch": 0.061544473208616224, "grad_norm": 1.5483824014663696, "learning_rate": 6.152701779477585e-05, "loss": 5.2999, "step": 32130 }, { "epoch": 0.06156362803999146, "grad_norm": 1.570831298828125, "learning_rate": 6.154617255888493e-05, "loss": 5.0505, "step": 32140 }, { "epoch": 0.06158278287136669, "grad_norm": 1.5171581506729126, "learning_rate": 6.156532732299401e-05, "loss": 5.1657, "step": 32150 }, { "epoch": 0.061601937702741916, "grad_norm": 1.5843448638916016, "learning_rate": 6.158448208710309e-05, "loss": 5.1668, "step": 32160 }, { "epoch": 0.06162109253411715, "grad_norm": 
1.474081039428711, "learning_rate": 6.160363685121217e-05, "loss": 5.0935, "step": 32170 }, { "epoch": 0.06164024736549238, "grad_norm": 1.5699485540390015, "learning_rate": 6.162279161532125e-05, "loss": 5.044, "step": 32180 }, { "epoch": 0.06165940219686761, "grad_norm": 1.5619323253631592, "learning_rate": 6.164194637943034e-05, "loss": 5.1401, "step": 32190 }, { "epoch": 0.06167855702824284, "grad_norm": 1.5262621641159058, "learning_rate": 6.166110114353942e-05, "loss": 5.0472, "step": 32200 }, { "epoch": 0.061697711859618073, "grad_norm": 1.589749813079834, "learning_rate": 6.16802559076485e-05, "loss": 5.2199, "step": 32210 }, { "epoch": 0.0617168666909933, "grad_norm": 1.5614007711410522, "learning_rate": 6.169941067175757e-05, "loss": 5.0259, "step": 32220 }, { "epoch": 0.06173602152236853, "grad_norm": 1.5379796028137207, "learning_rate": 6.171856543586665e-05, "loss": 5.1078, "step": 32230 }, { "epoch": 0.061755176353743765, "grad_norm": 1.6439565420150757, "learning_rate": 6.173772019997573e-05, "loss": 5.0167, "step": 32240 }, { "epoch": 0.06177433118511899, "grad_norm": 1.5478392839431763, "learning_rate": 6.175687496408481e-05, "loss": 5.0445, "step": 32250 }, { "epoch": 0.061793486016494224, "grad_norm": 1.544792652130127, "learning_rate": 6.177602972819389e-05, "loss": 5.0356, "step": 32260 }, { "epoch": 0.06181264084786946, "grad_norm": 1.618467926979065, "learning_rate": 6.179518449230297e-05, "loss": 4.8364, "step": 32270 }, { "epoch": 0.06183179567924468, "grad_norm": 1.564583420753479, "learning_rate": 6.181433925641205e-05, "loss": 5.1679, "step": 32280 }, { "epoch": 0.061850950510619916, "grad_norm": 1.5582988262176514, "learning_rate": 6.183349402052114e-05, "loss": 5.0352, "step": 32290 }, { "epoch": 0.06187010534199515, "grad_norm": 1.5696263313293457, "learning_rate": 6.18526487846302e-05, "loss": 5.1014, "step": 32300 }, { "epoch": 0.06188926017337038, "grad_norm": 1.6256046295166016, "learning_rate": 6.18718035487393e-05, "loss": 
5.0374, "step": 32310 }, { "epoch": 0.06190841500474561, "grad_norm": 1.6308211088180542, "learning_rate": 6.189095831284838e-05, "loss": 5.1348, "step": 32320 }, { "epoch": 0.06192756983612084, "grad_norm": 1.4972892999649048, "learning_rate": 6.191011307695745e-05, "loss": 5.1271, "step": 32330 }, { "epoch": 0.061946724667496074, "grad_norm": 1.474563479423523, "learning_rate": 6.192926784106653e-05, "loss": 5.2029, "step": 32340 }, { "epoch": 0.0619658794988713, "grad_norm": 1.5227429866790771, "learning_rate": 6.194842260517561e-05, "loss": 5.157, "step": 32350 }, { "epoch": 0.06198503433024653, "grad_norm": 1.5385372638702393, "learning_rate": 6.196757736928469e-05, "loss": 5.1175, "step": 32360 }, { "epoch": 0.062004189161621766, "grad_norm": 1.4986774921417236, "learning_rate": 6.198673213339377e-05, "loss": 5.0947, "step": 32370 }, { "epoch": 0.06202334399299699, "grad_norm": 1.5550826787948608, "learning_rate": 6.200588689750285e-05, "loss": 4.9569, "step": 32380 }, { "epoch": 0.062042498824372225, "grad_norm": 1.690065860748291, "learning_rate": 6.202504166161193e-05, "loss": 4.9543, "step": 32390 }, { "epoch": 0.06206165365574746, "grad_norm": 1.4977343082427979, "learning_rate": 6.204419642572102e-05, "loss": 5.0597, "step": 32400 }, { "epoch": 0.062080808487122684, "grad_norm": 1.559804916381836, "learning_rate": 6.206335118983008e-05, "loss": 4.9979, "step": 32410 }, { "epoch": 0.06209996331849792, "grad_norm": 1.7101000547409058, "learning_rate": 6.208250595393917e-05, "loss": 4.9422, "step": 32420 }, { "epoch": 0.06211911814987315, "grad_norm": 1.7155734300613403, "learning_rate": 6.210166071804826e-05, "loss": 5.0098, "step": 32430 }, { "epoch": 0.062138272981248376, "grad_norm": 1.5980188846588135, "learning_rate": 6.212081548215733e-05, "loss": 5.1302, "step": 32440 }, { "epoch": 0.06215742781262361, "grad_norm": 1.5454624891281128, "learning_rate": 6.213997024626641e-05, "loss": 5.1014, "step": 32450 }, { "epoch": 0.06217658264399884, 
"grad_norm": 1.5783506631851196, "learning_rate": 6.215912501037549e-05, "loss": 5.1559, "step": 32460 }, { "epoch": 0.06219573747537407, "grad_norm": 1.5126391649246216, "learning_rate": 6.217827977448457e-05, "loss": 5.1521, "step": 32470 }, { "epoch": 0.0622148923067493, "grad_norm": 1.610743761062622, "learning_rate": 6.219743453859365e-05, "loss": 4.9302, "step": 32480 }, { "epoch": 0.06223404713812453, "grad_norm": 1.6921031475067139, "learning_rate": 6.221658930270273e-05, "loss": 5.1177, "step": 32490 }, { "epoch": 0.06225320196949976, "grad_norm": 1.7143486738204956, "learning_rate": 6.223574406681182e-05, "loss": 5.0554, "step": 32500 }, { "epoch": 0.06227235680087499, "grad_norm": 1.5740307569503784, "learning_rate": 6.22548988309209e-05, "loss": 5.078, "step": 32510 }, { "epoch": 0.062291511632250225, "grad_norm": 1.4885282516479492, "learning_rate": 6.227405359502996e-05, "loss": 5.0166, "step": 32520 }, { "epoch": 0.06231066646362545, "grad_norm": 1.5054435729980469, "learning_rate": 6.229320835913905e-05, "loss": 5.1892, "step": 32530 }, { "epoch": 0.062329821295000684, "grad_norm": 1.5780268907546997, "learning_rate": 6.231236312324814e-05, "loss": 5.0244, "step": 32540 }, { "epoch": 0.06234897612637592, "grad_norm": 1.6465424299240112, "learning_rate": 6.233151788735721e-05, "loss": 4.9241, "step": 32550 }, { "epoch": 0.06236813095775114, "grad_norm": 1.7276428937911987, "learning_rate": 6.235067265146629e-05, "loss": 5.0293, "step": 32560 }, { "epoch": 0.062387285789126376, "grad_norm": 1.580592155456543, "learning_rate": 6.236982741557537e-05, "loss": 5.0858, "step": 32570 }, { "epoch": 0.06240644062050161, "grad_norm": 1.487614393234253, "learning_rate": 6.238898217968445e-05, "loss": 4.9692, "step": 32580 }, { "epoch": 0.062425595451876835, "grad_norm": 1.4957590103149414, "learning_rate": 6.240813694379353e-05, "loss": 4.9112, "step": 32590 }, { "epoch": 0.06244475028325207, "grad_norm": 1.4578064680099487, "learning_rate": 
6.242729170790262e-05, "loss": 5.1173, "step": 32600 }, { "epoch": 0.0624639051146273, "grad_norm": 1.5405077934265137, "learning_rate": 6.244644647201168e-05, "loss": 5.0071, "step": 32610 }, { "epoch": 0.062483059946002534, "grad_norm": 1.5753767490386963, "learning_rate": 6.246560123612078e-05, "loss": 4.9778, "step": 32620 }, { "epoch": 0.06250221477737776, "grad_norm": 1.4298795461654663, "learning_rate": 6.248475600022985e-05, "loss": 5.1114, "step": 32630 }, { "epoch": 0.06252136960875299, "grad_norm": 1.6516163349151611, "learning_rate": 6.250391076433893e-05, "loss": 4.9757, "step": 32640 }, { "epoch": 0.06254052444012823, "grad_norm": 1.5849918127059937, "learning_rate": 6.252306552844801e-05, "loss": 5.1524, "step": 32650 }, { "epoch": 0.06255967927150345, "grad_norm": 1.4881731271743774, "learning_rate": 6.254222029255709e-05, "loss": 4.9783, "step": 32660 }, { "epoch": 0.06257883410287868, "grad_norm": 1.7174267768859863, "learning_rate": 6.256137505666617e-05, "loss": 5.1083, "step": 32670 }, { "epoch": 0.06259798893425392, "grad_norm": 1.583556890487671, "learning_rate": 6.258052982077525e-05, "loss": 5.0425, "step": 32680 }, { "epoch": 0.06261714376562914, "grad_norm": 1.464403748512268, "learning_rate": 6.259968458488433e-05, "loss": 5.1265, "step": 32690 }, { "epoch": 0.06263629859700437, "grad_norm": 1.6691153049468994, "learning_rate": 6.261883934899341e-05, "loss": 5.0901, "step": 32700 }, { "epoch": 0.06265545342837961, "grad_norm": 1.5308412313461304, "learning_rate": 6.26379941131025e-05, "loss": 5.2191, "step": 32710 }, { "epoch": 0.06267460825975484, "grad_norm": 1.5391908884048462, "learning_rate": 6.265714887721156e-05, "loss": 5.187, "step": 32720 }, { "epoch": 0.06269376309113006, "grad_norm": 1.5538311004638672, "learning_rate": 6.267630364132066e-05, "loss": 5.0147, "step": 32730 }, { "epoch": 0.0627129179225053, "grad_norm": 1.655471920967102, "learning_rate": 6.269545840542973e-05, "loss": 5.1724, "step": 32740 }, { "epoch": 
0.06273207275388053, "grad_norm": 1.5660120248794556, "learning_rate": 6.271461316953881e-05, "loss": 5.052, "step": 32750 }, { "epoch": 0.06275122758525575, "grad_norm": 1.5605278015136719, "learning_rate": 6.273376793364789e-05, "loss": 5.0818, "step": 32760 }, { "epoch": 0.062770382416631, "grad_norm": 1.5734816789627075, "learning_rate": 6.275292269775697e-05, "loss": 5.0342, "step": 32770 }, { "epoch": 0.06278953724800622, "grad_norm": 1.8265380859375, "learning_rate": 6.277207746186605e-05, "loss": 4.9325, "step": 32780 }, { "epoch": 0.06280869207938146, "grad_norm": 1.502073884010315, "learning_rate": 6.279123222597513e-05, "loss": 5.1537, "step": 32790 }, { "epoch": 0.06282784691075669, "grad_norm": 1.7839760780334473, "learning_rate": 6.281038699008421e-05, "loss": 4.9582, "step": 32800 }, { "epoch": 0.06284700174213191, "grad_norm": 1.534031867980957, "learning_rate": 6.28295417541933e-05, "loss": 5.0769, "step": 32810 }, { "epoch": 0.06286615657350715, "grad_norm": 1.5868111848831177, "learning_rate": 6.284869651830238e-05, "loss": 5.091, "step": 32820 }, { "epoch": 0.06288531140488238, "grad_norm": 1.6483451128005981, "learning_rate": 6.286785128241144e-05, "loss": 5.096, "step": 32830 }, { "epoch": 0.0629044662362576, "grad_norm": 1.4740543365478516, "learning_rate": 6.288700604652054e-05, "loss": 4.9217, "step": 32840 }, { "epoch": 0.06292362106763284, "grad_norm": 1.6030335426330566, "learning_rate": 6.290616081062961e-05, "loss": 5.1749, "step": 32850 }, { "epoch": 0.06294277589900807, "grad_norm": 1.5416020154953003, "learning_rate": 6.292531557473869e-05, "loss": 5.0266, "step": 32860 }, { "epoch": 0.0629619307303833, "grad_norm": 1.5574623346328735, "learning_rate": 6.294447033884777e-05, "loss": 5.0566, "step": 32870 }, { "epoch": 0.06298108556175853, "grad_norm": 1.4883673191070557, "learning_rate": 6.296362510295685e-05, "loss": 5.0338, "step": 32880 }, { "epoch": 0.06300024039313376, "grad_norm": 1.6154166460037231, "learning_rate": 
6.298277986706593e-05, "loss": 5.092, "step": 32890 }, { "epoch": 0.06301939522450899, "grad_norm": 1.511975884437561, "learning_rate": 6.300193463117501e-05, "loss": 5.0704, "step": 32900 }, { "epoch": 0.06303855005588423, "grad_norm": 1.577131986618042, "learning_rate": 6.30210893952841e-05, "loss": 5.0355, "step": 32910 }, { "epoch": 0.06305770488725945, "grad_norm": 1.5091904401779175, "learning_rate": 6.304024415939318e-05, "loss": 5.1195, "step": 32920 }, { "epoch": 0.06307685971863468, "grad_norm": 1.5867029428482056, "learning_rate": 6.305939892350226e-05, "loss": 5.0498, "step": 32930 }, { "epoch": 0.06309601455000992, "grad_norm": 1.6634974479675293, "learning_rate": 6.307855368761133e-05, "loss": 5.0914, "step": 32940 }, { "epoch": 0.06311516938138514, "grad_norm": 1.51404869556427, "learning_rate": 6.30977084517204e-05, "loss": 4.9976, "step": 32950 }, { "epoch": 0.06313432421276037, "grad_norm": 1.5452401638031006, "learning_rate": 6.311686321582949e-05, "loss": 5.0216, "step": 32960 }, { "epoch": 0.06315347904413561, "grad_norm": 1.450939416885376, "learning_rate": 6.313601797993857e-05, "loss": 5.0846, "step": 32970 }, { "epoch": 0.06317263387551084, "grad_norm": 1.5746911764144897, "learning_rate": 6.315517274404765e-05, "loss": 4.9357, "step": 32980 }, { "epoch": 0.06319178870688606, "grad_norm": 1.538422703742981, "learning_rate": 6.317432750815673e-05, "loss": 5.1304, "step": 32990 }, { "epoch": 0.0632109435382613, "grad_norm": 1.5876191854476929, "learning_rate": 6.319348227226581e-05, "loss": 5.1661, "step": 33000 }, { "epoch": 0.06323009836963653, "grad_norm": 1.435694694519043, "learning_rate": 6.32126370363749e-05, "loss": 5.0959, "step": 33010 }, { "epoch": 0.06324925320101175, "grad_norm": 1.5277310609817505, "learning_rate": 6.323179180048398e-05, "loss": 5.1248, "step": 33020 }, { "epoch": 0.063268408032387, "grad_norm": 1.6087771654129028, "learning_rate": 6.325094656459306e-05, "loss": 5.0528, "step": 33030 }, { "epoch": 
0.06328756286376222, "grad_norm": 1.6826751232147217, "learning_rate": 6.327010132870214e-05, "loss": 5.1165, "step": 33040 }, { "epoch": 0.06330671769513745, "grad_norm": 1.5660239458084106, "learning_rate": 6.32892560928112e-05, "loss": 5.0959, "step": 33050 }, { "epoch": 0.06332587252651269, "grad_norm": 1.6502758264541626, "learning_rate": 6.330841085692029e-05, "loss": 5.1345, "step": 33060 }, { "epoch": 0.06334502735788791, "grad_norm": 1.4846711158752441, "learning_rate": 6.332756562102937e-05, "loss": 5.0675, "step": 33070 }, { "epoch": 0.06336418218926314, "grad_norm": 1.5265485048294067, "learning_rate": 6.334672038513845e-05, "loss": 5.0751, "step": 33080 }, { "epoch": 0.06338333702063838, "grad_norm": 1.5642056465148926, "learning_rate": 6.336587514924753e-05, "loss": 4.9803, "step": 33090 }, { "epoch": 0.0634024918520136, "grad_norm": 1.5782098770141602, "learning_rate": 6.338502991335661e-05, "loss": 5.102, "step": 33100 }, { "epoch": 0.06342164668338883, "grad_norm": 1.5403473377227783, "learning_rate": 6.34041846774657e-05, "loss": 5.0063, "step": 33110 }, { "epoch": 0.06344080151476407, "grad_norm": 1.5175206661224365, "learning_rate": 6.342333944157478e-05, "loss": 5.1032, "step": 33120 }, { "epoch": 0.0634599563461393, "grad_norm": 1.5365647077560425, "learning_rate": 6.344249420568384e-05, "loss": 5.0329, "step": 33130 }, { "epoch": 0.06347911117751452, "grad_norm": 1.5040160417556763, "learning_rate": 6.346164896979292e-05, "loss": 5.0734, "step": 33140 }, { "epoch": 0.06349826600888976, "grad_norm": 1.6555148363113403, "learning_rate": 6.348080373390202e-05, "loss": 5.1206, "step": 33150 }, { "epoch": 0.06351742084026499, "grad_norm": 1.5800418853759766, "learning_rate": 6.349995849801109e-05, "loss": 5.0585, "step": 33160 }, { "epoch": 0.06353657567164021, "grad_norm": 1.5051188468933105, "learning_rate": 6.351911326212017e-05, "loss": 5.006, "step": 33170 }, { "epoch": 0.06355573050301545, "grad_norm": 1.4930249452590942, "learning_rate": 
6.353826802622925e-05, "loss": 5.1808, "step": 33180 }, { "epoch": 0.06357488533439068, "grad_norm": 1.5619926452636719, "learning_rate": 6.355742279033833e-05, "loss": 5.0622, "step": 33190 }, { "epoch": 0.0635940401657659, "grad_norm": 1.6248222589492798, "learning_rate": 6.357657755444741e-05, "loss": 5.1267, "step": 33200 }, { "epoch": 0.06361319499714115, "grad_norm": 1.7301002740859985, "learning_rate": 6.35957323185565e-05, "loss": 4.982, "step": 33210 }, { "epoch": 0.06363234982851637, "grad_norm": 1.5135712623596191, "learning_rate": 6.361488708266557e-05, "loss": 5.097, "step": 33220 }, { "epoch": 0.0636515046598916, "grad_norm": 1.5406922101974487, "learning_rate": 6.363404184677466e-05, "loss": 5.1374, "step": 33230 }, { "epoch": 0.06367065949126684, "grad_norm": 1.4681984186172485, "learning_rate": 6.365319661088372e-05, "loss": 5.0758, "step": 33240 }, { "epoch": 0.06368981432264206, "grad_norm": 1.651430606842041, "learning_rate": 6.36723513749928e-05, "loss": 5.1237, "step": 33250 }, { "epoch": 0.06370896915401729, "grad_norm": 1.5434772968292236, "learning_rate": 6.36915061391019e-05, "loss": 5.0962, "step": 33260 }, { "epoch": 0.06372812398539253, "grad_norm": 1.4388787746429443, "learning_rate": 6.370874542680006e-05, "loss": 5.1907, "step": 33270 }, { "epoch": 0.06374727881676776, "grad_norm": 1.5715023279190063, "learning_rate": 6.372790019090914e-05, "loss": 5.1833, "step": 33280 }, { "epoch": 0.06376643364814298, "grad_norm": 1.5117770433425903, "learning_rate": 6.374705495501822e-05, "loss": 5.1862, "step": 33290 }, { "epoch": 0.06378558847951822, "grad_norm": 1.4837021827697754, "learning_rate": 6.37662097191273e-05, "loss": 4.9512, "step": 33300 }, { "epoch": 0.06380474331089345, "grad_norm": 1.5686206817626953, "learning_rate": 6.378536448323638e-05, "loss": 5.1449, "step": 33310 }, { "epoch": 0.06382389814226867, "grad_norm": 1.581110954284668, "learning_rate": 6.380451924734547e-05, "loss": 5.0145, "step": 33320 }, { "epoch": 
0.06384305297364391, "grad_norm": 1.616673469543457, "learning_rate": 6.382367401145455e-05, "loss": 4.9254, "step": 33330 }, { "epoch": 0.06386220780501914, "grad_norm": 1.5680155754089355, "learning_rate": 6.384282877556363e-05, "loss": 5.0806, "step": 33340 }, { "epoch": 0.06388136263639436, "grad_norm": 1.4310476779937744, "learning_rate": 6.38619835396727e-05, "loss": 5.038, "step": 33350 }, { "epoch": 0.0639005174677696, "grad_norm": 1.5126159191131592, "learning_rate": 6.388113830378179e-05, "loss": 5.1106, "step": 33360 }, { "epoch": 0.06391967229914483, "grad_norm": 1.4389656782150269, "learning_rate": 6.390029306789086e-05, "loss": 5.1757, "step": 33370 }, { "epoch": 0.06393882713052006, "grad_norm": 1.643798828125, "learning_rate": 6.391944783199994e-05, "loss": 5.2075, "step": 33380 }, { "epoch": 0.0639579819618953, "grad_norm": 1.4981718063354492, "learning_rate": 6.393860259610902e-05, "loss": 5.2058, "step": 33390 }, { "epoch": 0.06397713679327052, "grad_norm": 1.536241888999939, "learning_rate": 6.39577573602181e-05, "loss": 5.0979, "step": 33400 }, { "epoch": 0.06399629162464575, "grad_norm": 1.6511832475662231, "learning_rate": 6.397691212432718e-05, "loss": 5.062, "step": 33410 }, { "epoch": 0.06401544645602099, "grad_norm": 1.697230339050293, "learning_rate": 6.399606688843627e-05, "loss": 5.1569, "step": 33420 }, { "epoch": 0.06403460128739621, "grad_norm": 1.5043634176254272, "learning_rate": 6.401522165254533e-05, "loss": 5.062, "step": 33430 }, { "epoch": 0.06405375611877144, "grad_norm": 1.6551066637039185, "learning_rate": 6.403437641665443e-05, "loss": 5.0419, "step": 33440 }, { "epoch": 0.06407291095014668, "grad_norm": 1.627714991569519, "learning_rate": 6.405353118076351e-05, "loss": 5.007, "step": 33450 }, { "epoch": 0.0640920657815219, "grad_norm": 1.5757410526275635, "learning_rate": 6.407268594487258e-05, "loss": 4.9452, "step": 33460 }, { "epoch": 0.06411122061289715, "grad_norm": 1.6159412860870361, "learning_rate": 
6.409184070898166e-05, "loss": 5.1043, "step": 33470 }, { "epoch": 0.06413037544427237, "grad_norm": 1.4699617624282837, "learning_rate": 6.411099547309074e-05, "loss": 5.1152, "step": 33480 }, { "epoch": 0.0641495302756476, "grad_norm": 1.4654078483581543, "learning_rate": 6.413015023719982e-05, "loss": 4.9045, "step": 33490 }, { "epoch": 0.06416868510702284, "grad_norm": 1.565819501876831, "learning_rate": 6.41493050013089e-05, "loss": 5.1578, "step": 33500 }, { "epoch": 0.06418783993839806, "grad_norm": 1.6015050411224365, "learning_rate": 6.416845976541798e-05, "loss": 5.0164, "step": 33510 }, { "epoch": 0.06420699476977329, "grad_norm": 1.569210171699524, "learning_rate": 6.418761452952707e-05, "loss": 4.997, "step": 33520 }, { "epoch": 0.06422614960114853, "grad_norm": 1.5911251306533813, "learning_rate": 6.420676929363615e-05, "loss": 5.1397, "step": 33530 }, { "epoch": 0.06424530443252376, "grad_norm": 1.453550100326538, "learning_rate": 6.422592405774521e-05, "loss": 5.1365, "step": 33540 }, { "epoch": 0.06426445926389898, "grad_norm": 1.4726251363754272, "learning_rate": 6.424507882185431e-05, "loss": 5.136, "step": 33550 }, { "epoch": 0.06428361409527422, "grad_norm": 1.6540887355804443, "learning_rate": 6.426423358596339e-05, "loss": 5.0048, "step": 33560 }, { "epoch": 0.06430276892664945, "grad_norm": 1.5102249383926392, "learning_rate": 6.428338835007246e-05, "loss": 5.0398, "step": 33570 }, { "epoch": 0.06432192375802467, "grad_norm": 1.5456268787384033, "learning_rate": 6.430254311418154e-05, "loss": 4.872, "step": 33580 }, { "epoch": 0.06434107858939991, "grad_norm": 1.5089375972747803, "learning_rate": 6.432169787829062e-05, "loss": 5.0822, "step": 33590 }, { "epoch": 0.06436023342077514, "grad_norm": 1.547521710395813, "learning_rate": 6.43408526423997e-05, "loss": 4.9704, "step": 33600 }, { "epoch": 0.06437938825215037, "grad_norm": 1.5079472064971924, "learning_rate": 6.436000740650878e-05, "loss": 5.0348, "step": 33610 }, { "epoch": 
0.0643985430835256, "grad_norm": 1.52211594581604, "learning_rate": 6.437916217061786e-05, "loss": 5.144, "step": 33620 }, { "epoch": 0.06441769791490083, "grad_norm": 1.5491390228271484, "learning_rate": 6.439831693472695e-05, "loss": 5.1107, "step": 33630 }, { "epoch": 0.06443685274627606, "grad_norm": 1.5316379070281982, "learning_rate": 6.441747169883603e-05, "loss": 4.9295, "step": 33640 }, { "epoch": 0.0644560075776513, "grad_norm": 1.5011670589447021, "learning_rate": 6.44366264629451e-05, "loss": 4.9765, "step": 33650 }, { "epoch": 0.06447516240902652, "grad_norm": 1.7005995512008667, "learning_rate": 6.445578122705418e-05, "loss": 5.2109, "step": 33660 }, { "epoch": 0.06449431724040175, "grad_norm": 1.4919121265411377, "learning_rate": 6.447493599116327e-05, "loss": 5.094, "step": 33670 }, { "epoch": 0.06451347207177699, "grad_norm": 1.5489461421966553, "learning_rate": 6.449409075527234e-05, "loss": 5.0796, "step": 33680 }, { "epoch": 0.06453262690315222, "grad_norm": 1.5461050271987915, "learning_rate": 6.451324551938142e-05, "loss": 4.9552, "step": 33690 }, { "epoch": 0.06455178173452744, "grad_norm": 1.5752078294754028, "learning_rate": 6.45324002834905e-05, "loss": 4.969, "step": 33700 }, { "epoch": 0.06457093656590268, "grad_norm": 1.5290602445602417, "learning_rate": 6.455155504759958e-05, "loss": 5.0327, "step": 33710 }, { "epoch": 0.06459009139727791, "grad_norm": 1.702268362045288, "learning_rate": 6.457070981170866e-05, "loss": 5.0962, "step": 33720 }, { "epoch": 0.06460924622865313, "grad_norm": 1.5748411417007446, "learning_rate": 6.458986457581775e-05, "loss": 4.965, "step": 33730 }, { "epoch": 0.06462840106002837, "grad_norm": 1.5310174226760864, "learning_rate": 6.460901933992683e-05, "loss": 5.0238, "step": 33740 }, { "epoch": 0.0646475558914036, "grad_norm": 1.5831639766693115, "learning_rate": 6.462817410403591e-05, "loss": 4.9629, "step": 33750 }, { "epoch": 0.06466671072277883, "grad_norm": 1.5666829347610474, "learning_rate": 
6.464732886814498e-05, "loss": 5.0276, "step": 33760 }, { "epoch": 0.06468586555415407, "grad_norm": 1.6481724977493286, "learning_rate": 6.466648363225406e-05, "loss": 4.9885, "step": 33770 }, { "epoch": 0.06470502038552929, "grad_norm": 1.5730723142623901, "learning_rate": 6.468563839636315e-05, "loss": 4.9857, "step": 33780 }, { "epoch": 0.06472417521690452, "grad_norm": 1.530822515487671, "learning_rate": 6.470479316047222e-05, "loss": 5.153, "step": 33790 }, { "epoch": 0.06474333004827976, "grad_norm": 1.6163341999053955, "learning_rate": 6.47239479245813e-05, "loss": 4.9835, "step": 33800 }, { "epoch": 0.06476248487965498, "grad_norm": 1.4660015106201172, "learning_rate": 6.474310268869038e-05, "loss": 5.0263, "step": 33810 }, { "epoch": 0.06478163971103021, "grad_norm": 1.5860991477966309, "learning_rate": 6.476225745279946e-05, "loss": 5.0065, "step": 33820 }, { "epoch": 0.06480079454240545, "grad_norm": 1.49910306930542, "learning_rate": 6.478141221690855e-05, "loss": 4.9837, "step": 33830 }, { "epoch": 0.06481994937378067, "grad_norm": 1.5451815128326416, "learning_rate": 6.480056698101763e-05, "loss": 5.1437, "step": 33840 }, { "epoch": 0.0648391042051559, "grad_norm": 1.5558960437774658, "learning_rate": 6.481972174512671e-05, "loss": 5.0274, "step": 33850 }, { "epoch": 0.06485825903653114, "grad_norm": 1.4738713502883911, "learning_rate": 6.483887650923579e-05, "loss": 5.0142, "step": 33860 }, { "epoch": 0.06487741386790637, "grad_norm": 1.462711215019226, "learning_rate": 6.485803127334486e-05, "loss": 4.9763, "step": 33870 }, { "epoch": 0.06489656869928159, "grad_norm": 1.6578030586242676, "learning_rate": 6.487718603745394e-05, "loss": 5.1487, "step": 33880 }, { "epoch": 0.06491572353065683, "grad_norm": 1.579803705215454, "learning_rate": 6.489634080156303e-05, "loss": 5.0612, "step": 33890 }, { "epoch": 0.06493487836203206, "grad_norm": 1.6489574909210205, "learning_rate": 6.49154955656721e-05, "loss": 5.1039, "step": 33900 }, { "epoch": 
0.06495403319340728, "grad_norm": 1.6122777462005615, "learning_rate": 6.493465032978118e-05, "loss": 4.967, "step": 33910 }, { "epoch": 0.06497318802478252, "grad_norm": 1.6508694887161255, "learning_rate": 6.495380509389026e-05, "loss": 5.0896, "step": 33920 }, { "epoch": 0.06499234285615775, "grad_norm": 1.7236393690109253, "learning_rate": 6.497295985799934e-05, "loss": 4.9616, "step": 33930 }, { "epoch": 0.06501149768753298, "grad_norm": 1.5172516107559204, "learning_rate": 6.499211462210843e-05, "loss": 5.0943, "step": 33940 }, { "epoch": 0.06503065251890822, "grad_norm": 1.5160366296768188, "learning_rate": 6.501126938621751e-05, "loss": 5.025, "step": 33950 }, { "epoch": 0.06504980735028344, "grad_norm": 1.5046238899230957, "learning_rate": 6.503042415032657e-05, "loss": 5.0502, "step": 33960 }, { "epoch": 0.06506896218165867, "grad_norm": 1.595617651939392, "learning_rate": 6.504957891443567e-05, "loss": 5.1025, "step": 33970 }, { "epoch": 0.06508811701303391, "grad_norm": 1.576843023300171, "learning_rate": 6.506873367854474e-05, "loss": 4.9786, "step": 33980 }, { "epoch": 0.06510727184440913, "grad_norm": 1.601335048675537, "learning_rate": 6.508788844265382e-05, "loss": 4.9361, "step": 33990 }, { "epoch": 0.06512642667578436, "grad_norm": 1.6570309400558472, "learning_rate": 6.51070432067629e-05, "loss": 5.0044, "step": 34000 }, { "epoch": 0.0651455815071596, "grad_norm": 1.63922119140625, "learning_rate": 6.512619797087198e-05, "loss": 4.9053, "step": 34010 }, { "epoch": 0.06516473633853483, "grad_norm": 1.576280117034912, "learning_rate": 6.514535273498106e-05, "loss": 5.0373, "step": 34020 }, { "epoch": 0.06518389116991005, "grad_norm": 1.5882736444473267, "learning_rate": 6.516450749909014e-05, "loss": 5.0877, "step": 34030 }, { "epoch": 0.06520304600128529, "grad_norm": 1.5657670497894287, "learning_rate": 6.518366226319923e-05, "loss": 4.9889, "step": 34040 }, { "epoch": 0.06522220083266052, "grad_norm": 1.494215965270996, "learning_rate": 
6.52028170273083e-05, "loss": 5.1028, "step": 34050 }, { "epoch": 0.06524135566403574, "grad_norm": 1.5945605039596558, "learning_rate": 6.522197179141739e-05, "loss": 5.0182, "step": 34060 }, { "epoch": 0.06526051049541098, "grad_norm": 1.4788848161697388, "learning_rate": 6.524112655552646e-05, "loss": 5.0858, "step": 34070 }, { "epoch": 0.06527966532678621, "grad_norm": 1.5437136888504028, "learning_rate": 6.526028131963555e-05, "loss": 5.0482, "step": 34080 }, { "epoch": 0.06529882015816144, "grad_norm": 1.6043781042099, "learning_rate": 6.527943608374462e-05, "loss": 5.0913, "step": 34090 }, { "epoch": 0.06531797498953668, "grad_norm": 1.6178327798843384, "learning_rate": 6.52985908478537e-05, "loss": 5.1119, "step": 34100 }, { "epoch": 0.0653371298209119, "grad_norm": 1.6554430723190308, "learning_rate": 6.531774561196278e-05, "loss": 4.9451, "step": 34110 }, { "epoch": 0.06535628465228714, "grad_norm": 1.5121746063232422, "learning_rate": 6.533690037607186e-05, "loss": 5.0854, "step": 34120 }, { "epoch": 0.06537543948366237, "grad_norm": 1.5503257513046265, "learning_rate": 6.535605514018094e-05, "loss": 5.0106, "step": 34130 }, { "epoch": 0.0653945943150376, "grad_norm": 1.5311369895935059, "learning_rate": 6.537520990429002e-05, "loss": 5.1138, "step": 34140 }, { "epoch": 0.06541374914641283, "grad_norm": 1.4364420175552368, "learning_rate": 6.539436466839909e-05, "loss": 4.9829, "step": 34150 }, { "epoch": 0.06543290397778806, "grad_norm": 1.4671615362167358, "learning_rate": 6.541351943250819e-05, "loss": 5.1309, "step": 34160 }, { "epoch": 0.06545205880916329, "grad_norm": 1.386218547821045, "learning_rate": 6.543267419661727e-05, "loss": 5.2007, "step": 34170 }, { "epoch": 0.06547121364053853, "grad_norm": 1.5528539419174194, "learning_rate": 6.545182896072634e-05, "loss": 4.9588, "step": 34180 }, { "epoch": 0.06549036847191375, "grad_norm": 1.542253017425537, "learning_rate": 6.547098372483543e-05, "loss": 4.8724, "step": 34190 }, { "epoch": 
0.06550952330328898, "grad_norm": 1.517397165298462, "learning_rate": 6.54901384889445e-05, "loss": 5.0692, "step": 34200 }, { "epoch": 0.06552867813466422, "grad_norm": 1.699585199356079, "learning_rate": 6.550929325305358e-05, "loss": 4.9813, "step": 34210 }, { "epoch": 0.06554783296603944, "grad_norm": 1.5013314485549927, "learning_rate": 6.552844801716266e-05, "loss": 5.0482, "step": 34220 }, { "epoch": 0.06556698779741467, "grad_norm": 1.5151973962783813, "learning_rate": 6.554760278127174e-05, "loss": 5.0764, "step": 34230 }, { "epoch": 0.06558614262878991, "grad_norm": 1.7117599248886108, "learning_rate": 6.556675754538082e-05, "loss": 5.1305, "step": 34240 }, { "epoch": 0.06560529746016514, "grad_norm": 1.563175082206726, "learning_rate": 6.55859123094899e-05, "loss": 4.9767, "step": 34250 }, { "epoch": 0.06562445229154036, "grad_norm": 1.5128018856048584, "learning_rate": 6.560506707359897e-05, "loss": 5.1078, "step": 34260 }, { "epoch": 0.0656436071229156, "grad_norm": 1.665131688117981, "learning_rate": 6.562422183770807e-05, "loss": 4.9611, "step": 34270 }, { "epoch": 0.06566276195429083, "grad_norm": 1.5242072343826294, "learning_rate": 6.564337660181715e-05, "loss": 5.037, "step": 34280 }, { "epoch": 0.06568191678566605, "grad_norm": 1.5879744291305542, "learning_rate": 6.566253136592622e-05, "loss": 5.0458, "step": 34290 }, { "epoch": 0.06570107161704129, "grad_norm": 1.759177565574646, "learning_rate": 6.56816861300353e-05, "loss": 5.1778, "step": 34300 }, { "epoch": 0.06572022644841652, "grad_norm": 1.555600881576538, "learning_rate": 6.570084089414438e-05, "loss": 5.1044, "step": 34310 }, { "epoch": 0.06573938127979174, "grad_norm": 1.5772709846496582, "learning_rate": 6.571999565825346e-05, "loss": 4.8234, "step": 34320 }, { "epoch": 0.06575853611116698, "grad_norm": 1.5514702796936035, "learning_rate": 6.573915042236254e-05, "loss": 4.9844, "step": 34330 }, { "epoch": 0.06577769094254221, "grad_norm": 1.5249544382095337, "learning_rate": 
6.575830518647162e-05, "loss": 5.02, "step": 34340 }, { "epoch": 0.06579684577391744, "grad_norm": 1.5171823501586914, "learning_rate": 6.57774599505807e-05, "loss": 5.0287, "step": 34350 }, { "epoch": 0.06581600060529268, "grad_norm": 1.5478177070617676, "learning_rate": 6.579661471468979e-05, "loss": 4.9802, "step": 34360 }, { "epoch": 0.0658351554366679, "grad_norm": 1.588955283164978, "learning_rate": 6.581576947879885e-05, "loss": 4.9626, "step": 34370 }, { "epoch": 0.06585431026804313, "grad_norm": 1.533915400505066, "learning_rate": 6.583492424290795e-05, "loss": 5.098, "step": 34380 }, { "epoch": 0.06587346509941837, "grad_norm": 1.6085281372070312, "learning_rate": 6.585407900701703e-05, "loss": 4.987, "step": 34390 }, { "epoch": 0.0658926199307936, "grad_norm": 1.5624357461929321, "learning_rate": 6.58732337711261e-05, "loss": 5.0544, "step": 34400 }, { "epoch": 0.06591177476216882, "grad_norm": 1.5185540914535522, "learning_rate": 6.589238853523518e-05, "loss": 5.1066, "step": 34410 }, { "epoch": 0.06593092959354406, "grad_norm": 1.5452309846878052, "learning_rate": 6.591154329934426e-05, "loss": 5.0744, "step": 34420 }, { "epoch": 0.06595008442491929, "grad_norm": 1.5302547216415405, "learning_rate": 6.593069806345334e-05, "loss": 5.0499, "step": 34430 }, { "epoch": 0.06596923925629451, "grad_norm": 1.552132248878479, "learning_rate": 6.594985282756242e-05, "loss": 5.0321, "step": 34440 }, { "epoch": 0.06598839408766975, "grad_norm": 1.636988639831543, "learning_rate": 6.59690075916715e-05, "loss": 4.9487, "step": 34450 }, { "epoch": 0.06600754891904498, "grad_norm": 1.4870991706848145, "learning_rate": 6.598816235578059e-05, "loss": 5.1014, "step": 34460 }, { "epoch": 0.0660267037504202, "grad_norm": 1.5960254669189453, "learning_rate": 6.600731711988967e-05, "loss": 5.0634, "step": 34470 }, { "epoch": 0.06604585858179544, "grad_norm": 1.5166510343551636, "learning_rate": 6.602647188399874e-05, "loss": 5.206, "step": 34480 }, { "epoch": 
0.06606501341317067, "grad_norm": 1.5242987871170044, "learning_rate": 6.604562664810782e-05, "loss": 5.1511, "step": 34490 }, { "epoch": 0.0660841682445459, "grad_norm": 1.5745784044265747, "learning_rate": 6.606478141221691e-05, "loss": 4.919, "step": 34500 }, { "epoch": 0.06610332307592114, "grad_norm": 1.5388070344924927, "learning_rate": 6.608393617632598e-05, "loss": 4.9936, "step": 34510 }, { "epoch": 0.06612247790729636, "grad_norm": 1.4284987449645996, "learning_rate": 6.610309094043506e-05, "loss": 5.0754, "step": 34520 }, { "epoch": 0.06614163273867159, "grad_norm": 1.4782825708389282, "learning_rate": 6.612224570454414e-05, "loss": 5.0882, "step": 34530 }, { "epoch": 0.06616078757004683, "grad_norm": 1.5642672777175903, "learning_rate": 6.614140046865322e-05, "loss": 4.9893, "step": 34540 }, { "epoch": 0.06617994240142205, "grad_norm": 1.48440420627594, "learning_rate": 6.61605552327623e-05, "loss": 5.0143, "step": 34550 }, { "epoch": 0.06619909723279728, "grad_norm": 1.569103479385376, "learning_rate": 6.617970999687139e-05, "loss": 4.9566, "step": 34560 }, { "epoch": 0.06621825206417252, "grad_norm": 1.6051912307739258, "learning_rate": 6.619886476098047e-05, "loss": 5.0405, "step": 34570 }, { "epoch": 0.06623740689554775, "grad_norm": 1.52983558177948, "learning_rate": 6.621801952508955e-05, "loss": 5.1046, "step": 34580 }, { "epoch": 0.06625656172692297, "grad_norm": 1.569265604019165, "learning_rate": 6.623717428919862e-05, "loss": 5.0397, "step": 34590 }, { "epoch": 0.06627571655829821, "grad_norm": 1.4562124013900757, "learning_rate": 6.62563290533077e-05, "loss": 4.9578, "step": 34600 }, { "epoch": 0.06629487138967344, "grad_norm": 1.7135547399520874, "learning_rate": 6.627548381741679e-05, "loss": 4.894, "step": 34610 }, { "epoch": 0.06631402622104866, "grad_norm": 1.568987250328064, "learning_rate": 6.629463858152586e-05, "loss": 5.1146, "step": 34620 }, { "epoch": 0.0663331810524239, "grad_norm": 1.5281803607940674, "learning_rate": 
6.631379334563494e-05, "loss": 4.9549, "step": 34630 }, { "epoch": 0.06635233588379913, "grad_norm": 1.475947618484497, "learning_rate": 6.633294810974402e-05, "loss": 5.1054, "step": 34640 }, { "epoch": 0.06637149071517436, "grad_norm": 1.8092539310455322, "learning_rate": 6.63521028738531e-05, "loss": 5.0002, "step": 34650 }, { "epoch": 0.0663906455465496, "grad_norm": 1.476519227027893, "learning_rate": 6.637125763796219e-05, "loss": 5.0164, "step": 34660 }, { "epoch": 0.06640980037792482, "grad_norm": 1.488372564315796, "learning_rate": 6.639041240207127e-05, "loss": 4.9967, "step": 34670 }, { "epoch": 0.06642895520930005, "grad_norm": 1.5216072797775269, "learning_rate": 6.640956716618033e-05, "loss": 5.0714, "step": 34680 }, { "epoch": 0.06644811004067529, "grad_norm": 1.511568307876587, "learning_rate": 6.642872193028943e-05, "loss": 5.1723, "step": 34690 }, { "epoch": 0.06646726487205051, "grad_norm": 1.5302162170410156, "learning_rate": 6.64478766943985e-05, "loss": 5.1011, "step": 34700 }, { "epoch": 0.06648641970342574, "grad_norm": 1.6030895709991455, "learning_rate": 6.646703145850758e-05, "loss": 5.0109, "step": 34710 }, { "epoch": 0.06650557453480098, "grad_norm": 1.5816833972930908, "learning_rate": 6.648618622261667e-05, "loss": 5.026, "step": 34720 }, { "epoch": 0.0665247293661762, "grad_norm": 1.5327283143997192, "learning_rate": 6.650534098672574e-05, "loss": 5.0522, "step": 34730 }, { "epoch": 0.06654388419755143, "grad_norm": 1.5142223834991455, "learning_rate": 6.652449575083482e-05, "loss": 4.925, "step": 34740 }, { "epoch": 0.06656303902892667, "grad_norm": 1.6060409545898438, "learning_rate": 6.65436505149439e-05, "loss": 4.8667, "step": 34750 }, { "epoch": 0.0665821938603019, "grad_norm": 1.560491681098938, "learning_rate": 6.656280527905298e-05, "loss": 4.9332, "step": 34760 }, { "epoch": 0.06660134869167714, "grad_norm": 1.865837812423706, "learning_rate": 6.658196004316207e-05, "loss": 5.03, "step": 34770 }, { "epoch": 
0.06662050352305236, "grad_norm": 1.5897154808044434, "learning_rate": 6.660111480727115e-05, "loss": 4.9348, "step": 34780 }, { "epoch": 0.06663965835442759, "grad_norm": 1.5127270221710205, "learning_rate": 6.662026957138021e-05, "loss": 5.0231, "step": 34790 }, { "epoch": 0.06665881318580283, "grad_norm": 1.4737443923950195, "learning_rate": 6.663942433548931e-05, "loss": 4.9898, "step": 34800 }, { "epoch": 0.06667796801717805, "grad_norm": 1.5308462381362915, "learning_rate": 6.665857909959838e-05, "loss": 5.1434, "step": 34810 }, { "epoch": 0.06669712284855328, "grad_norm": 1.4751688241958618, "learning_rate": 6.667773386370746e-05, "loss": 5.0302, "step": 34820 }, { "epoch": 0.06671627767992852, "grad_norm": 1.4316819906234741, "learning_rate": 6.669688862781654e-05, "loss": 5.0139, "step": 34830 }, { "epoch": 0.06673543251130375, "grad_norm": 1.6205717325210571, "learning_rate": 6.671604339192562e-05, "loss": 4.9026, "step": 34840 }, { "epoch": 0.06675458734267897, "grad_norm": 1.5657583475112915, "learning_rate": 6.67351981560347e-05, "loss": 4.934, "step": 34850 }, { "epoch": 0.06677374217405421, "grad_norm": 1.5525174140930176, "learning_rate": 6.675435292014378e-05, "loss": 4.9755, "step": 34860 }, { "epoch": 0.06679289700542944, "grad_norm": 1.544546365737915, "learning_rate": 6.677350768425287e-05, "loss": 4.9632, "step": 34870 }, { "epoch": 0.06681205183680466, "grad_norm": 1.5364702939987183, "learning_rate": 6.679266244836195e-05, "loss": 5.1295, "step": 34880 }, { "epoch": 0.0668312066681799, "grad_norm": 1.495896577835083, "learning_rate": 6.681181721247103e-05, "loss": 4.9811, "step": 34890 }, { "epoch": 0.06685036149955513, "grad_norm": 1.6701622009277344, "learning_rate": 6.68309719765801e-05, "loss": 4.9683, "step": 34900 }, { "epoch": 0.06686951633093036, "grad_norm": 1.6719889640808105, "learning_rate": 6.685012674068919e-05, "loss": 5.0717, "step": 34910 }, { "epoch": 0.0668886711623056, "grad_norm": 1.5668437480926514, "learning_rate": 
6.686928150479826e-05, "loss": 4.8037, "step": 34920 }, { "epoch": 0.06690782599368082, "grad_norm": 1.633201003074646, "learning_rate": 6.688843626890734e-05, "loss": 5.0464, "step": 34930 }, { "epoch": 0.06692698082505605, "grad_norm": 1.5616223812103271, "learning_rate": 6.690759103301642e-05, "loss": 4.9314, "step": 34940 }, { "epoch": 0.06694613565643129, "grad_norm": 1.5842818021774292, "learning_rate": 6.69267457971255e-05, "loss": 4.9579, "step": 34950 }, { "epoch": 0.06696529048780651, "grad_norm": 1.5373907089233398, "learning_rate": 6.694590056123458e-05, "loss": 5.0502, "step": 34960 }, { "epoch": 0.06698444531918174, "grad_norm": 1.4564309120178223, "learning_rate": 6.696505532534366e-05, "loss": 5.0838, "step": 34970 }, { "epoch": 0.06700360015055698, "grad_norm": 1.5813449621200562, "learning_rate": 6.698421008945273e-05, "loss": 4.915, "step": 34980 }, { "epoch": 0.0670227549819322, "grad_norm": 1.663820505142212, "learning_rate": 6.700336485356183e-05, "loss": 5.0825, "step": 34990 }, { "epoch": 0.06704190981330743, "grad_norm": 1.669561743736267, "learning_rate": 6.702251961767091e-05, "loss": 4.9273, "step": 35000 }, { "epoch": 0.06706106464468267, "grad_norm": 1.5140225887298584, "learning_rate": 6.704167438177998e-05, "loss": 5.1131, "step": 35010 }, { "epoch": 0.0670802194760579, "grad_norm": 1.4911408424377441, "learning_rate": 6.706082914588906e-05, "loss": 4.9375, "step": 35020 }, { "epoch": 0.06709937430743312, "grad_norm": 1.4568120241165161, "learning_rate": 6.707998390999814e-05, "loss": 5.0086, "step": 35030 }, { "epoch": 0.06711852913880836, "grad_norm": 1.8708359003067017, "learning_rate": 6.709913867410722e-05, "loss": 5.0722, "step": 35040 }, { "epoch": 0.06713768397018359, "grad_norm": 1.5242923498153687, "learning_rate": 6.71182934382163e-05, "loss": 4.9803, "step": 35050 }, { "epoch": 0.06715683880155882, "grad_norm": 1.4948958158493042, "learning_rate": 6.713744820232538e-05, "loss": 4.9464, "step": 35060 }, { "epoch": 
0.06717599363293406, "grad_norm": 1.479536533355713, "learning_rate": 6.715660296643446e-05, "loss": 5.0202, "step": 35070 }, { "epoch": 0.06719514846430928, "grad_norm": 1.6004464626312256, "learning_rate": 6.717575773054355e-05, "loss": 4.9475, "step": 35080 }, { "epoch": 0.06721430329568451, "grad_norm": 1.5239393711090088, "learning_rate": 6.719491249465261e-05, "loss": 5.2485, "step": 35090 }, { "epoch": 0.06723345812705975, "grad_norm": 1.5006035566329956, "learning_rate": 6.721406725876171e-05, "loss": 5.0599, "step": 35100 }, { "epoch": 0.06725261295843497, "grad_norm": 1.4576960802078247, "learning_rate": 6.723322202287079e-05, "loss": 5.0879, "step": 35110 }, { "epoch": 0.0672717677898102, "grad_norm": 1.440714716911316, "learning_rate": 6.725237678697986e-05, "loss": 5.0149, "step": 35120 }, { "epoch": 0.06729092262118544, "grad_norm": 1.4546386003494263, "learning_rate": 6.727153155108894e-05, "loss": 5.1359, "step": 35130 }, { "epoch": 0.06731007745256067, "grad_norm": 1.5158021450042725, "learning_rate": 6.729068631519802e-05, "loss": 5.0933, "step": 35140 }, { "epoch": 0.06732923228393589, "grad_norm": 1.4919730424880981, "learning_rate": 6.73098410793071e-05, "loss": 4.972, "step": 35150 }, { "epoch": 0.06734838711531113, "grad_norm": 1.4732996225357056, "learning_rate": 6.732899584341618e-05, "loss": 4.9222, "step": 35160 }, { "epoch": 0.06736754194668636, "grad_norm": 1.4730342626571655, "learning_rate": 6.734815060752526e-05, "loss": 5.0059, "step": 35170 }, { "epoch": 0.06738669677806158, "grad_norm": 1.5561705827713013, "learning_rate": 6.736730537163435e-05, "loss": 5.0255, "step": 35180 }, { "epoch": 0.06740585160943682, "grad_norm": 1.6246845722198486, "learning_rate": 6.738646013574343e-05, "loss": 5.1063, "step": 35190 }, { "epoch": 0.06742500644081205, "grad_norm": 1.5393298864364624, "learning_rate": 6.74056148998525e-05, "loss": 5.0575, "step": 35200 }, { "epoch": 0.06744416127218728, "grad_norm": 1.5333155393600464, "learning_rate": 
6.742476966396159e-05, "loss": 5.0479, "step": 35210 }, { "epoch": 0.06746331610356252, "grad_norm": 1.5674614906311035, "learning_rate": 6.744392442807067e-05, "loss": 5.0043, "step": 35220 }, { "epoch": 0.06748247093493774, "grad_norm": 1.4879684448242188, "learning_rate": 6.746307919217974e-05, "loss": 4.9232, "step": 35230 }, { "epoch": 0.06750162576631297, "grad_norm": 1.6005053520202637, "learning_rate": 6.748223395628882e-05, "loss": 5.0447, "step": 35240 }, { "epoch": 0.06752078059768821, "grad_norm": 1.5829393863677979, "learning_rate": 6.75013887203979e-05, "loss": 5.0365, "step": 35250 }, { "epoch": 0.06753993542906343, "grad_norm": 1.5582937002182007, "learning_rate": 6.752054348450698e-05, "loss": 4.8801, "step": 35260 }, { "epoch": 0.06755909026043866, "grad_norm": 1.5879544019699097, "learning_rate": 6.753969824861606e-05, "loss": 4.9388, "step": 35270 }, { "epoch": 0.0675782450918139, "grad_norm": 1.5016632080078125, "learning_rate": 6.755885301272514e-05, "loss": 5.1529, "step": 35280 }, { "epoch": 0.06759739992318912, "grad_norm": 1.5477546453475952, "learning_rate": 6.757800777683423e-05, "loss": 4.9631, "step": 35290 }, { "epoch": 0.06761655475456435, "grad_norm": 1.4894051551818848, "learning_rate": 6.759716254094331e-05, "loss": 5.1317, "step": 35300 }, { "epoch": 0.06763570958593959, "grad_norm": 1.5406274795532227, "learning_rate": 6.761631730505238e-05, "loss": 5.012, "step": 35310 }, { "epoch": 0.06765486441731482, "grad_norm": 1.5410988330841064, "learning_rate": 6.763547206916146e-05, "loss": 5.0833, "step": 35320 }, { "epoch": 0.06767401924869004, "grad_norm": 1.7842191457748413, "learning_rate": 6.765462683327055e-05, "loss": 4.956, "step": 35330 }, { "epoch": 0.06769317408006528, "grad_norm": 1.5408685207366943, "learning_rate": 6.767378159737962e-05, "loss": 4.8963, "step": 35340 }, { "epoch": 0.06771232891144051, "grad_norm": 3.903140068054199, "learning_rate": 6.76929363614887e-05, "loss": 5.087, "step": 35350 }, { "epoch": 
0.06773148374281573, "grad_norm": 1.640630841255188, "learning_rate": 6.771209112559778e-05, "loss": 5.0322, "step": 35360 }, { "epoch": 0.06775063857419097, "grad_norm": 1.5265138149261475, "learning_rate": 6.773124588970686e-05, "loss": 5.0476, "step": 35370 }, { "epoch": 0.0677697934055662, "grad_norm": 1.630252480506897, "learning_rate": 6.775040065381594e-05, "loss": 5.1603, "step": 35380 }, { "epoch": 0.06778894823694143, "grad_norm": 1.4978243112564087, "learning_rate": 6.776955541792503e-05, "loss": 5.0648, "step": 35390 }, { "epoch": 0.06780810306831667, "grad_norm": 1.6910446882247925, "learning_rate": 6.778871018203411e-05, "loss": 4.9358, "step": 35400 }, { "epoch": 0.06782725789969189, "grad_norm": 1.5628567934036255, "learning_rate": 6.780786494614319e-05, "loss": 4.9531, "step": 35410 }, { "epoch": 0.06784641273106712, "grad_norm": 1.5472441911697388, "learning_rate": 6.782701971025226e-05, "loss": 4.997, "step": 35420 }, { "epoch": 0.06786556756244236, "grad_norm": 1.6065319776535034, "learning_rate": 6.784617447436134e-05, "loss": 4.8758, "step": 35430 }, { "epoch": 0.06788472239381758, "grad_norm": 1.5221421718597412, "learning_rate": 6.786532923847043e-05, "loss": 5.0795, "step": 35440 }, { "epoch": 0.06790387722519282, "grad_norm": 1.6805044412612915, "learning_rate": 6.78844840025795e-05, "loss": 4.9974, "step": 35450 }, { "epoch": 0.06792303205656805, "grad_norm": 1.6088000535964966, "learning_rate": 6.790363876668858e-05, "loss": 4.9602, "step": 35460 }, { "epoch": 0.06794218688794328, "grad_norm": 1.525740385055542, "learning_rate": 6.792279353079766e-05, "loss": 4.7464, "step": 35470 }, { "epoch": 0.06796134171931852, "grad_norm": 1.5187571048736572, "learning_rate": 6.794194829490674e-05, "loss": 4.9304, "step": 35480 }, { "epoch": 0.06798049655069374, "grad_norm": 1.5121814012527466, "learning_rate": 6.796110305901583e-05, "loss": 5.1036, "step": 35490 }, { "epoch": 0.06799965138206897, "grad_norm": 1.530246615409851, "learning_rate": 
6.79802578231249e-05, "loss": 4.8052, "step": 35500 }, { "epoch": 0.06801880621344421, "grad_norm": 1.5682613849639893, "learning_rate": 6.799941258723397e-05, "loss": 4.9546, "step": 35510 }, { "epoch": 0.06803796104481943, "grad_norm": 1.6120555400848389, "learning_rate": 6.801856735134307e-05, "loss": 4.9211, "step": 35520 }, { "epoch": 0.06805711587619466, "grad_norm": 1.4832626581192017, "learning_rate": 6.803772211545214e-05, "loss": 4.9828, "step": 35530 }, { "epoch": 0.0680762707075699, "grad_norm": 1.4979327917099, "learning_rate": 6.805687687956122e-05, "loss": 4.8785, "step": 35540 }, { "epoch": 0.06809542553894513, "grad_norm": 1.4512336254119873, "learning_rate": 6.807603164367031e-05, "loss": 5.0979, "step": 35550 }, { "epoch": 0.06811458037032035, "grad_norm": 1.5227339267730713, "learning_rate": 6.809518640777938e-05, "loss": 5.0068, "step": 35560 }, { "epoch": 0.06813373520169559, "grad_norm": 1.5065358877182007, "learning_rate": 6.811434117188846e-05, "loss": 5.2015, "step": 35570 }, { "epoch": 0.06815289003307082, "grad_norm": 1.609024167060852, "learning_rate": 6.813349593599754e-05, "loss": 4.9918, "step": 35580 }, { "epoch": 0.06817204486444604, "grad_norm": 1.5722967386245728, "learning_rate": 6.815265070010662e-05, "loss": 5.0016, "step": 35590 }, { "epoch": 0.06819119969582128, "grad_norm": 1.4943606853485107, "learning_rate": 6.81718054642157e-05, "loss": 4.9016, "step": 35600 }, { "epoch": 0.06821035452719651, "grad_norm": 1.5025173425674438, "learning_rate": 6.819096022832479e-05, "loss": 5.0137, "step": 35610 }, { "epoch": 0.06822950935857174, "grad_norm": 1.6419333219528198, "learning_rate": 6.821011499243385e-05, "loss": 5.0171, "step": 35620 }, { "epoch": 0.06824866418994698, "grad_norm": 1.6006499528884888, "learning_rate": 6.822926975654295e-05, "loss": 4.9222, "step": 35630 }, { "epoch": 0.0682678190213222, "grad_norm": 1.5287532806396484, "learning_rate": 6.824842452065202e-05, "loss": 5.0439, "step": 35640 }, { "epoch": 
0.06828697385269743, "grad_norm": 1.64695405960083, "learning_rate": 6.82675792847611e-05, "loss": 5.049, "step": 35650 }, { "epoch": 0.06830612868407267, "grad_norm": 1.528124213218689, "learning_rate": 6.828673404887018e-05, "loss": 5.006, "step": 35660 }, { "epoch": 0.0683252835154479, "grad_norm": 1.592096209526062, "learning_rate": 6.830588881297926e-05, "loss": 4.9163, "step": 35670 }, { "epoch": 0.06834443834682312, "grad_norm": 1.6083446741104126, "learning_rate": 6.832504357708834e-05, "loss": 4.7762, "step": 35680 }, { "epoch": 0.06836359317819836, "grad_norm": 1.517842173576355, "learning_rate": 6.834419834119742e-05, "loss": 4.935, "step": 35690 }, { "epoch": 0.06838274800957359, "grad_norm": 1.6374586820602417, "learning_rate": 6.836335310530649e-05, "loss": 4.9358, "step": 35700 }, { "epoch": 0.06840190284094881, "grad_norm": 1.4947415590286255, "learning_rate": 6.838250786941559e-05, "loss": 5.0237, "step": 35710 }, { "epoch": 0.06842105767232405, "grad_norm": 1.5422831773757935, "learning_rate": 6.840166263352467e-05, "loss": 5.1029, "step": 35720 }, { "epoch": 0.06844021250369928, "grad_norm": 1.449631929397583, "learning_rate": 6.842081739763374e-05, "loss": 5.0644, "step": 35730 }, { "epoch": 0.0684593673350745, "grad_norm": 1.48075532913208, "learning_rate": 6.843997216174283e-05, "loss": 5.0292, "step": 35740 }, { "epoch": 0.06847852216644974, "grad_norm": 1.5433706045150757, "learning_rate": 6.84591269258519e-05, "loss": 4.9783, "step": 35750 }, { "epoch": 0.06849767699782497, "grad_norm": 1.510180115699768, "learning_rate": 6.847828168996098e-05, "loss": 5.033, "step": 35760 }, { "epoch": 0.0685168318292002, "grad_norm": 1.5516074895858765, "learning_rate": 6.849743645407006e-05, "loss": 4.9946, "step": 35770 }, { "epoch": 0.06853598666057543, "grad_norm": 1.5488786697387695, "learning_rate": 6.851659121817914e-05, "loss": 5.0604, "step": 35780 }, { "epoch": 0.06855514149195066, "grad_norm": 1.7221587896347046, "learning_rate": 
6.853574598228822e-05, "loss": 4.9669, "step": 35790 }, { "epoch": 0.06857429632332589, "grad_norm": 1.5086619853973389, "learning_rate": 6.85549007463973e-05, "loss": 4.921, "step": 35800 }, { "epoch": 0.06859345115470113, "grad_norm": 1.4754739999771118, "learning_rate": 6.857405551050637e-05, "loss": 4.9577, "step": 35810 }, { "epoch": 0.06861260598607635, "grad_norm": 1.5227875709533691, "learning_rate": 6.859321027461547e-05, "loss": 4.9326, "step": 35820 }, { "epoch": 0.06863176081745158, "grad_norm": 1.4252617359161377, "learning_rate": 6.861236503872455e-05, "loss": 5.0077, "step": 35830 }, { "epoch": 0.06865091564882682, "grad_norm": 1.6465187072753906, "learning_rate": 6.863151980283362e-05, "loss": 5.0371, "step": 35840 }, { "epoch": 0.06867007048020204, "grad_norm": 1.8810228109359741, "learning_rate": 6.86506745669427e-05, "loss": 4.8566, "step": 35850 }, { "epoch": 0.06868922531157727, "grad_norm": 1.744625210762024, "learning_rate": 6.866982933105178e-05, "loss": 4.8194, "step": 35860 }, { "epoch": 0.06870838014295251, "grad_norm": 1.4577583074569702, "learning_rate": 6.868898409516086e-05, "loss": 5.0772, "step": 35870 }, { "epoch": 0.06872753497432774, "grad_norm": 1.479095220565796, "learning_rate": 6.870813885926994e-05, "loss": 5.0771, "step": 35880 }, { "epoch": 0.06874668980570296, "grad_norm": 1.496179461479187, "learning_rate": 6.872729362337902e-05, "loss": 4.92, "step": 35890 }, { "epoch": 0.0687658446370782, "grad_norm": 1.4962704181671143, "learning_rate": 6.87464483874881e-05, "loss": 4.9144, "step": 35900 }, { "epoch": 0.06878499946845343, "grad_norm": 1.3939076662063599, "learning_rate": 6.876560315159719e-05, "loss": 4.9687, "step": 35910 }, { "epoch": 0.06880415429982865, "grad_norm": 1.527221441268921, "learning_rate": 6.878475791570625e-05, "loss": 4.8702, "step": 35920 }, { "epoch": 0.0688233091312039, "grad_norm": 1.6862154006958008, "learning_rate": 6.880391267981535e-05, "loss": 4.8919, "step": 35930 }, { "epoch": 
0.06884246396257912, "grad_norm": 1.5342321395874023, "learning_rate": 6.882306744392443e-05, "loss": 5.066, "step": 35940 }, { "epoch": 0.06886161879395435, "grad_norm": 1.4907958507537842, "learning_rate": 6.88422222080335e-05, "loss": 4.9214, "step": 35950 }, { "epoch": 0.06888077362532959, "grad_norm": 1.531354308128357, "learning_rate": 6.886137697214258e-05, "loss": 4.9762, "step": 35960 }, { "epoch": 0.06889992845670481, "grad_norm": 1.4678910970687866, "learning_rate": 6.888053173625166e-05, "loss": 4.9707, "step": 35970 }, { "epoch": 0.06891908328808004, "grad_norm": 1.5400665998458862, "learning_rate": 6.889968650036074e-05, "loss": 4.8981, "step": 35980 }, { "epoch": 0.06893823811945528, "grad_norm": 1.4657989740371704, "learning_rate": 6.891884126446982e-05, "loss": 5.1313, "step": 35990 }, { "epoch": 0.0689573929508305, "grad_norm": 1.4623862504959106, "learning_rate": 6.89379960285789e-05, "loss": 4.9895, "step": 36000 }, { "epoch": 0.06897654778220573, "grad_norm": 1.7628002166748047, "learning_rate": 6.895715079268799e-05, "loss": 4.8355, "step": 36010 }, { "epoch": 0.06899570261358097, "grad_norm": 1.5051521062850952, "learning_rate": 6.897630555679707e-05, "loss": 4.9317, "step": 36020 }, { "epoch": 0.0690148574449562, "grad_norm": 1.4929598569869995, "learning_rate": 6.899546032090613e-05, "loss": 5.0986, "step": 36030 }, { "epoch": 0.06903401227633142, "grad_norm": 1.6078325510025024, "learning_rate": 6.901461508501522e-05, "loss": 4.7987, "step": 36040 }, { "epoch": 0.06905316710770666, "grad_norm": 1.4946374893188477, "learning_rate": 6.903376984912431e-05, "loss": 5.115, "step": 36050 }, { "epoch": 0.06907232193908189, "grad_norm": 1.7160769701004028, "learning_rate": 6.905292461323338e-05, "loss": 5.0912, "step": 36060 }, { "epoch": 0.06909147677045711, "grad_norm": 1.578895092010498, "learning_rate": 6.907207937734246e-05, "loss": 5.0799, "step": 36070 }, { "epoch": 0.06911063160183235, "grad_norm": 1.6087368726730347, "learning_rate": 
6.909123414145154e-05, "loss": 4.9414, "step": 36080 }, { "epoch": 0.06912978643320758, "grad_norm": 1.5815656185150146, "learning_rate": 6.911038890556062e-05, "loss": 4.9695, "step": 36090 }, { "epoch": 0.06914894126458282, "grad_norm": 1.5695472955703735, "learning_rate": 6.91295436696697e-05, "loss": 4.9568, "step": 36100 }, { "epoch": 0.06916809609595805, "grad_norm": 1.9210134744644165, "learning_rate": 6.914869843377878e-05, "loss": 5.0569, "step": 36110 }, { "epoch": 0.06918725092733327, "grad_norm": 1.4815647602081299, "learning_rate": 6.916785319788787e-05, "loss": 4.986, "step": 36120 }, { "epoch": 0.06920640575870851, "grad_norm": 1.481034755706787, "learning_rate": 6.918700796199695e-05, "loss": 5.1127, "step": 36130 }, { "epoch": 0.06922556059008374, "grad_norm": 1.4884997606277466, "learning_rate": 6.920616272610602e-05, "loss": 5.0067, "step": 36140 }, { "epoch": 0.06924471542145896, "grad_norm": 1.5403894186019897, "learning_rate": 6.92253174902151e-05, "loss": 5.01, "step": 36150 }, { "epoch": 0.0692638702528342, "grad_norm": 1.5030646324157715, "learning_rate": 6.924447225432419e-05, "loss": 5.0018, "step": 36160 }, { "epoch": 0.06928302508420943, "grad_norm": 1.5664225816726685, "learning_rate": 6.926362701843326e-05, "loss": 5.0195, "step": 36170 }, { "epoch": 0.06930217991558466, "grad_norm": 1.6736729145050049, "learning_rate": 6.928278178254234e-05, "loss": 5.0384, "step": 36180 }, { "epoch": 0.0693213347469599, "grad_norm": 1.6124721765518188, "learning_rate": 6.930193654665142e-05, "loss": 4.9755, "step": 36190 }, { "epoch": 0.06934048957833512, "grad_norm": 1.4817966222763062, "learning_rate": 6.93210913107605e-05, "loss": 4.9954, "step": 36200 }, { "epoch": 0.06935964440971035, "grad_norm": 1.5364774465560913, "learning_rate": 6.934024607486958e-05, "loss": 5.0038, "step": 36210 }, { "epoch": 0.06937879924108559, "grad_norm": 1.626296043395996, "learning_rate": 6.935940083897867e-05, "loss": 4.8637, "step": 36220 }, { "epoch": 
0.06939795407246081, "grad_norm": 1.811115026473999, "learning_rate": 6.937855560308775e-05, "loss": 5.1332, "step": 36230 }, { "epoch": 0.06941710890383604, "grad_norm": 1.5868005752563477, "learning_rate": 6.939771036719683e-05, "loss": 4.8512, "step": 36240 }, { "epoch": 0.06943626373521128, "grad_norm": 1.5342296361923218, "learning_rate": 6.94168651313059e-05, "loss": 4.9379, "step": 36250 }, { "epoch": 0.0694554185665865, "grad_norm": 1.564758062362671, "learning_rate": 6.943601989541498e-05, "loss": 4.9576, "step": 36260 }, { "epoch": 0.06947457339796173, "grad_norm": 1.5126910209655762, "learning_rate": 6.945517465952407e-05, "loss": 4.9423, "step": 36270 }, { "epoch": 0.06949372822933697, "grad_norm": 1.5531376600265503, "learning_rate": 6.947432942363314e-05, "loss": 4.8802, "step": 36280 }, { "epoch": 0.0695128830607122, "grad_norm": 1.5051466226577759, "learning_rate": 6.949348418774222e-05, "loss": 4.9044, "step": 36290 }, { "epoch": 0.06953203789208742, "grad_norm": 1.5732759237289429, "learning_rate": 6.95126389518513e-05, "loss": 4.9357, "step": 36300 }, { "epoch": 0.06955119272346266, "grad_norm": 1.5102347135543823, "learning_rate": 6.953179371596038e-05, "loss": 4.8217, "step": 36310 }, { "epoch": 0.06957034755483789, "grad_norm": 1.5343422889709473, "learning_rate": 6.955094848006946e-05, "loss": 4.9158, "step": 36320 }, { "epoch": 0.06958950238621311, "grad_norm": 1.496111273765564, "learning_rate": 6.957010324417855e-05, "loss": 4.9492, "step": 36330 }, { "epoch": 0.06960865721758835, "grad_norm": 1.4704115390777588, "learning_rate": 6.958925800828761e-05, "loss": 5.0077, "step": 36340 }, { "epoch": 0.06962781204896358, "grad_norm": 1.5043660402297974, "learning_rate": 6.960841277239671e-05, "loss": 5.0408, "step": 36350 }, { "epoch": 0.0696469668803388, "grad_norm": 1.4773082733154297, "learning_rate": 6.962756753650578e-05, "loss": 5.0985, "step": 36360 }, { "epoch": 0.06966612171171405, "grad_norm": 1.4828765392303467, "learning_rate": 
6.964672230061486e-05, "loss": 4.8445, "step": 36370 }, { "epoch": 0.06968527654308927, "grad_norm": 1.5031615495681763, "learning_rate": 6.966587706472394e-05, "loss": 4.966, "step": 36380 }, { "epoch": 0.0697044313744645, "grad_norm": 1.6096278429031372, "learning_rate": 6.968503182883302e-05, "loss": 5.0419, "step": 36390 }, { "epoch": 0.06972358620583974, "grad_norm": 1.4791419506072998, "learning_rate": 6.97041865929421e-05, "loss": 4.8902, "step": 36400 }, { "epoch": 0.06974274103721496, "grad_norm": 1.5338295698165894, "learning_rate": 6.972334135705118e-05, "loss": 5.018, "step": 36410 }, { "epoch": 0.06976189586859019, "grad_norm": 1.8302274942398071, "learning_rate": 6.974249612116026e-05, "loss": 5.0023, "step": 36420 }, { "epoch": 0.06978105069996543, "grad_norm": 1.6797360181808472, "learning_rate": 6.976165088526935e-05, "loss": 5.0288, "step": 36430 }, { "epoch": 0.06980020553134066, "grad_norm": 1.5747458934783936, "learning_rate": 6.978080564937843e-05, "loss": 4.9359, "step": 36440 }, { "epoch": 0.06981936036271588, "grad_norm": 1.4933867454528809, "learning_rate": 6.97999604134875e-05, "loss": 5.0988, "step": 36450 }, { "epoch": 0.06983851519409112, "grad_norm": 1.5072070360183716, "learning_rate": 6.981911517759659e-05, "loss": 4.9852, "step": 36460 }, { "epoch": 0.06985767002546635, "grad_norm": 1.5074384212493896, "learning_rate": 6.983826994170566e-05, "loss": 5.0687, "step": 36470 }, { "epoch": 0.06987682485684157, "grad_norm": 1.5359736680984497, "learning_rate": 6.985742470581474e-05, "loss": 4.862, "step": 36480 }, { "epoch": 0.06989597968821681, "grad_norm": 1.6141960620880127, "learning_rate": 6.987657946992382e-05, "loss": 4.9966, "step": 36490 }, { "epoch": 0.06991513451959204, "grad_norm": 1.4613261222839355, "learning_rate": 6.98957342340329e-05, "loss": 5.0615, "step": 36500 }, { "epoch": 0.06993428935096727, "grad_norm": 1.4763543605804443, "learning_rate": 6.991488899814198e-05, "loss": 5.0609, "step": 36510 }, { "epoch": 
0.0699534441823425, "grad_norm": 1.5293750762939453, "learning_rate": 6.993404376225106e-05, "loss": 4.9683, "step": 36520 }, { "epoch": 0.06997259901371773, "grad_norm": 1.637502908706665, "learning_rate": 6.995319852636013e-05, "loss": 4.9644, "step": 36530 }, { "epoch": 0.06999175384509296, "grad_norm": 1.5642406940460205, "learning_rate": 6.997235329046923e-05, "loss": 4.9024, "step": 36540 }, { "epoch": 0.0700109086764682, "grad_norm": 1.5238853693008423, "learning_rate": 6.999150805457831e-05, "loss": 4.9909, "step": 36550 }, { "epoch": 0.07003006350784342, "grad_norm": 1.5659269094467163, "learning_rate": 7.001066281868738e-05, "loss": 4.8811, "step": 36560 }, { "epoch": 0.07004921833921865, "grad_norm": 1.5312288999557495, "learning_rate": 7.002981758279647e-05, "loss": 4.795, "step": 36570 }, { "epoch": 0.07006837317059389, "grad_norm": 1.5779553651809692, "learning_rate": 7.004897234690554e-05, "loss": 4.9747, "step": 36580 }, { "epoch": 0.07008752800196912, "grad_norm": 1.552264928817749, "learning_rate": 7.006812711101462e-05, "loss": 4.933, "step": 36590 }, { "epoch": 0.07010668283334434, "grad_norm": 1.6226390600204468, "learning_rate": 7.00872818751237e-05, "loss": 5.1271, "step": 36600 }, { "epoch": 0.07012583766471958, "grad_norm": 1.6823303699493408, "learning_rate": 7.010643663923278e-05, "loss": 4.8702, "step": 36610 }, { "epoch": 0.07014499249609481, "grad_norm": 1.4770084619522095, "learning_rate": 7.012559140334186e-05, "loss": 4.9282, "step": 36620 }, { "epoch": 0.07016414732747003, "grad_norm": 1.5373032093048096, "learning_rate": 7.014474616745094e-05, "loss": 4.9513, "step": 36630 }, { "epoch": 0.07018330215884527, "grad_norm": 1.5489718914031982, "learning_rate": 7.016390093156001e-05, "loss": 4.9947, "step": 36640 }, { "epoch": 0.0702024569902205, "grad_norm": 1.4409936666488647, "learning_rate": 7.018305569566911e-05, "loss": 5.0402, "step": 36650 }, { "epoch": 0.07022161182159573, "grad_norm": 1.7474159002304077, "learning_rate": 
7.020221045977819e-05, "loss": 4.8403, "step": 36660 }, { "epoch": 0.07024076665297097, "grad_norm": 1.5261573791503906, "learning_rate": 7.022136522388726e-05, "loss": 4.9619, "step": 36670 }, { "epoch": 0.07025992148434619, "grad_norm": 1.5255250930786133, "learning_rate": 7.024051998799634e-05, "loss": 4.8916, "step": 36680 }, { "epoch": 0.07027907631572142, "grad_norm": 1.7167561054229736, "learning_rate": 7.025967475210542e-05, "loss": 4.7962, "step": 36690 }, { "epoch": 0.07029823114709666, "grad_norm": 1.4600434303283691, "learning_rate": 7.02788295162145e-05, "loss": 4.9695, "step": 36700 }, { "epoch": 0.07031738597847188, "grad_norm": 1.5396454334259033, "learning_rate": 7.029798428032358e-05, "loss": 5.0684, "step": 36710 }, { "epoch": 0.07033654080984711, "grad_norm": 1.528546929359436, "learning_rate": 7.031713904443266e-05, "loss": 4.9295, "step": 36720 }, { "epoch": 0.07035569564122235, "grad_norm": 1.595357894897461, "learning_rate": 7.033629380854174e-05, "loss": 4.86, "step": 36730 }, { "epoch": 0.07037485047259757, "grad_norm": 1.4746536016464233, "learning_rate": 7.035544857265083e-05, "loss": 4.8897, "step": 36740 }, { "epoch": 0.0703940053039728, "grad_norm": 1.616584300994873, "learning_rate": 7.03746033367599e-05, "loss": 4.998, "step": 36750 }, { "epoch": 0.07041316013534804, "grad_norm": 1.5924909114837646, "learning_rate": 7.039375810086899e-05, "loss": 4.9813, "step": 36760 }, { "epoch": 0.07043231496672327, "grad_norm": 1.5329219102859497, "learning_rate": 7.041291286497807e-05, "loss": 5.0081, "step": 36770 }, { "epoch": 0.0704514697980985, "grad_norm": 1.4718166589736938, "learning_rate": 7.043206762908714e-05, "loss": 4.9436, "step": 36780 }, { "epoch": 0.07047062462947373, "grad_norm": 1.4509191513061523, "learning_rate": 7.045122239319622e-05, "loss": 5.0586, "step": 36790 }, { "epoch": 0.07048977946084896, "grad_norm": 1.616441249847412, "learning_rate": 7.04703771573053e-05, "loss": 4.8406, "step": 36800 }, { "epoch": 
0.0705089342922242, "grad_norm": 1.504933476448059, "learning_rate": 7.048953192141438e-05, "loss": 4.8649, "step": 36810 }, { "epoch": 0.07052808912359942, "grad_norm": 1.5050021409988403, "learning_rate": 7.050868668552346e-05, "loss": 4.9779, "step": 36820 }, { "epoch": 0.07054724395497465, "grad_norm": 1.4278734922409058, "learning_rate": 7.052784144963254e-05, "loss": 5.0106, "step": 36830 }, { "epoch": 0.07056639878634989, "grad_norm": 1.5008436441421509, "learning_rate": 7.054699621374163e-05, "loss": 5.0312, "step": 36840 }, { "epoch": 0.07058555361772512, "grad_norm": 1.585615873336792, "learning_rate": 7.05661509778507e-05, "loss": 4.7611, "step": 36850 }, { "epoch": 0.07060470844910034, "grad_norm": 1.4641646146774292, "learning_rate": 7.058530574195977e-05, "loss": 4.9034, "step": 36860 }, { "epoch": 0.07062386328047558, "grad_norm": 1.5805381536483765, "learning_rate": 7.060446050606886e-05, "loss": 5.161, "step": 36870 }, { "epoch": 0.07064301811185081, "grad_norm": 1.4878571033477783, "learning_rate": 7.062361527017795e-05, "loss": 4.9537, "step": 36880 }, { "epoch": 0.07066217294322603, "grad_norm": 1.4995836019515991, "learning_rate": 7.064277003428702e-05, "loss": 4.9111, "step": 36890 }, { "epoch": 0.07068132777460127, "grad_norm": 1.4918636083602905, "learning_rate": 7.06619247983961e-05, "loss": 4.9654, "step": 36900 }, { "epoch": 0.0707004826059765, "grad_norm": 1.5451452732086182, "learning_rate": 7.068107956250518e-05, "loss": 5.0649, "step": 36910 }, { "epoch": 0.07071963743735173, "grad_norm": 1.478678822517395, "learning_rate": 7.070023432661426e-05, "loss": 5.1495, "step": 36920 }, { "epoch": 0.07073879226872697, "grad_norm": 1.500884771347046, "learning_rate": 7.071938909072334e-05, "loss": 4.8924, "step": 36930 }, { "epoch": 0.07075794710010219, "grad_norm": 1.4535809755325317, "learning_rate": 7.073854385483242e-05, "loss": 4.9657, "step": 36940 }, { "epoch": 0.07077710193147742, "grad_norm": 1.584787368774414, "learning_rate": 
7.07576986189415e-05, "loss": 4.9808, "step": 36950 }, { "epoch": 0.07079625676285266, "grad_norm": 1.5211033821105957, "learning_rate": 7.077685338305059e-05, "loss": 4.974, "step": 36960 }, { "epoch": 0.07081541159422788, "grad_norm": 1.5589957237243652, "learning_rate": 7.079600814715966e-05, "loss": 4.8958, "step": 36970 }, { "epoch": 0.07083456642560311, "grad_norm": 1.4089728593826294, "learning_rate": 7.081516291126874e-05, "loss": 4.9972, "step": 36980 }, { "epoch": 0.07085372125697835, "grad_norm": 1.49006986618042, "learning_rate": 7.083431767537783e-05, "loss": 4.9033, "step": 36990 }, { "epoch": 0.07087287608835358, "grad_norm": 1.4847289323806763, "learning_rate": 7.08534724394869e-05, "loss": 5.0462, "step": 37000 }, { "epoch": 0.0708920309197288, "grad_norm": 1.6129745244979858, "learning_rate": 7.087262720359598e-05, "loss": 5.079, "step": 37010 }, { "epoch": 0.07091118575110404, "grad_norm": 1.5001602172851562, "learning_rate": 7.089178196770506e-05, "loss": 4.9066, "step": 37020 }, { "epoch": 0.07093034058247927, "grad_norm": 1.3682944774627686, "learning_rate": 7.091093673181414e-05, "loss": 4.9744, "step": 37030 }, { "epoch": 0.0709494954138545, "grad_norm": 1.6577926874160767, "learning_rate": 7.093009149592322e-05, "loss": 5.0657, "step": 37040 }, { "epoch": 0.07096865024522973, "grad_norm": 1.5366603136062622, "learning_rate": 7.09492462600323e-05, "loss": 4.8403, "step": 37050 }, { "epoch": 0.07098780507660496, "grad_norm": 1.5228164196014404, "learning_rate": 7.096840102414137e-05, "loss": 4.9104, "step": 37060 }, { "epoch": 0.07100695990798019, "grad_norm": 1.4381195306777954, "learning_rate": 7.098755578825047e-05, "loss": 4.9272, "step": 37070 }, { "epoch": 0.07102611473935543, "grad_norm": 1.4560152292251587, "learning_rate": 7.100671055235954e-05, "loss": 4.8044, "step": 37080 }, { "epoch": 0.07104526957073065, "grad_norm": 1.637985110282898, "learning_rate": 7.102586531646862e-05, "loss": 4.8519, "step": 37090 }, { "epoch": 
0.07106442440210588, "grad_norm": 1.553355097770691, "learning_rate": 7.104502008057771e-05, "loss": 4.937, "step": 37100 }, { "epoch": 0.07108357923348112, "grad_norm": 1.4615105390548706, "learning_rate": 7.106417484468678e-05, "loss": 4.9411, "step": 37110 }, { "epoch": 0.07110273406485634, "grad_norm": 1.463911533355713, "learning_rate": 7.108332960879586e-05, "loss": 4.9661, "step": 37120 }, { "epoch": 0.07112188889623157, "grad_norm": 1.5429069995880127, "learning_rate": 7.110248437290494e-05, "loss": 4.93, "step": 37130 }, { "epoch": 0.07114104372760681, "grad_norm": 1.5404735803604126, "learning_rate": 7.112163913701402e-05, "loss": 4.8384, "step": 37140 }, { "epoch": 0.07116019855898204, "grad_norm": 1.5407133102416992, "learning_rate": 7.11407939011231e-05, "loss": 4.8705, "step": 37150 }, { "epoch": 0.07117935339035726, "grad_norm": 1.5849690437316895, "learning_rate": 7.115994866523219e-05, "loss": 4.9661, "step": 37160 }, { "epoch": 0.0711985082217325, "grad_norm": 1.5025748014450073, "learning_rate": 7.117910342934125e-05, "loss": 4.9505, "step": 37170 }, { "epoch": 0.07121766305310773, "grad_norm": 1.5842605829238892, "learning_rate": 7.119825819345035e-05, "loss": 4.9266, "step": 37180 }, { "epoch": 0.07123681788448295, "grad_norm": 1.5029175281524658, "learning_rate": 7.121741295755942e-05, "loss": 4.9587, "step": 37190 }, { "epoch": 0.07125597271585819, "grad_norm": 1.5302623510360718, "learning_rate": 7.12365677216685e-05, "loss": 4.904, "step": 37200 }, { "epoch": 0.07127512754723342, "grad_norm": 1.4621578454971313, "learning_rate": 7.125572248577758e-05, "loss": 4.93, "step": 37210 }, { "epoch": 0.07129428237860864, "grad_norm": 1.5835505723953247, "learning_rate": 7.127487724988666e-05, "loss": 4.8663, "step": 37220 }, { "epoch": 0.07131343720998388, "grad_norm": 1.5980561971664429, "learning_rate": 7.129403201399574e-05, "loss": 4.9402, "step": 37230 }, { "epoch": 0.07133259204135911, "grad_norm": 1.4973634481430054, "learning_rate": 
7.131318677810482e-05, "loss": 5.0584, "step": 37240 }, { "epoch": 0.07135174687273434, "grad_norm": 1.5188223123550415, "learning_rate": 7.13323415422139e-05, "loss": 4.9634, "step": 37250 }, { "epoch": 0.07137090170410958, "grad_norm": 1.526567816734314, "learning_rate": 7.135149630632299e-05, "loss": 4.9276, "step": 37260 }, { "epoch": 0.0713900565354848, "grad_norm": 1.5572618246078491, "learning_rate": 7.137065107043207e-05, "loss": 4.9401, "step": 37270 }, { "epoch": 0.07140921136686003, "grad_norm": 1.4622408151626587, "learning_rate": 7.138980583454113e-05, "loss": 4.9766, "step": 37280 }, { "epoch": 0.07142836619823527, "grad_norm": 1.572951316833496, "learning_rate": 7.140896059865023e-05, "loss": 4.9112, "step": 37290 }, { "epoch": 0.0714475210296105, "grad_norm": 1.631742000579834, "learning_rate": 7.14281153627593e-05, "loss": 4.9999, "step": 37300 }, { "epoch": 0.07146667586098572, "grad_norm": 1.4883487224578857, "learning_rate": 7.144727012686838e-05, "loss": 4.8044, "step": 37310 }, { "epoch": 0.07148583069236096, "grad_norm": 1.4351365566253662, "learning_rate": 7.146642489097746e-05, "loss": 4.8693, "step": 37320 }, { "epoch": 0.07150498552373619, "grad_norm": 1.5286688804626465, "learning_rate": 7.148557965508654e-05, "loss": 5.0138, "step": 37330 }, { "epoch": 0.07152414035511141, "grad_norm": 1.4964885711669922, "learning_rate": 7.150473441919562e-05, "loss": 4.9303, "step": 37340 }, { "epoch": 0.07154329518648665, "grad_norm": 1.5086450576782227, "learning_rate": 7.15238891833047e-05, "loss": 5.044, "step": 37350 }, { "epoch": 0.07156245001786188, "grad_norm": 1.4655476808547974, "learning_rate": 7.154304394741377e-05, "loss": 4.9639, "step": 37360 }, { "epoch": 0.0715816048492371, "grad_norm": 1.5341414213180542, "learning_rate": 7.156219871152287e-05, "loss": 4.8572, "step": 37370 }, { "epoch": 0.07160075968061234, "grad_norm": 1.520814299583435, "learning_rate": 7.158135347563195e-05, "loss": 4.8959, "step": 37380 }, { "epoch": 
0.07161991451198757, "grad_norm": 1.568187952041626, "learning_rate": 7.160050823974102e-05, "loss": 4.9905, "step": 37390 }, { "epoch": 0.0716390693433628, "grad_norm": 1.6191604137420654, "learning_rate": 7.161774752743919e-05, "loss": 4.7678, "step": 37400 }, { "epoch": 0.07165822417473804, "grad_norm": 1.4847586154937744, "learning_rate": 7.163690229154827e-05, "loss": 4.8575, "step": 37410 }, { "epoch": 0.07167737900611326, "grad_norm": 1.4777578115463257, "learning_rate": 7.165605705565735e-05, "loss": 4.8637, "step": 37420 }, { "epoch": 0.0716965338374885, "grad_norm": 1.5005563497543335, "learning_rate": 7.167521181976643e-05, "loss": 4.9817, "step": 37430 }, { "epoch": 0.07171568866886373, "grad_norm": 1.5048303604125977, "learning_rate": 7.169436658387551e-05, "loss": 4.8272, "step": 37440 }, { "epoch": 0.07173484350023895, "grad_norm": 1.5454821586608887, "learning_rate": 7.17135213479846e-05, "loss": 5.0289, "step": 37450 }, { "epoch": 0.0717539983316142, "grad_norm": 1.7258391380310059, "learning_rate": 7.173267611209368e-05, "loss": 4.8733, "step": 37460 }, { "epoch": 0.07177315316298942, "grad_norm": 1.4644888639450073, "learning_rate": 7.175183087620276e-05, "loss": 4.9321, "step": 37470 }, { "epoch": 0.07179230799436465, "grad_norm": 1.487329125404358, "learning_rate": 7.177098564031184e-05, "loss": 4.9765, "step": 37480 }, { "epoch": 0.07181146282573989, "grad_norm": 1.447044014930725, "learning_rate": 7.179014040442091e-05, "loss": 4.8471, "step": 37490 }, { "epoch": 0.07183061765711511, "grad_norm": 1.80016028881073, "learning_rate": 7.180929516852999e-05, "loss": 4.783, "step": 37500 }, { "epoch": 0.07184977248849034, "grad_norm": 1.5659048557281494, "learning_rate": 7.182844993263907e-05, "loss": 5.0639, "step": 37510 }, { "epoch": 0.07186892731986558, "grad_norm": 1.4586069583892822, "learning_rate": 7.184760469674815e-05, "loss": 4.9768, "step": 37520 }, { "epoch": 0.0718880821512408, "grad_norm": 1.7161173820495605, "learning_rate": 
7.186675946085723e-05, "loss": 4.8835, "step": 37530 }, { "epoch": 0.07190723698261603, "grad_norm": 1.5087590217590332, "learning_rate": 7.188591422496631e-05, "loss": 4.9353, "step": 37540 }, { "epoch": 0.07192639181399127, "grad_norm": 1.534852385520935, "learning_rate": 7.19050689890754e-05, "loss": 4.9375, "step": 37550 }, { "epoch": 0.0719455466453665, "grad_norm": 1.4904066324234009, "learning_rate": 7.192422375318448e-05, "loss": 5.0687, "step": 37560 }, { "epoch": 0.07196470147674172, "grad_norm": 1.4833662509918213, "learning_rate": 7.194337851729356e-05, "loss": 4.8694, "step": 37570 }, { "epoch": 0.07198385630811696, "grad_norm": 1.4839595556259155, "learning_rate": 7.196253328140264e-05, "loss": 4.897, "step": 37580 }, { "epoch": 0.07200301113949219, "grad_norm": 1.4259028434753418, "learning_rate": 7.198168804551172e-05, "loss": 5.0143, "step": 37590 }, { "epoch": 0.07202216597086741, "grad_norm": 1.5208669900894165, "learning_rate": 7.200084280962079e-05, "loss": 5.0369, "step": 37600 }, { "epoch": 0.07204132080224265, "grad_norm": 1.5744174718856812, "learning_rate": 7.201999757372987e-05, "loss": 4.8822, "step": 37610 }, { "epoch": 0.07206047563361788, "grad_norm": 1.4019174575805664, "learning_rate": 7.203915233783895e-05, "loss": 4.9147, "step": 37620 }, { "epoch": 0.0720796304649931, "grad_norm": 1.4762755632400513, "learning_rate": 7.205830710194803e-05, "loss": 4.9632, "step": 37630 }, { "epoch": 0.07209878529636835, "grad_norm": 1.5455679893493652, "learning_rate": 7.207746186605711e-05, "loss": 4.9803, "step": 37640 }, { "epoch": 0.07211794012774357, "grad_norm": 1.4914380311965942, "learning_rate": 7.20966166301662e-05, "loss": 4.8899, "step": 37650 }, { "epoch": 0.0721370949591188, "grad_norm": 1.4451225996017456, "learning_rate": 7.211577139427528e-05, "loss": 4.9478, "step": 37660 }, { "epoch": 0.07215624979049404, "grad_norm": 1.6414659023284912, "learning_rate": 7.213492615838436e-05, "loss": 5.0288, "step": 37670 }, { "epoch": 
0.07217540462186926, "grad_norm": 1.5825493335723877, "learning_rate": 7.215408092249344e-05, "loss": 5.0205, "step": 37680 }, { "epoch": 0.07219455945324449, "grad_norm": 1.5475924015045166, "learning_rate": 7.21732356866025e-05, "loss": 4.9885, "step": 37690 }, { "epoch": 0.07221371428461973, "grad_norm": 1.5396552085876465, "learning_rate": 7.21923904507116e-05, "loss": 5.0268, "step": 37700 }, { "epoch": 0.07223286911599495, "grad_norm": 1.5166813135147095, "learning_rate": 7.221154521482067e-05, "loss": 4.9572, "step": 37710 }, { "epoch": 0.07225202394737018, "grad_norm": 1.492952585220337, "learning_rate": 7.223069997892975e-05, "loss": 4.8826, "step": 37720 }, { "epoch": 0.07227117877874542, "grad_norm": 1.5479226112365723, "learning_rate": 7.224985474303883e-05, "loss": 4.9489, "step": 37730 }, { "epoch": 0.07229033361012065, "grad_norm": 1.447272539138794, "learning_rate": 7.226900950714791e-05, "loss": 4.9198, "step": 37740 }, { "epoch": 0.07230948844149587, "grad_norm": 1.5169092416763306, "learning_rate": 7.2288164271257e-05, "loss": 4.7998, "step": 37750 }, { "epoch": 0.07232864327287111, "grad_norm": 1.5031442642211914, "learning_rate": 7.230731903536608e-05, "loss": 5.0096, "step": 37760 }, { "epoch": 0.07234779810424634, "grad_norm": 1.455660343170166, "learning_rate": 7.232647379947516e-05, "loss": 5.0211, "step": 37770 }, { "epoch": 0.07236695293562156, "grad_norm": 1.526049256324768, "learning_rate": 7.234562856358424e-05, "loss": 4.9378, "step": 37780 }, { "epoch": 0.0723861077669968, "grad_norm": 1.5123558044433594, "learning_rate": 7.236478332769332e-05, "loss": 4.9858, "step": 37790 }, { "epoch": 0.07240526259837203, "grad_norm": 1.476178526878357, "learning_rate": 7.238393809180239e-05, "loss": 5.0476, "step": 37800 }, { "epoch": 0.07242441742974726, "grad_norm": 1.5233409404754639, "learning_rate": 7.240309285591148e-05, "loss": 4.9107, "step": 37810 }, { "epoch": 0.0724435722611225, "grad_norm": 1.4350045919418335, "learning_rate": 
7.242224762002055e-05, "loss": 5.13, "step": 37820 }, { "epoch": 0.07246272709249772, "grad_norm": 1.4310424327850342, "learning_rate": 7.244140238412963e-05, "loss": 4.7679, "step": 37830 }, { "epoch": 0.07248188192387295, "grad_norm": 1.4937562942504883, "learning_rate": 7.246055714823871e-05, "loss": 4.9646, "step": 37840 }, { "epoch": 0.07250103675524819, "grad_norm": 1.482094168663025, "learning_rate": 7.24797119123478e-05, "loss": 4.9555, "step": 37850 }, { "epoch": 0.07252019158662341, "grad_norm": 1.5084539651870728, "learning_rate": 7.249886667645687e-05, "loss": 4.839, "step": 37860 }, { "epoch": 0.07253934641799864, "grad_norm": 1.6453560590744019, "learning_rate": 7.251802144056596e-05, "loss": 4.9649, "step": 37870 }, { "epoch": 0.07255850124937388, "grad_norm": 1.4928522109985352, "learning_rate": 7.253717620467502e-05, "loss": 4.9588, "step": 37880 }, { "epoch": 0.0725776560807491, "grad_norm": 1.4808820486068726, "learning_rate": 7.255633096878412e-05, "loss": 4.9049, "step": 37890 }, { "epoch": 0.07259681091212433, "grad_norm": 1.4932295083999634, "learning_rate": 7.25754857328932e-05, "loss": 4.8696, "step": 37900 }, { "epoch": 0.07261596574349957, "grad_norm": 1.4835435152053833, "learning_rate": 7.259464049700227e-05, "loss": 4.9432, "step": 37910 }, { "epoch": 0.0726351205748748, "grad_norm": 1.5460245609283447, "learning_rate": 7.261379526111136e-05, "loss": 5.0241, "step": 37920 }, { "epoch": 0.07265427540625002, "grad_norm": 1.4566279649734497, "learning_rate": 7.263295002522043e-05, "loss": 5.1239, "step": 37930 }, { "epoch": 0.07267343023762526, "grad_norm": 1.4395396709442139, "learning_rate": 7.265210478932951e-05, "loss": 5.0622, "step": 37940 }, { "epoch": 0.07269258506900049, "grad_norm": 1.5554096698760986, "learning_rate": 7.267125955343859e-05, "loss": 4.8376, "step": 37950 }, { "epoch": 0.07271173990037572, "grad_norm": 1.4475947618484497, "learning_rate": 7.269041431754767e-05, "loss": 4.8124, "step": 37960 }, { "epoch": 
0.07273089473175096, "grad_norm": 1.6105995178222656, "learning_rate": 7.270956908165676e-05, "loss": 4.9922, "step": 37970 }, { "epoch": 0.07275004956312618, "grad_norm": 1.487303614616394, "learning_rate": 7.272872384576584e-05, "loss": 4.91, "step": 37980 }, { "epoch": 0.07276920439450141, "grad_norm": 1.4593983888626099, "learning_rate": 7.27478786098749e-05, "loss": 4.9031, "step": 37990 }, { "epoch": 0.07278835922587665, "grad_norm": 1.502777338027954, "learning_rate": 7.2767033373984e-05, "loss": 4.8675, "step": 38000 }, { "epoch": 0.07280751405725187, "grad_norm": 1.4456993341445923, "learning_rate": 7.278618813809307e-05, "loss": 4.978, "step": 38010 }, { "epoch": 0.0728266688886271, "grad_norm": 1.5334628820419312, "learning_rate": 7.280534290220215e-05, "loss": 5.0786, "step": 38020 }, { "epoch": 0.07284582372000234, "grad_norm": 1.5704677104949951, "learning_rate": 7.282449766631123e-05, "loss": 4.8819, "step": 38030 }, { "epoch": 0.07286497855137757, "grad_norm": 1.5382609367370605, "learning_rate": 7.284365243042031e-05, "loss": 5.0359, "step": 38040 }, { "epoch": 0.07288413338275279, "grad_norm": 1.4940441846847534, "learning_rate": 7.286280719452939e-05, "loss": 4.9197, "step": 38050 }, { "epoch": 0.07290328821412803, "grad_norm": 1.4536395072937012, "learning_rate": 7.288196195863847e-05, "loss": 4.9474, "step": 38060 }, { "epoch": 0.07292244304550326, "grad_norm": 1.5291388034820557, "learning_rate": 7.290111672274756e-05, "loss": 4.896, "step": 38070 }, { "epoch": 0.0729415978768785, "grad_norm": 1.4593132734298706, "learning_rate": 7.292027148685664e-05, "loss": 4.9444, "step": 38080 }, { "epoch": 0.07296075270825372, "grad_norm": 1.5187584161758423, "learning_rate": 7.293942625096572e-05, "loss": 5.0056, "step": 38090 }, { "epoch": 0.07297990753962895, "grad_norm": 1.5105998516082764, "learning_rate": 7.295858101507479e-05, "loss": 5.0189, "step": 38100 }, { "epoch": 0.07299906237100419, "grad_norm": 1.4784691333770752, "learning_rate": 
7.297773577918388e-05, "loss": 4.9037, "step": 38110 }, { "epoch": 0.07301821720237942, "grad_norm": 1.8770661354064941, "learning_rate": 7.299497506688204e-05, "loss": 4.9664, "step": 38120 }, { "epoch": 0.07303737203375464, "grad_norm": 1.4951902627944946, "learning_rate": 7.301412983099112e-05, "loss": 4.9503, "step": 38130 }, { "epoch": 0.07305652686512988, "grad_norm": 1.5719212293624878, "learning_rate": 7.30332845951002e-05, "loss": 4.9192, "step": 38140 }, { "epoch": 0.07307568169650511, "grad_norm": 1.5540833473205566, "learning_rate": 7.305243935920928e-05, "loss": 4.8929, "step": 38150 }, { "epoch": 0.07309483652788033, "grad_norm": 1.4739781618118286, "learning_rate": 7.307159412331837e-05, "loss": 4.8626, "step": 38160 }, { "epoch": 0.07311399135925557, "grad_norm": 1.5308520793914795, "learning_rate": 7.309074888742745e-05, "loss": 4.7742, "step": 38170 }, { "epoch": 0.0731331461906308, "grad_norm": 1.464581847190857, "learning_rate": 7.310990365153653e-05, "loss": 5.0254, "step": 38180 }, { "epoch": 0.07315230102200602, "grad_norm": 1.4649231433868408, "learning_rate": 7.312905841564561e-05, "loss": 5.0216, "step": 38190 }, { "epoch": 0.07317145585338126, "grad_norm": 1.5120370388031006, "learning_rate": 7.314821317975468e-05, "loss": 5.0258, "step": 38200 }, { "epoch": 0.07319061068475649, "grad_norm": 1.5267895460128784, "learning_rate": 7.316736794386376e-05, "loss": 4.972, "step": 38210 }, { "epoch": 0.07320976551613172, "grad_norm": 1.592403769493103, "learning_rate": 7.318652270797285e-05, "loss": 4.9485, "step": 38220 }, { "epoch": 0.07322892034750696, "grad_norm": 1.5135776996612549, "learning_rate": 7.320567747208192e-05, "loss": 4.9093, "step": 38230 }, { "epoch": 0.07324807517888218, "grad_norm": 1.5304713249206543, "learning_rate": 7.3224832236191e-05, "loss": 4.9786, "step": 38240 }, { "epoch": 0.07326723001025741, "grad_norm": 1.4477660655975342, "learning_rate": 7.324398700030008e-05, "loss": 4.9525, "step": 38250 }, { "epoch": 
0.07328638484163265, "grad_norm": 1.483903408050537, "learning_rate": 7.326314176440916e-05, "loss": 5.0188, "step": 38260 }, { "epoch": 0.07330553967300787, "grad_norm": 1.4760384559631348, "learning_rate": 7.328229652851825e-05, "loss": 4.9578, "step": 38270 }, { "epoch": 0.0733246945043831, "grad_norm": 1.465239405632019, "learning_rate": 7.330145129262733e-05, "loss": 4.9985, "step": 38280 }, { "epoch": 0.07334384933575834, "grad_norm": 1.5077004432678223, "learning_rate": 7.332060605673641e-05, "loss": 4.9983, "step": 38290 }, { "epoch": 0.07336300416713357, "grad_norm": 1.592707872390747, "learning_rate": 7.333976082084549e-05, "loss": 4.9468, "step": 38300 }, { "epoch": 0.07338215899850879, "grad_norm": 1.6126219034194946, "learning_rate": 7.335891558495456e-05, "loss": 4.9109, "step": 38310 }, { "epoch": 0.07340131382988403, "grad_norm": 1.4576929807662964, "learning_rate": 7.337807034906364e-05, "loss": 4.9658, "step": 38320 }, { "epoch": 0.07342046866125926, "grad_norm": 1.4607123136520386, "learning_rate": 7.339722511317273e-05, "loss": 5.0264, "step": 38330 }, { "epoch": 0.07343962349263448, "grad_norm": 1.6439458131790161, "learning_rate": 7.34163798772818e-05, "loss": 5.0228, "step": 38340 }, { "epoch": 0.07345877832400972, "grad_norm": 1.5228289365768433, "learning_rate": 7.343553464139088e-05, "loss": 4.9348, "step": 38350 }, { "epoch": 0.07347793315538495, "grad_norm": 1.6182403564453125, "learning_rate": 7.345468940549996e-05, "loss": 4.9007, "step": 38360 }, { "epoch": 0.07349708798676018, "grad_norm": 1.5547640323638916, "learning_rate": 7.347384416960905e-05, "loss": 4.9698, "step": 38370 }, { "epoch": 0.07351624281813542, "grad_norm": 1.5049995183944702, "learning_rate": 7.349299893371813e-05, "loss": 4.7243, "step": 38380 }, { "epoch": 0.07353539764951064, "grad_norm": 1.4203628301620483, "learning_rate": 7.351215369782721e-05, "loss": 5.0673, "step": 38390 }, { "epoch": 0.07355455248088587, "grad_norm": 1.6053968667984009, "learning_rate": 
7.353130846193628e-05, "loss": 4.8813, "step": 38400 }, { "epoch": 0.07357370731226111, "grad_norm": 1.4836212396621704, "learning_rate": 7.355046322604537e-05, "loss": 4.9325, "step": 38410 }, { "epoch": 0.07359286214363633, "grad_norm": 1.650900959968567, "learning_rate": 7.356961799015444e-05, "loss": 4.9354, "step": 38420 }, { "epoch": 0.07361201697501156, "grad_norm": 1.468164324760437, "learning_rate": 7.358877275426352e-05, "loss": 4.9875, "step": 38430 }, { "epoch": 0.0736311718063868, "grad_norm": 1.500093698501587, "learning_rate": 7.360792751837261e-05, "loss": 4.9848, "step": 38440 }, { "epoch": 0.07365032663776203, "grad_norm": 1.4693282842636108, "learning_rate": 7.362708228248168e-05, "loss": 4.7731, "step": 38450 }, { "epoch": 0.07366948146913725, "grad_norm": 1.4575068950653076, "learning_rate": 7.364623704659076e-05, "loss": 4.8251, "step": 38460 }, { "epoch": 0.07368863630051249, "grad_norm": 1.4809902906417847, "learning_rate": 7.366539181069984e-05, "loss": 4.9758, "step": 38470 }, { "epoch": 0.07370779113188772, "grad_norm": 1.486607313156128, "learning_rate": 7.368454657480893e-05, "loss": 4.8626, "step": 38480 }, { "epoch": 0.07372694596326294, "grad_norm": 1.5964933633804321, "learning_rate": 7.370370133891801e-05, "loss": 4.9904, "step": 38490 }, { "epoch": 0.07374610079463818, "grad_norm": 1.5163800716400146, "learning_rate": 7.372285610302709e-05, "loss": 5.0237, "step": 38500 }, { "epoch": 0.07376525562601341, "grad_norm": 1.57587730884552, "learning_rate": 7.374201086713616e-05, "loss": 4.7202, "step": 38510 }, { "epoch": 0.07378441045738864, "grad_norm": 1.533761978149414, "learning_rate": 7.376116563124525e-05, "loss": 4.7631, "step": 38520 }, { "epoch": 0.07380356528876388, "grad_norm": 1.4569505453109741, "learning_rate": 7.378032039535432e-05, "loss": 4.7963, "step": 38530 }, { "epoch": 0.0738227201201391, "grad_norm": 1.4923129081726074, "learning_rate": 7.37994751594634e-05, "loss": 4.9755, "step": 38540 }, { "epoch": 
0.07384187495151433, "grad_norm": 1.5061107873916626, "learning_rate": 7.381862992357248e-05, "loss": 4.9132, "step": 38550 }, { "epoch": 0.07386102978288957, "grad_norm": 1.479594349861145, "learning_rate": 7.383778468768156e-05, "loss": 4.9017, "step": 38560 }, { "epoch": 0.0738801846142648, "grad_norm": 1.515596628189087, "learning_rate": 7.385693945179064e-05, "loss": 4.9538, "step": 38570 }, { "epoch": 0.07389933944564002, "grad_norm": 1.5025146007537842, "learning_rate": 7.387609421589973e-05, "loss": 5.0679, "step": 38580 }, { "epoch": 0.07391849427701526, "grad_norm": 1.7568072080612183, "learning_rate": 7.389524898000881e-05, "loss": 4.7139, "step": 38590 }, { "epoch": 0.07393764910839049, "grad_norm": 1.4787391424179077, "learning_rate": 7.391440374411789e-05, "loss": 5.005, "step": 38600 }, { "epoch": 0.07395680393976571, "grad_norm": 1.4811879396438599, "learning_rate": 7.393355850822697e-05, "loss": 4.932, "step": 38610 }, { "epoch": 0.07397595877114095, "grad_norm": 1.4596209526062012, "learning_rate": 7.395271327233604e-05, "loss": 5.0413, "step": 38620 }, { "epoch": 0.07399511360251618, "grad_norm": 1.4486079216003418, "learning_rate": 7.397186803644513e-05, "loss": 4.9883, "step": 38630 }, { "epoch": 0.0740142684338914, "grad_norm": 1.5234253406524658, "learning_rate": 7.39910228005542e-05, "loss": 4.8937, "step": 38640 }, { "epoch": 0.07403342326526664, "grad_norm": 1.5567530393600464, "learning_rate": 7.401017756466328e-05, "loss": 4.8979, "step": 38650 }, { "epoch": 0.07405257809664187, "grad_norm": 1.5032798051834106, "learning_rate": 7.402933232877236e-05, "loss": 5.1031, "step": 38660 }, { "epoch": 0.0740717329280171, "grad_norm": 1.5594524145126343, "learning_rate": 7.404848709288144e-05, "loss": 4.8541, "step": 38670 }, { "epoch": 0.07409088775939233, "grad_norm": 1.4461432695388794, "learning_rate": 7.406764185699053e-05, "loss": 5.0098, "step": 38680 }, { "epoch": 0.07411004259076756, "grad_norm": 1.3922349214553833, "learning_rate": 
7.40867966210996e-05, "loss": 4.9539, "step": 38690 }, { "epoch": 0.07412919742214279, "grad_norm": 1.4875357151031494, "learning_rate": 7.410595138520869e-05, "loss": 4.8628, "step": 38700 }, { "epoch": 0.07414835225351803, "grad_norm": 1.4760262966156006, "learning_rate": 7.412510614931777e-05, "loss": 5.0086, "step": 38710 }, { "epoch": 0.07416750708489325, "grad_norm": 1.5167875289916992, "learning_rate": 7.414426091342685e-05, "loss": 4.9176, "step": 38720 }, { "epoch": 0.07418666191626848, "grad_norm": 1.4406369924545288, "learning_rate": 7.416341567753592e-05, "loss": 4.8314, "step": 38730 }, { "epoch": 0.07420581674764372, "grad_norm": 1.4701300859451294, "learning_rate": 7.4182570441645e-05, "loss": 4.9253, "step": 38740 }, { "epoch": 0.07422497157901894, "grad_norm": 1.5814117193222046, "learning_rate": 7.420172520575408e-05, "loss": 4.8003, "step": 38750 }, { "epoch": 0.07424412641039418, "grad_norm": 1.4496411085128784, "learning_rate": 7.422087996986316e-05, "loss": 4.8623, "step": 38760 }, { "epoch": 0.07426328124176941, "grad_norm": 1.451596975326538, "learning_rate": 7.424003473397224e-05, "loss": 4.8792, "step": 38770 }, { "epoch": 0.07428243607314464, "grad_norm": 1.546661376953125, "learning_rate": 7.425918949808132e-05, "loss": 4.758, "step": 38780 }, { "epoch": 0.07430159090451988, "grad_norm": 1.4879776239395142, "learning_rate": 7.42783442621904e-05, "loss": 4.8683, "step": 38790 }, { "epoch": 0.0743207457358951, "grad_norm": 1.5775110721588135, "learning_rate": 7.429749902629949e-05, "loss": 4.957, "step": 38800 }, { "epoch": 0.07433990056727033, "grad_norm": 1.592401385307312, "learning_rate": 7.431665379040856e-05, "loss": 4.8161, "step": 38810 }, { "epoch": 0.07435905539864557, "grad_norm": 1.520329236984253, "learning_rate": 7.433580855451765e-05, "loss": 4.8081, "step": 38820 }, { "epoch": 0.0743782102300208, "grad_norm": 1.5252732038497925, "learning_rate": 7.435496331862673e-05, "loss": 4.8083, "step": 38830 }, { "epoch": 
0.07439736506139602, "grad_norm": 1.5145444869995117, "learning_rate": 7.43741180827358e-05, "loss": 4.7845, "step": 38840 }, { "epoch": 0.07441651989277126, "grad_norm": 1.5269163846969604, "learning_rate": 7.439327284684488e-05, "loss": 4.9548, "step": 38850 }, { "epoch": 0.07443567472414649, "grad_norm": 1.5006234645843506, "learning_rate": 7.441242761095396e-05, "loss": 4.8946, "step": 38860 }, { "epoch": 0.07445482955552171, "grad_norm": 1.497903823852539, "learning_rate": 7.443158237506304e-05, "loss": 4.9394, "step": 38870 }, { "epoch": 0.07447398438689695, "grad_norm": 1.5162749290466309, "learning_rate": 7.445073713917212e-05, "loss": 4.8276, "step": 38880 }, { "epoch": 0.07449313921827218, "grad_norm": 1.4954286813735962, "learning_rate": 7.44698919032812e-05, "loss": 5.0256, "step": 38890 }, { "epoch": 0.0745122940496474, "grad_norm": 1.561240553855896, "learning_rate": 7.448904666739029e-05, "loss": 4.796, "step": 38900 }, { "epoch": 0.07453144888102264, "grad_norm": 1.4934982061386108, "learning_rate": 7.450820143149937e-05, "loss": 4.9017, "step": 38910 }, { "epoch": 0.07455060371239787, "grad_norm": 1.5400243997573853, "learning_rate": 7.452735619560844e-05, "loss": 4.8666, "step": 38920 }, { "epoch": 0.0745697585437731, "grad_norm": 1.5063353776931763, "learning_rate": 7.454651095971753e-05, "loss": 4.8688, "step": 38930 }, { "epoch": 0.07458891337514834, "grad_norm": 1.417336106300354, "learning_rate": 7.456566572382661e-05, "loss": 4.9582, "step": 38940 }, { "epoch": 0.07460806820652356, "grad_norm": 1.4547054767608643, "learning_rate": 7.458482048793568e-05, "loss": 4.9466, "step": 38950 }, { "epoch": 0.07462722303789879, "grad_norm": 1.6043287515640259, "learning_rate": 7.460397525204476e-05, "loss": 4.9288, "step": 38960 }, { "epoch": 0.07464637786927403, "grad_norm": 1.4379656314849854, "learning_rate": 7.462313001615384e-05, "loss": 4.9704, "step": 38970 }, { "epoch": 0.07466553270064925, "grad_norm": 1.5280654430389404, "learning_rate": 
7.464228478026292e-05, "loss": 4.8548, "step": 38980 }, { "epoch": 0.07468468753202448, "grad_norm": 1.5145388841629028, "learning_rate": 7.4661439544372e-05, "loss": 5.0154, "step": 38990 }, { "epoch": 0.07470384236339972, "grad_norm": 1.5164973735809326, "learning_rate": 7.468059430848109e-05, "loss": 4.8439, "step": 39000 }, { "epoch": 0.07472299719477495, "grad_norm": 1.5251798629760742, "learning_rate": 7.469974907259017e-05, "loss": 4.9747, "step": 39010 }, { "epoch": 0.07474215202615017, "grad_norm": 1.737195372581482, "learning_rate": 7.471890383669925e-05, "loss": 4.7837, "step": 39020 }, { "epoch": 0.07476130685752541, "grad_norm": 1.430796504020691, "learning_rate": 7.473805860080832e-05, "loss": 4.8796, "step": 39030 }, { "epoch": 0.07478046168890064, "grad_norm": 1.4999197721481323, "learning_rate": 7.47572133649174e-05, "loss": 4.9211, "step": 39040 }, { "epoch": 0.07479961652027586, "grad_norm": 1.4661606550216675, "learning_rate": 7.477636812902649e-05, "loss": 5.0002, "step": 39050 }, { "epoch": 0.0748187713516511, "grad_norm": 1.4600942134857178, "learning_rate": 7.479552289313556e-05, "loss": 4.9452, "step": 39060 }, { "epoch": 0.07483792618302633, "grad_norm": 1.4992554187774658, "learning_rate": 7.481467765724464e-05, "loss": 4.7846, "step": 39070 }, { "epoch": 0.07485708101440156, "grad_norm": 1.5522816181182861, "learning_rate": 7.483383242135372e-05, "loss": 4.9475, "step": 39080 }, { "epoch": 0.0748762358457768, "grad_norm": 1.4760042428970337, "learning_rate": 7.48529871854628e-05, "loss": 4.9123, "step": 39090 }, { "epoch": 0.07489539067715202, "grad_norm": 1.5138827562332153, "learning_rate": 7.487214194957189e-05, "loss": 4.8641, "step": 39100 }, { "epoch": 0.07491454550852725, "grad_norm": 1.466088891029358, "learning_rate": 7.489129671368097e-05, "loss": 5.063, "step": 39110 }, { "epoch": 0.07493370033990249, "grad_norm": 1.5191484689712524, "learning_rate": 7.491045147779005e-05, "loss": 4.9824, "step": 39120 }, { "epoch": 
0.07495285517127771, "grad_norm": 1.50978422164917, "learning_rate": 7.492960624189913e-05, "loss": 4.9306, "step": 39130 }, { "epoch": 0.07497201000265294, "grad_norm": 1.5094611644744873, "learning_rate": 7.49487610060082e-05, "loss": 4.9775, "step": 39140 }, { "epoch": 0.07499116483402818, "grad_norm": 1.5197025537490845, "learning_rate": 7.496791577011728e-05, "loss": 4.862, "step": 39150 }, { "epoch": 0.0750103196654034, "grad_norm": 1.4707348346710205, "learning_rate": 7.498707053422637e-05, "loss": 4.9283, "step": 39160 }, { "epoch": 0.07502947449677863, "grad_norm": 1.451414704322815, "learning_rate": 7.500622529833544e-05, "loss": 4.8874, "step": 39170 }, { "epoch": 0.07504862932815387, "grad_norm": 1.4885411262512207, "learning_rate": 7.502538006244452e-05, "loss": 4.8761, "step": 39180 }, { "epoch": 0.0750677841595291, "grad_norm": 1.4612172842025757, "learning_rate": 7.50445348265536e-05, "loss": 4.9695, "step": 39190 }, { "epoch": 0.07508693899090432, "grad_norm": 1.582555890083313, "learning_rate": 7.506368959066269e-05, "loss": 5.0034, "step": 39200 }, { "epoch": 0.07510609382227956, "grad_norm": 1.4674134254455566, "learning_rate": 7.508284435477177e-05, "loss": 4.6666, "step": 39210 }, { "epoch": 0.07512524865365479, "grad_norm": 1.470680594444275, "learning_rate": 7.510199911888085e-05, "loss": 4.8984, "step": 39220 }, { "epoch": 0.07514440348503001, "grad_norm": 1.4641612768173218, "learning_rate": 7.512115388298992e-05, "loss": 4.8576, "step": 39230 }, { "epoch": 0.07516355831640525, "grad_norm": 1.5015617609024048, "learning_rate": 7.514030864709901e-05, "loss": 4.8158, "step": 39240 }, { "epoch": 0.07518271314778048, "grad_norm": 1.488626480102539, "learning_rate": 7.515946341120808e-05, "loss": 4.8873, "step": 39250 }, { "epoch": 0.0752018679791557, "grad_norm": 1.5283634662628174, "learning_rate": 7.517861817531716e-05, "loss": 4.9753, "step": 39260 }, { "epoch": 0.07522102281053095, "grad_norm": 1.530634880065918, "learning_rate": 
7.519777293942625e-05, "loss": 4.8612, "step": 39270 }, { "epoch": 0.07524017764190617, "grad_norm": 1.5853214263916016, "learning_rate": 7.521692770353532e-05, "loss": 4.9649, "step": 39280 }, { "epoch": 0.0752593324732814, "grad_norm": 1.9706395864486694, "learning_rate": 7.52360824676444e-05, "loss": 4.7893, "step": 39290 }, { "epoch": 0.07527848730465664, "grad_norm": 1.5731112957000732, "learning_rate": 7.525523723175348e-05, "loss": 4.9354, "step": 39300 }, { "epoch": 0.07529764213603186, "grad_norm": 1.5194140672683716, "learning_rate": 7.527439199586257e-05, "loss": 4.8217, "step": 39310 }, { "epoch": 0.07531679696740709, "grad_norm": 1.5380324125289917, "learning_rate": 7.529354675997165e-05, "loss": 4.8042, "step": 39320 }, { "epoch": 0.07533595179878233, "grad_norm": 1.449436902999878, "learning_rate": 7.531270152408073e-05, "loss": 4.921, "step": 39330 }, { "epoch": 0.07535510663015756, "grad_norm": 1.5222668647766113, "learning_rate": 7.53318562881898e-05, "loss": 4.7611, "step": 39340 }, { "epoch": 0.07537426146153278, "grad_norm": 1.5191168785095215, "learning_rate": 7.535101105229889e-05, "loss": 4.8638, "step": 39350 }, { "epoch": 0.07539341629290802, "grad_norm": 1.448309063911438, "learning_rate": 7.537016581640796e-05, "loss": 4.8952, "step": 39360 }, { "epoch": 0.07541257112428325, "grad_norm": 1.4428173303604126, "learning_rate": 7.538932058051704e-05, "loss": 4.8238, "step": 39370 }, { "epoch": 0.07543172595565847, "grad_norm": 2.2480087280273438, "learning_rate": 7.540847534462614e-05, "loss": 4.8927, "step": 39380 }, { "epoch": 0.07545088078703371, "grad_norm": 1.4686079025268555, "learning_rate": 7.54276301087352e-05, "loss": 4.9893, "step": 39390 }, { "epoch": 0.07547003561840894, "grad_norm": 1.4231749773025513, "learning_rate": 7.544678487284428e-05, "loss": 4.9678, "step": 39400 }, { "epoch": 0.07548919044978418, "grad_norm": 1.4627773761749268, "learning_rate": 7.546593963695337e-05, "loss": 5.1023, "step": 39410 }, { "epoch": 
0.0755083452811594, "grad_norm": 1.5271083116531372, "learning_rate": 7.548509440106245e-05, "loss": 4.8398, "step": 39420 }, { "epoch": 0.07552750011253463, "grad_norm": 1.4452195167541504, "learning_rate": 7.550424916517153e-05, "loss": 4.87, "step": 39430 }, { "epoch": 0.07554665494390987, "grad_norm": 1.6281859874725342, "learning_rate": 7.552340392928061e-05, "loss": 4.8869, "step": 39440 }, { "epoch": 0.0755658097752851, "grad_norm": 1.4444119930267334, "learning_rate": 7.554255869338968e-05, "loss": 5.0115, "step": 39450 }, { "epoch": 0.07558496460666032, "grad_norm": 1.5301207304000854, "learning_rate": 7.556171345749877e-05, "loss": 4.9258, "step": 39460 }, { "epoch": 0.07560411943803556, "grad_norm": 1.4479575157165527, "learning_rate": 7.558086822160784e-05, "loss": 4.8939, "step": 39470 }, { "epoch": 0.07562327426941079, "grad_norm": 1.4461418390274048, "learning_rate": 7.560002298571692e-05, "loss": 4.8468, "step": 39480 }, { "epoch": 0.07564242910078602, "grad_norm": 1.4131914377212524, "learning_rate": 7.561917774982602e-05, "loss": 5.0331, "step": 39490 }, { "epoch": 0.07566158393216126, "grad_norm": 1.4319651126861572, "learning_rate": 7.563833251393508e-05, "loss": 4.9516, "step": 39500 }, { "epoch": 0.07568073876353648, "grad_norm": 1.5616521835327148, "learning_rate": 7.565748727804417e-05, "loss": 4.9725, "step": 39510 }, { "epoch": 0.07569989359491171, "grad_norm": 1.5132478475570679, "learning_rate": 7.567664204215325e-05, "loss": 4.8396, "step": 39520 }, { "epoch": 0.07571904842628695, "grad_norm": 1.4547113180160522, "learning_rate": 7.569579680626231e-05, "loss": 5.0363, "step": 39530 }, { "epoch": 0.07573820325766217, "grad_norm": 1.4960078001022339, "learning_rate": 7.571495157037141e-05, "loss": 4.9147, "step": 39540 }, { "epoch": 0.0757573580890374, "grad_norm": 1.4679850339889526, "learning_rate": 7.573410633448048e-05, "loss": 4.9144, "step": 39550 }, { "epoch": 0.07577651292041264, "grad_norm": 1.4783388376235962, "learning_rate": 
7.575326109858956e-05, "loss": 4.9501, "step": 39560 }, { "epoch": 0.07579566775178787, "grad_norm": 1.5805997848510742, "learning_rate": 7.577241586269865e-05, "loss": 4.917, "step": 39570 }, { "epoch": 0.07581482258316309, "grad_norm": 1.462948203086853, "learning_rate": 7.579157062680772e-05, "loss": 4.9722, "step": 39580 }, { "epoch": 0.07583397741453833, "grad_norm": 1.4262633323669434, "learning_rate": 7.58107253909168e-05, "loss": 4.9882, "step": 39590 }, { "epoch": 0.07585313224591356, "grad_norm": 1.5628607273101807, "learning_rate": 7.58298801550259e-05, "loss": 4.8812, "step": 39600 }, { "epoch": 0.07587228707728878, "grad_norm": 1.4994144439697266, "learning_rate": 7.584903491913496e-05, "loss": 4.8067, "step": 39610 }, { "epoch": 0.07589144190866402, "grad_norm": 1.5882824659347534, "learning_rate": 7.586818968324405e-05, "loss": 4.9736, "step": 39620 }, { "epoch": 0.07591059674003925, "grad_norm": 1.4401335716247559, "learning_rate": 7.588734444735313e-05, "loss": 4.8808, "step": 39630 }, { "epoch": 0.07592975157141447, "grad_norm": 1.5251351594924927, "learning_rate": 7.59064992114622e-05, "loss": 4.867, "step": 39640 }, { "epoch": 0.07594890640278971, "grad_norm": 1.434130311012268, "learning_rate": 7.592565397557129e-05, "loss": 4.9807, "step": 39650 }, { "epoch": 0.07596806123416494, "grad_norm": 1.4320878982543945, "learning_rate": 7.594480873968036e-05, "loss": 4.8489, "step": 39660 }, { "epoch": 0.07598721606554017, "grad_norm": 1.5428446531295776, "learning_rate": 7.596396350378944e-05, "loss": 4.9174, "step": 39670 }, { "epoch": 0.0760063708969154, "grad_norm": 1.5281753540039062, "learning_rate": 7.598311826789853e-05, "loss": 4.8876, "step": 39680 }, { "epoch": 0.07602552572829063, "grad_norm": 1.446059226989746, "learning_rate": 7.60022730320076e-05, "loss": 4.9926, "step": 39690 }, { "epoch": 0.07604468055966586, "grad_norm": 1.4378713369369507, "learning_rate": 7.602142779611668e-05, "loss": 4.9767, "step": 39700 }, { "epoch": 
0.0760638353910411, "grad_norm": 1.4767045974731445, "learning_rate": 7.604058256022578e-05, "loss": 4.9073, "step": 39710 }, { "epoch": 0.07608299022241632, "grad_norm": 1.409055233001709, "learning_rate": 7.605973732433485e-05, "loss": 4.7669, "step": 39720 }, { "epoch": 0.07610214505379155, "grad_norm": 1.5131028890609741, "learning_rate": 7.607889208844393e-05, "loss": 4.9562, "step": 39730 }, { "epoch": 0.07612129988516679, "grad_norm": 1.5383450984954834, "learning_rate": 7.6098046852553e-05, "loss": 4.8978, "step": 39740 }, { "epoch": 0.07614045471654202, "grad_norm": 1.4484891891479492, "learning_rate": 7.611720161666208e-05, "loss": 4.9261, "step": 39750 }, { "epoch": 0.07615960954791724, "grad_norm": 1.5236923694610596, "learning_rate": 7.613635638077117e-05, "loss": 4.8422, "step": 39760 }, { "epoch": 0.07617876437929248, "grad_norm": 1.4895106554031372, "learning_rate": 7.615551114488024e-05, "loss": 4.8852, "step": 39770 }, { "epoch": 0.07619791921066771, "grad_norm": 1.4343738555908203, "learning_rate": 7.617466590898932e-05, "loss": 4.9262, "step": 39780 }, { "epoch": 0.07621707404204293, "grad_norm": 1.5995783805847168, "learning_rate": 7.619382067309841e-05, "loss": 4.7255, "step": 39790 }, { "epoch": 0.07623622887341817, "grad_norm": 1.4171481132507324, "learning_rate": 7.621297543720748e-05, "loss": 4.8665, "step": 39800 }, { "epoch": 0.0762553837047934, "grad_norm": 1.416857361793518, "learning_rate": 7.623213020131656e-05, "loss": 5.0024, "step": 39810 }, { "epoch": 0.07627453853616863, "grad_norm": 1.4951156377792358, "learning_rate": 7.625128496542566e-05, "loss": 4.8442, "step": 39820 }, { "epoch": 0.07629369336754387, "grad_norm": 1.4527935981750488, "learning_rate": 7.627043972953473e-05, "loss": 4.8259, "step": 39830 }, { "epoch": 0.07631284819891909, "grad_norm": 1.6794891357421875, "learning_rate": 7.628959449364381e-05, "loss": 4.8308, "step": 39840 }, { "epoch": 0.07633200303029432, "grad_norm": 1.511873722076416, "learning_rate": 
7.630874925775288e-05, "loss": 4.8183, "step": 39850 }, { "epoch": 0.07635115786166956, "grad_norm": 1.5868041515350342, "learning_rate": 7.632790402186196e-05, "loss": 4.7939, "step": 39860 }, { "epoch": 0.07637031269304478, "grad_norm": 1.5818824768066406, "learning_rate": 7.634705878597105e-05, "loss": 4.8498, "step": 39870 }, { "epoch": 0.07638946752442001, "grad_norm": 1.4648354053497314, "learning_rate": 7.636621355008012e-05, "loss": 4.8549, "step": 39880 }, { "epoch": 0.07640862235579525, "grad_norm": 1.4118167161941528, "learning_rate": 7.63853683141892e-05, "loss": 4.9252, "step": 39890 }, { "epoch": 0.07642777718717048, "grad_norm": 1.5209892988204956, "learning_rate": 7.64045230782983e-05, "loss": 4.9051, "step": 39900 }, { "epoch": 0.0764469320185457, "grad_norm": 1.6631531715393066, "learning_rate": 7.642367784240736e-05, "loss": 4.8836, "step": 39910 }, { "epoch": 0.07646608684992094, "grad_norm": 1.4793026447296143, "learning_rate": 7.644283260651644e-05, "loss": 4.7534, "step": 39920 }, { "epoch": 0.07648524168129617, "grad_norm": 1.4251229763031006, "learning_rate": 7.646198737062551e-05, "loss": 4.8728, "step": 39930 }, { "epoch": 0.0765043965126714, "grad_norm": 1.4064910411834717, "learning_rate": 7.648114213473461e-05, "loss": 4.9676, "step": 39940 }, { "epoch": 0.07652355134404663, "grad_norm": 1.5084607601165771, "learning_rate": 7.650029689884369e-05, "loss": 4.8924, "step": 39950 }, { "epoch": 0.07654270617542186, "grad_norm": 1.5544260740280151, "learning_rate": 7.651945166295276e-05, "loss": 4.8411, "step": 39960 }, { "epoch": 0.07656186100679709, "grad_norm": 1.4490392208099365, "learning_rate": 7.653860642706184e-05, "loss": 4.9275, "step": 39970 }, { "epoch": 0.07658101583817233, "grad_norm": 1.630281686782837, "learning_rate": 7.655776119117093e-05, "loss": 4.8433, "step": 39980 }, { "epoch": 0.07660017066954755, "grad_norm": 1.4374165534973145, "learning_rate": 7.657691595528e-05, "loss": 4.8844, "step": 39990 }, { "epoch": 
0.07661932550092278, "grad_norm": 1.5383424758911133, "learning_rate": 7.659607071938908e-05, "loss": 4.8654, "step": 40000 }, { "epoch": 0.07663848033229802, "grad_norm": 1.5224887132644653, "learning_rate": 7.661522548349818e-05, "loss": 5.0602, "step": 40010 }, { "epoch": 0.07665763516367324, "grad_norm": 1.5048699378967285, "learning_rate": 7.663438024760724e-05, "loss": 4.8755, "step": 40020 }, { "epoch": 0.07667678999504847, "grad_norm": 1.4950518608093262, "learning_rate": 7.665353501171633e-05, "loss": 4.8722, "step": 40030 }, { "epoch": 0.07669594482642371, "grad_norm": 1.4498780965805054, "learning_rate": 7.667268977582539e-05, "loss": 4.9313, "step": 40040 }, { "epoch": 0.07671509965779894, "grad_norm": 1.4497148990631104, "learning_rate": 7.669184453993449e-05, "loss": 4.9829, "step": 40050 }, { "epoch": 0.07673425448917416, "grad_norm": 1.4899269342422485, "learning_rate": 7.671099930404357e-05, "loss": 4.8894, "step": 40060 }, { "epoch": 0.0767534093205494, "grad_norm": 1.5287054777145386, "learning_rate": 7.673015406815264e-05, "loss": 4.7922, "step": 40070 }, { "epoch": 0.07677256415192463, "grad_norm": 1.4039702415466309, "learning_rate": 7.674930883226172e-05, "loss": 4.912, "step": 40080 }, { "epoch": 0.07679171898329987, "grad_norm": 1.4577778577804565, "learning_rate": 7.676846359637081e-05, "loss": 4.9344, "step": 40090 }, { "epoch": 0.07681087381467509, "grad_norm": 1.521401047706604, "learning_rate": 7.678761836047988e-05, "loss": 4.8432, "step": 40100 }, { "epoch": 0.07683002864605032, "grad_norm": 1.414502501487732, "learning_rate": 7.680677312458896e-05, "loss": 4.7869, "step": 40110 }, { "epoch": 0.07684918347742556, "grad_norm": 1.5987250804901123, "learning_rate": 7.682592788869806e-05, "loss": 5.0191, "step": 40120 }, { "epoch": 0.07686833830880078, "grad_norm": 1.5990781784057617, "learning_rate": 7.684508265280712e-05, "loss": 4.9006, "step": 40130 }, { "epoch": 0.07688749314017601, "grad_norm": 1.5341717004776, "learning_rate": 
7.68642374169162e-05, "loss": 4.8402, "step": 40140 }, { "epoch": 0.07690664797155125, "grad_norm": 1.4191664457321167, "learning_rate": 7.688339218102527e-05, "loss": 5.0071, "step": 40150 }, { "epoch": 0.07692580280292648, "grad_norm": 1.531529426574707, "learning_rate": 7.690254694513437e-05, "loss": 4.7998, "step": 40160 }, { "epoch": 0.0769449576343017, "grad_norm": 1.5148998498916626, "learning_rate": 7.692170170924345e-05, "loss": 4.7327, "step": 40170 }, { "epoch": 0.07696411246567694, "grad_norm": 1.4553987979888916, "learning_rate": 7.694085647335252e-05, "loss": 4.8616, "step": 40180 }, { "epoch": 0.07698326729705217, "grad_norm": 1.560056209564209, "learning_rate": 7.69600112374616e-05, "loss": 4.7706, "step": 40190 }, { "epoch": 0.0770024221284274, "grad_norm": 1.6771587133407593, "learning_rate": 7.69791660015707e-05, "loss": 4.8945, "step": 40200 }, { "epoch": 0.07702157695980263, "grad_norm": 1.4399257898330688, "learning_rate": 7.699832076567976e-05, "loss": 4.8256, "step": 40210 }, { "epoch": 0.07704073179117786, "grad_norm": 1.4307186603546143, "learning_rate": 7.701747552978884e-05, "loss": 4.9128, "step": 40220 }, { "epoch": 0.07705988662255309, "grad_norm": 1.4061355590820312, "learning_rate": 7.703663029389791e-05, "loss": 4.9811, "step": 40230 }, { "epoch": 0.07707904145392833, "grad_norm": 1.4322503805160522, "learning_rate": 7.7055785058007e-05, "loss": 4.8939, "step": 40240 }, { "epoch": 0.07709819628530355, "grad_norm": 1.651299238204956, "learning_rate": 7.707493982211609e-05, "loss": 4.7484, "step": 40250 }, { "epoch": 0.07711735111667878, "grad_norm": 1.4199217557907104, "learning_rate": 7.709409458622515e-05, "loss": 4.8883, "step": 40260 }, { "epoch": 0.07713650594805402, "grad_norm": 1.4556834697723389, "learning_rate": 7.711324935033425e-05, "loss": 4.9244, "step": 40270 }, { "epoch": 0.07715566077942924, "grad_norm": 1.4143375158309937, "learning_rate": 7.713240411444333e-05, "loss": 5.0287, "step": 40280 }, { "epoch": 
0.07717481561080447, "grad_norm": 1.4467401504516602, "learning_rate": 7.71515588785524e-05, "loss": 5.025, "step": 40290 }, { "epoch": 0.07719397044217971, "grad_norm": 1.5212785005569458, "learning_rate": 7.717071364266148e-05, "loss": 4.8438, "step": 40300 }, { "epoch": 0.07721312527355494, "grad_norm": 1.5392347574234009, "learning_rate": 7.718986840677057e-05, "loss": 4.9632, "step": 40310 }, { "epoch": 0.07723228010493016, "grad_norm": 1.4136971235275269, "learning_rate": 7.720902317087964e-05, "loss": 4.8496, "step": 40320 }, { "epoch": 0.0772514349363054, "grad_norm": 1.492868185043335, "learning_rate": 7.722817793498872e-05, "loss": 4.6737, "step": 40330 }, { "epoch": 0.07727058976768063, "grad_norm": 1.4534188508987427, "learning_rate": 7.724733269909779e-05, "loss": 4.8483, "step": 40340 }, { "epoch": 0.07728974459905585, "grad_norm": 1.4595897197723389, "learning_rate": 7.726648746320689e-05, "loss": 4.7634, "step": 40350 }, { "epoch": 0.0773088994304311, "grad_norm": 1.4962074756622314, "learning_rate": 7.728564222731597e-05, "loss": 4.8225, "step": 40360 }, { "epoch": 0.07732805426180632, "grad_norm": 1.508605718612671, "learning_rate": 7.730479699142504e-05, "loss": 4.8247, "step": 40370 }, { "epoch": 0.07734720909318155, "grad_norm": 1.4398036003112793, "learning_rate": 7.732395175553413e-05, "loss": 4.9065, "step": 40380 }, { "epoch": 0.07736636392455679, "grad_norm": 1.4551368951797485, "learning_rate": 7.734310651964321e-05, "loss": 4.9037, "step": 40390 }, { "epoch": 0.07738551875593201, "grad_norm": 1.5811383724212646, "learning_rate": 7.736226128375228e-05, "loss": 4.7769, "step": 40400 }, { "epoch": 0.07740467358730724, "grad_norm": 1.4784096479415894, "learning_rate": 7.738141604786136e-05, "loss": 4.9617, "step": 40410 }, { "epoch": 0.07742382841868248, "grad_norm": 1.415574550628662, "learning_rate": 7.740057081197044e-05, "loss": 4.9002, "step": 40420 }, { "epoch": 0.0774429832500577, "grad_norm": 1.4677386283874512, "learning_rate": 
7.741972557607952e-05, "loss": 4.8884, "step": 40430 }, { "epoch": 0.07746213808143293, "grad_norm": 1.5527907609939575, "learning_rate": 7.74388803401886e-05, "loss": 4.8635, "step": 40440 }, { "epoch": 0.07748129291280817, "grad_norm": 1.4396620988845825, "learning_rate": 7.745803510429767e-05, "loss": 4.9692, "step": 40450 }, { "epoch": 0.0775004477441834, "grad_norm": 1.4662195444107056, "learning_rate": 7.747718986840677e-05, "loss": 4.8641, "step": 40460 }, { "epoch": 0.07751960257555862, "grad_norm": 1.4540901184082031, "learning_rate": 7.749634463251585e-05, "loss": 4.8843, "step": 40470 }, { "epoch": 0.07753875740693386, "grad_norm": 1.4679633378982544, "learning_rate": 7.751549939662492e-05, "loss": 4.8603, "step": 40480 }, { "epoch": 0.07755791223830909, "grad_norm": 1.525985836982727, "learning_rate": 7.753465416073401e-05, "loss": 4.7629, "step": 40490 }, { "epoch": 0.07757706706968431, "grad_norm": 1.5803050994873047, "learning_rate": 7.755380892484309e-05, "loss": 4.9078, "step": 40500 }, { "epoch": 0.07759622190105955, "grad_norm": 1.472831130027771, "learning_rate": 7.757296368895216e-05, "loss": 4.8972, "step": 40510 }, { "epoch": 0.07761537673243478, "grad_norm": 1.4020525217056274, "learning_rate": 7.759211845306124e-05, "loss": 4.7922, "step": 40520 }, { "epoch": 0.07763453156381, "grad_norm": 1.4577704668045044, "learning_rate": 7.761127321717032e-05, "loss": 4.8958, "step": 40530 }, { "epoch": 0.07765368639518525, "grad_norm": 1.4484106302261353, "learning_rate": 7.76304279812794e-05, "loss": 4.7336, "step": 40540 }, { "epoch": 0.07767284122656047, "grad_norm": 1.3834527730941772, "learning_rate": 7.764958274538849e-05, "loss": 4.8625, "step": 40550 }, { "epoch": 0.0776919960579357, "grad_norm": 1.5601102113723755, "learning_rate": 7.766873750949755e-05, "loss": 5.0263, "step": 40560 }, { "epoch": 0.07771115088931094, "grad_norm": 1.3973448276519775, "learning_rate": 7.768789227360665e-05, "loss": 4.8453, "step": 40570 }, { "epoch": 
0.07773030572068616, "grad_norm": 1.4293136596679688, "learning_rate": 7.770704703771573e-05, "loss": 4.8832, "step": 40580 }, { "epoch": 0.07774946055206139, "grad_norm": 1.4067802429199219, "learning_rate": 7.77262018018248e-05, "loss": 4.9352, "step": 40590 }, { "epoch": 0.07776861538343663, "grad_norm": 1.6072341203689575, "learning_rate": 7.774535656593389e-05, "loss": 4.8291, "step": 40600 }, { "epoch": 0.07778777021481185, "grad_norm": 1.4272371530532837, "learning_rate": 7.776451133004296e-05, "loss": 4.8467, "step": 40610 }, { "epoch": 0.07780692504618708, "grad_norm": 1.5182136297225952, "learning_rate": 7.778366609415204e-05, "loss": 4.7678, "step": 40620 }, { "epoch": 0.07782607987756232, "grad_norm": 1.450176477432251, "learning_rate": 7.780282085826112e-05, "loss": 4.8343, "step": 40630 }, { "epoch": 0.07784523470893755, "grad_norm": 1.4813616275787354, "learning_rate": 7.78219756223702e-05, "loss": 4.7796, "step": 40640 }, { "epoch": 0.07786438954031277, "grad_norm": 1.4589732885360718, "learning_rate": 7.784113038647929e-05, "loss": 4.9416, "step": 40650 }, { "epoch": 0.07788354437168801, "grad_norm": 1.600943684577942, "learning_rate": 7.786028515058837e-05, "loss": 4.7787, "step": 40660 }, { "epoch": 0.07790269920306324, "grad_norm": 1.4912796020507812, "learning_rate": 7.787943991469743e-05, "loss": 4.8964, "step": 40670 }, { "epoch": 0.07792185403443846, "grad_norm": 1.482435703277588, "learning_rate": 7.789859467880653e-05, "loss": 4.7009, "step": 40680 }, { "epoch": 0.0779410088658137, "grad_norm": 1.416245937347412, "learning_rate": 7.791774944291561e-05, "loss": 4.8591, "step": 40690 }, { "epoch": 0.07796016369718893, "grad_norm": 1.4726194143295288, "learning_rate": 7.793690420702468e-05, "loss": 4.9487, "step": 40700 }, { "epoch": 0.07797931852856416, "grad_norm": 1.425214409828186, "learning_rate": 7.795605897113377e-05, "loss": 4.899, "step": 40710 }, { "epoch": 0.0779984733599394, "grad_norm": 1.6053274869918823, "learning_rate": 
7.797521373524284e-05, "loss": 4.841, "step": 40720 }, { "epoch": 0.07801762819131462, "grad_norm": 1.387940764427185, "learning_rate": 7.799436849935192e-05, "loss": 4.9356, "step": 40730 }, { "epoch": 0.07803678302268986, "grad_norm": 1.4688711166381836, "learning_rate": 7.8013523263461e-05, "loss": 5.0002, "step": 40740 }, { "epoch": 0.07805593785406509, "grad_norm": 1.4417637586593628, "learning_rate": 7.803267802757008e-05, "loss": 4.975, "step": 40750 }, { "epoch": 0.07807509268544031, "grad_norm": 1.490813136100769, "learning_rate": 7.805183279167917e-05, "loss": 4.904, "step": 40760 }, { "epoch": 0.07809424751681555, "grad_norm": 1.5655977725982666, "learning_rate": 7.807098755578825e-05, "loss": 4.8356, "step": 40770 }, { "epoch": 0.07811340234819078, "grad_norm": 1.4149975776672363, "learning_rate": 7.809014231989731e-05, "loss": 4.8869, "step": 40780 }, { "epoch": 0.078132557179566, "grad_norm": 1.412651777267456, "learning_rate": 7.810929708400641e-05, "loss": 4.9085, "step": 40790 }, { "epoch": 0.07815171201094125, "grad_norm": 1.4640358686447144, "learning_rate": 7.812845184811548e-05, "loss": 4.9326, "step": 40800 }, { "epoch": 0.07817086684231647, "grad_norm": 1.4932996034622192, "learning_rate": 7.814760661222456e-05, "loss": 4.8293, "step": 40810 }, { "epoch": 0.0781900216736917, "grad_norm": 1.34166419506073, "learning_rate": 7.816676137633365e-05, "loss": 4.9382, "step": 40820 }, { "epoch": 0.07820917650506694, "grad_norm": 1.4095649719238281, "learning_rate": 7.818591614044272e-05, "loss": 4.8771, "step": 40830 }, { "epoch": 0.07822833133644216, "grad_norm": 1.5980278253555298, "learning_rate": 7.82050709045518e-05, "loss": 4.8187, "step": 40840 }, { "epoch": 0.07824748616781739, "grad_norm": 1.4046170711517334, "learning_rate": 7.822422566866088e-05, "loss": 4.6194, "step": 40850 }, { "epoch": 0.07826664099919263, "grad_norm": 1.4780710935592651, "learning_rate": 7.824338043276997e-05, "loss": 4.8714, "step": 40860 }, { "epoch": 
0.07828579583056786, "grad_norm": 1.5578248500823975, "learning_rate": 7.826253519687905e-05, "loss": 4.8506, "step": 40870 }, { "epoch": 0.07830495066194308, "grad_norm": 1.4488451480865479, "learning_rate": 7.828168996098813e-05, "loss": 4.8333, "step": 40880 }, { "epoch": 0.07832410549331832, "grad_norm": 1.4586119651794434, "learning_rate": 7.83008447250972e-05, "loss": 4.8012, "step": 40890 }, { "epoch": 0.07834326032469355, "grad_norm": 1.520452618598938, "learning_rate": 7.831999948920629e-05, "loss": 4.9219, "step": 40900 }, { "epoch": 0.07836241515606877, "grad_norm": 1.6258797645568848, "learning_rate": 7.833915425331536e-05, "loss": 4.9466, "step": 40910 }, { "epoch": 0.07838156998744401, "grad_norm": 1.42532479763031, "learning_rate": 7.835830901742444e-05, "loss": 4.7642, "step": 40920 }, { "epoch": 0.07840072481881924, "grad_norm": 1.5418812036514282, "learning_rate": 7.837746378153353e-05, "loss": 4.8193, "step": 40930 }, { "epoch": 0.07841987965019447, "grad_norm": 1.4950578212738037, "learning_rate": 7.83966185456426e-05, "loss": 4.8482, "step": 40940 }, { "epoch": 0.0784390344815697, "grad_norm": 1.466306209564209, "learning_rate": 7.841577330975168e-05, "loss": 4.8754, "step": 40950 }, { "epoch": 0.07845818931294493, "grad_norm": 1.4643365144729614, "learning_rate": 7.843492807386076e-05, "loss": 4.9956, "step": 40960 }, { "epoch": 0.07847734414432016, "grad_norm": 1.5132532119750977, "learning_rate": 7.845408283796985e-05, "loss": 4.7103, "step": 40970 }, { "epoch": 0.0784964989756954, "grad_norm": 1.4672821760177612, "learning_rate": 7.847323760207893e-05, "loss": 4.756, "step": 40980 }, { "epoch": 0.07851565380707062, "grad_norm": 1.4839075803756714, "learning_rate": 7.849239236618801e-05, "loss": 4.8815, "step": 40990 }, { "epoch": 0.07853480863844585, "grad_norm": 1.471043348312378, "learning_rate": 7.851154713029708e-05, "loss": 4.8175, "step": 41000 }, { "epoch": 0.07855396346982109, "grad_norm": 1.4881083965301514, "learning_rate": 
7.853070189440617e-05, "loss": 4.6779, "step": 41010 }, { "epoch": 0.07857311830119632, "grad_norm": 1.4702306985855103, "learning_rate": 7.854985665851524e-05, "loss": 4.7718, "step": 41020 }, { "epoch": 0.07859227313257154, "grad_norm": 1.4149680137634277, "learning_rate": 7.856901142262432e-05, "loss": 4.9373, "step": 41030 }, { "epoch": 0.07861142796394678, "grad_norm": 1.4145557880401611, "learning_rate": 7.858816618673342e-05, "loss": 4.8071, "step": 41040 }, { "epoch": 0.07863058279532201, "grad_norm": 1.4973487854003906, "learning_rate": 7.860732095084248e-05, "loss": 4.9202, "step": 41050 }, { "epoch": 0.07864973762669723, "grad_norm": 1.450502872467041, "learning_rate": 7.862647571495156e-05, "loss": 4.8575, "step": 41060 }, { "epoch": 0.07866889245807247, "grad_norm": 1.576283574104309, "learning_rate": 7.864563047906065e-05, "loss": 4.8185, "step": 41070 }, { "epoch": 0.0786880472894477, "grad_norm": 1.465900182723999, "learning_rate": 7.866478524316971e-05, "loss": 4.8801, "step": 41080 }, { "epoch": 0.07870720212082292, "grad_norm": 1.3863904476165771, "learning_rate": 7.868394000727881e-05, "loss": 4.9327, "step": 41090 }, { "epoch": 0.07872635695219816, "grad_norm": 1.4822005033493042, "learning_rate": 7.870309477138788e-05, "loss": 4.9089, "step": 41100 }, { "epoch": 0.07874551178357339, "grad_norm": 1.4399995803833008, "learning_rate": 7.872224953549696e-05, "loss": 4.6739, "step": 41110 }, { "epoch": 0.07876466661494862, "grad_norm": 1.4776719808578491, "learning_rate": 7.874140429960605e-05, "loss": 4.7618, "step": 41120 }, { "epoch": 0.07878382144632386, "grad_norm": 1.4269235134124756, "learning_rate": 7.876055906371512e-05, "loss": 4.8688, "step": 41130 }, { "epoch": 0.07880297627769908, "grad_norm": 1.4276436567306519, "learning_rate": 7.87797138278242e-05, "loss": 4.9182, "step": 41140 }, { "epoch": 0.07882213110907431, "grad_norm": 1.441603422164917, "learning_rate": 7.87988685919333e-05, "loss": 4.7189, "step": 41150 }, { "epoch": 
0.07884128594044955, "grad_norm": 1.4425073862075806, "learning_rate": 7.881802335604236e-05, "loss": 4.8109, "step": 41160 }, { "epoch": 0.07886044077182477, "grad_norm": 1.4928160905838013, "learning_rate": 7.883717812015145e-05, "loss": 4.9171, "step": 41170 }, { "epoch": 0.0788795956032, "grad_norm": 1.4920850992202759, "learning_rate": 7.885633288426053e-05, "loss": 4.8066, "step": 41180 }, { "epoch": 0.07889875043457524, "grad_norm": 1.454804539680481, "learning_rate": 7.88754876483696e-05, "loss": 4.8394, "step": 41190 }, { "epoch": 0.07891790526595047, "grad_norm": 1.4301400184631348, "learning_rate": 7.889464241247869e-05, "loss": 4.9425, "step": 41200 }, { "epoch": 0.07893706009732569, "grad_norm": 1.4691622257232666, "learning_rate": 7.891379717658776e-05, "loss": 4.9207, "step": 41210 }, { "epoch": 0.07895621492870093, "grad_norm": 1.4502253532409668, "learning_rate": 7.893295194069684e-05, "loss": 4.8698, "step": 41220 }, { "epoch": 0.07897536976007616, "grad_norm": 1.5040085315704346, "learning_rate": 7.895210670480593e-05, "loss": 4.8297, "step": 41230 }, { "epoch": 0.07899452459145138, "grad_norm": 1.485991358757019, "learning_rate": 7.8971261468915e-05, "loss": 4.8175, "step": 41240 }, { "epoch": 0.07901367942282662, "grad_norm": 1.5902656316757202, "learning_rate": 7.899041623302408e-05, "loss": 4.9204, "step": 41250 }, { "epoch": 0.07903283425420185, "grad_norm": 1.4194499254226685, "learning_rate": 7.900957099713318e-05, "loss": 5.0427, "step": 41260 }, { "epoch": 0.07905198908557708, "grad_norm": 1.4669581651687622, "learning_rate": 7.902872576124224e-05, "loss": 4.7363, "step": 41270 }, { "epoch": 0.07907114391695232, "grad_norm": 1.6007343530654907, "learning_rate": 7.904788052535133e-05, "loss": 4.8824, "step": 41280 }, { "epoch": 0.07909029874832754, "grad_norm": 1.4464948177337646, "learning_rate": 7.90670352894604e-05, "loss": 4.7782, "step": 41290 }, { "epoch": 0.07910945357970277, "grad_norm": 1.4111566543579102, "learning_rate": 
7.908619005356948e-05, "loss": 4.7803, "step": 41300 }, { "epoch": 0.07912860841107801, "grad_norm": 1.5698320865631104, "learning_rate": 7.910534481767857e-05, "loss": 4.838, "step": 41310 }, { "epoch": 0.07914776324245323, "grad_norm": 1.5434602499008179, "learning_rate": 7.912449958178764e-05, "loss": 4.8305, "step": 41320 }, { "epoch": 0.07916691807382846, "grad_norm": 1.4773006439208984, "learning_rate": 7.914365434589672e-05, "loss": 4.927, "step": 41330 }, { "epoch": 0.0791860729052037, "grad_norm": 1.4827364683151245, "learning_rate": 7.916280911000581e-05, "loss": 4.7807, "step": 41340 }, { "epoch": 0.07920522773657893, "grad_norm": 1.510725498199463, "learning_rate": 7.918196387411488e-05, "loss": 4.8227, "step": 41350 }, { "epoch": 0.07922438256795415, "grad_norm": 1.3833568096160889, "learning_rate": 7.920111863822396e-05, "loss": 4.7597, "step": 41360 }, { "epoch": 0.07924353739932939, "grad_norm": 1.4554049968719482, "learning_rate": 7.922027340233306e-05, "loss": 4.8253, "step": 41370 }, { "epoch": 0.07926269223070462, "grad_norm": 1.4618775844573975, "learning_rate": 7.923942816644213e-05, "loss": 4.9209, "step": 41380 }, { "epoch": 0.07928184706207986, "grad_norm": 1.4559192657470703, "learning_rate": 7.925858293055121e-05, "loss": 4.7965, "step": 41390 }, { "epoch": 0.07930100189345508, "grad_norm": 1.5593523979187012, "learning_rate": 7.927773769466027e-05, "loss": 4.8873, "step": 41400 }, { "epoch": 0.07932015672483031, "grad_norm": 1.4727472066879272, "learning_rate": 7.929689245876936e-05, "loss": 4.9724, "step": 41410 }, { "epoch": 0.07933931155620555, "grad_norm": 1.3735982179641724, "learning_rate": 7.931604722287845e-05, "loss": 4.8534, "step": 41420 }, { "epoch": 0.07935846638758078, "grad_norm": 1.5060961246490479, "learning_rate": 7.933520198698752e-05, "loss": 4.8461, "step": 41430 }, { "epoch": 0.079377621218956, "grad_norm": 1.5531398057937622, "learning_rate": 7.93543567510966e-05, "loss": 4.8023, "step": 41440 }, { "epoch": 
0.07939677605033124, "grad_norm": 1.4598959684371948, "learning_rate": 7.93735115152057e-05, "loss": 4.8517, "step": 41450 }, { "epoch": 0.07941593088170647, "grad_norm": 1.4402596950531006, "learning_rate": 7.939266627931476e-05, "loss": 4.9066, "step": 41460 }, { "epoch": 0.0794350857130817, "grad_norm": 1.36367666721344, "learning_rate": 7.941182104342384e-05, "loss": 4.7873, "step": 41470 }, { "epoch": 0.07945424054445693, "grad_norm": 1.4391474723815918, "learning_rate": 7.943097580753291e-05, "loss": 4.9011, "step": 41480 }, { "epoch": 0.07947339537583216, "grad_norm": 1.5026016235351562, "learning_rate": 7.9450130571642e-05, "loss": 4.771, "step": 41490 }, { "epoch": 0.07949255020720739, "grad_norm": 1.4553922414779663, "learning_rate": 7.946928533575109e-05, "loss": 4.963, "step": 41500 }, { "epoch": 0.07951170503858263, "grad_norm": 1.450364589691162, "learning_rate": 7.948844009986016e-05, "loss": 4.7157, "step": 41510 }, { "epoch": 0.07953085986995785, "grad_norm": 1.4865802526474, "learning_rate": 7.950759486396924e-05, "loss": 4.9496, "step": 41520 }, { "epoch": 0.07955001470133308, "grad_norm": 1.477342128753662, "learning_rate": 7.952674962807833e-05, "loss": 4.9869, "step": 41530 }, { "epoch": 0.07956916953270832, "grad_norm": 1.4005182981491089, "learning_rate": 7.95459043921874e-05, "loss": 4.9487, "step": 41540 }, { "epoch": 0.07958832436408354, "grad_norm": 1.4514278173446655, "learning_rate": 7.956505915629648e-05, "loss": 4.9115, "step": 41550 }, { "epoch": 0.07960747919545877, "grad_norm": 1.4309751987457275, "learning_rate": 7.958421392040558e-05, "loss": 4.921, "step": 41560 }, { "epoch": 0.07962663402683401, "grad_norm": 1.4614055156707764, "learning_rate": 7.960336868451464e-05, "loss": 4.943, "step": 41570 }, { "epoch": 0.07964578885820923, "grad_norm": 1.4859614372253418, "learning_rate": 7.962252344862372e-05, "loss": 4.7951, "step": 41580 }, { "epoch": 0.07966494368958446, "grad_norm": 1.4834285974502563, "learning_rate": 
7.964167821273279e-05, "loss": 4.8715, "step": 41590 }, { "epoch": 0.0796840985209597, "grad_norm": 1.4542394876480103, "learning_rate": 7.966083297684189e-05, "loss": 4.8654, "step": 41600 }, { "epoch": 0.07970325335233493, "grad_norm": 1.5750007629394531, "learning_rate": 7.967998774095097e-05, "loss": 4.9602, "step": 41610 }, { "epoch": 0.07972240818371015, "grad_norm": 1.5079017877578735, "learning_rate": 7.969914250506004e-05, "loss": 4.7989, "step": 41620 }, { "epoch": 0.07974156301508539, "grad_norm": 1.5731087923049927, "learning_rate": 7.971829726916912e-05, "loss": 4.9735, "step": 41630 }, { "epoch": 0.07976071784646062, "grad_norm": 1.4342156648635864, "learning_rate": 7.973745203327821e-05, "loss": 4.7465, "step": 41640 }, { "epoch": 0.07977987267783584, "grad_norm": 1.3832708597183228, "learning_rate": 7.975660679738728e-05, "loss": 4.9011, "step": 41650 }, { "epoch": 0.07979902750921108, "grad_norm": 1.5405551195144653, "learning_rate": 7.977576156149636e-05, "loss": 4.8596, "step": 41660 }, { "epoch": 0.07981818234058631, "grad_norm": 1.4919366836547852, "learning_rate": 7.979491632560546e-05, "loss": 4.8135, "step": 41670 }, { "epoch": 0.07983733717196154, "grad_norm": 1.4800729751586914, "learning_rate": 7.981407108971452e-05, "loss": 4.7095, "step": 41680 }, { "epoch": 0.07985649200333678, "grad_norm": 1.4636934995651245, "learning_rate": 7.98332258538236e-05, "loss": 4.766, "step": 41690 }, { "epoch": 0.079875646834712, "grad_norm": 1.504737138748169, "learning_rate": 7.985238061793267e-05, "loss": 4.81, "step": 41700 }, { "epoch": 0.07989480166608723, "grad_norm": 1.444576621055603, "learning_rate": 7.987153538204177e-05, "loss": 4.8405, "step": 41710 }, { "epoch": 0.07991395649746247, "grad_norm": 1.4153876304626465, "learning_rate": 7.989069014615085e-05, "loss": 5.0133, "step": 41720 }, { "epoch": 0.0799331113288377, "grad_norm": 1.4255415201187134, "learning_rate": 7.990984491025992e-05, "loss": 4.8537, "step": 41730 }, { "epoch": 
0.07995226616021292, "grad_norm": 1.6454298496246338, "learning_rate": 7.9928999674369e-05, "loss": 4.8149, "step": 41740 }, { "epoch": 0.07997142099158816, "grad_norm": 1.3734040260314941, "learning_rate": 7.994815443847809e-05, "loss": 4.9313, "step": 41750 }, { "epoch": 0.07999057582296339, "grad_norm": 1.4789938926696777, "learning_rate": 7.996730920258716e-05, "loss": 4.9506, "step": 41760 }, { "epoch": 0.08000973065433861, "grad_norm": 1.447758674621582, "learning_rate": 7.998646396669624e-05, "loss": 4.838, "step": 41770 }, { "epoch": 0.08002888548571385, "grad_norm": 1.3951042890548706, "learning_rate": 8.000561873080531e-05, "loss": 4.9842, "step": 41780 }, { "epoch": 0.08004804031708908, "grad_norm": 1.519370436668396, "learning_rate": 8.00247734949144e-05, "loss": 4.8658, "step": 41790 }, { "epoch": 0.0800671951484643, "grad_norm": 1.4150729179382324, "learning_rate": 8.004392825902349e-05, "loss": 4.9827, "step": 41800 }, { "epoch": 0.08008634997983954, "grad_norm": 1.5427658557891846, "learning_rate": 8.006308302313255e-05, "loss": 4.6329, "step": 41810 }, { "epoch": 0.08010550481121477, "grad_norm": 1.4727263450622559, "learning_rate": 8.008223778724165e-05, "loss": 4.8399, "step": 41820 }, { "epoch": 0.08012465964259, "grad_norm": 1.421019196510315, "learning_rate": 8.010139255135073e-05, "loss": 4.831, "step": 41830 }, { "epoch": 0.08014381447396524, "grad_norm": 1.476572871208191, "learning_rate": 8.01205473154598e-05, "loss": 4.7991, "step": 41840 }, { "epoch": 0.08016296930534046, "grad_norm": 1.4448919296264648, "learning_rate": 8.013970207956888e-05, "loss": 4.782, "step": 41850 }, { "epoch": 0.08018212413671569, "grad_norm": 1.5722267627716064, "learning_rate": 8.015885684367797e-05, "loss": 4.7528, "step": 41860 }, { "epoch": 0.08020127896809093, "grad_norm": 1.5790431499481201, "learning_rate": 8.017801160778704e-05, "loss": 4.7821, "step": 41870 }, { "epoch": 0.08022043379946615, "grad_norm": 1.5010168552398682, "learning_rate": 
8.019716637189612e-05, "loss": 4.8506, "step": 41880 }, { "epoch": 0.08023958863084138, "grad_norm": 1.4055747985839844, "learning_rate": 8.021632113600519e-05, "loss": 4.717, "step": 41890 }, { "epoch": 0.08025874346221662, "grad_norm": 1.5179332494735718, "learning_rate": 8.023547590011429e-05, "loss": 4.9183, "step": 41900 }, { "epoch": 0.08027789829359185, "grad_norm": 1.4409432411193848, "learning_rate": 8.025463066422337e-05, "loss": 5.0162, "step": 41910 }, { "epoch": 0.08029705312496707, "grad_norm": 1.393713116645813, "learning_rate": 8.027378542833243e-05, "loss": 4.8826, "step": 41920 }, { "epoch": 0.08031620795634231, "grad_norm": 1.4999772310256958, "learning_rate": 8.029294019244153e-05, "loss": 4.9223, "step": 41930 }, { "epoch": 0.08033536278771754, "grad_norm": 1.5287491083145142, "learning_rate": 8.031209495655061e-05, "loss": 4.7285, "step": 41940 }, { "epoch": 0.08035451761909276, "grad_norm": 1.4900076389312744, "learning_rate": 8.033124972065968e-05, "loss": 4.8007, "step": 41950 }, { "epoch": 0.080373672450468, "grad_norm": 1.573486328125, "learning_rate": 8.035040448476876e-05, "loss": 4.8209, "step": 41960 }, { "epoch": 0.08039282728184323, "grad_norm": 1.478770136833191, "learning_rate": 8.036955924887784e-05, "loss": 4.8247, "step": 41970 }, { "epoch": 0.08041198211321846, "grad_norm": 1.474347710609436, "learning_rate": 8.038871401298692e-05, "loss": 4.871, "step": 41980 }, { "epoch": 0.0804311369445937, "grad_norm": 1.4231036901474, "learning_rate": 8.0407868777096e-05, "loss": 4.9793, "step": 41990 }, { "epoch": 0.08045029177596892, "grad_norm": 1.542468547821045, "learning_rate": 8.042702354120507e-05, "loss": 4.8593, "step": 42000 }, { "epoch": 0.08046944660734415, "grad_norm": 1.4084290266036987, "learning_rate": 8.044617830531417e-05, "loss": 4.7506, "step": 42010 }, { "epoch": 0.08048860143871939, "grad_norm": 1.477000117301941, "learning_rate": 8.046533306942325e-05, "loss": 5.0615, "step": 42020 }, { "epoch": 
0.08050775627009461, "grad_norm": 1.4223695993423462, "learning_rate": 8.048448783353232e-05, "loss": 4.9139, "step": 42030 }, { "epoch": 0.08052691110146984, "grad_norm": 1.4153045415878296, "learning_rate": 8.050364259764141e-05, "loss": 4.9353, "step": 42040 }, { "epoch": 0.08054606593284508, "grad_norm": 1.4323807954788208, "learning_rate": 8.052279736175049e-05, "loss": 4.797, "step": 42050 }, { "epoch": 0.0805652207642203, "grad_norm": 1.4420249462127686, "learning_rate": 8.054195212585956e-05, "loss": 4.8524, "step": 42060 }, { "epoch": 0.08058437559559554, "grad_norm": 1.4889580011367798, "learning_rate": 8.056110688996864e-05, "loss": 4.8544, "step": 42070 }, { "epoch": 0.08060353042697077, "grad_norm": 1.4540475606918335, "learning_rate": 8.058026165407772e-05, "loss": 4.9343, "step": 42080 }, { "epoch": 0.080622685258346, "grad_norm": 1.492664098739624, "learning_rate": 8.05994164181868e-05, "loss": 4.6893, "step": 42090 }, { "epoch": 0.08064184008972124, "grad_norm": 1.3849334716796875, "learning_rate": 8.061857118229588e-05, "loss": 4.8133, "step": 42100 }, { "epoch": 0.08066099492109646, "grad_norm": 1.4777835607528687, "learning_rate": 8.063772594640495e-05, "loss": 4.8052, "step": 42110 }, { "epoch": 0.08068014975247169, "grad_norm": 1.4185295104980469, "learning_rate": 8.065688071051405e-05, "loss": 4.728, "step": 42120 }, { "epoch": 0.08069930458384693, "grad_norm": 1.3755367994308472, "learning_rate": 8.067603547462313e-05, "loss": 4.9145, "step": 42130 }, { "epoch": 0.08071845941522215, "grad_norm": 1.4621713161468506, "learning_rate": 8.06951902387322e-05, "loss": 4.7699, "step": 42140 }, { "epoch": 0.08073761424659738, "grad_norm": 1.4930098056793213, "learning_rate": 8.071434500284129e-05, "loss": 4.8399, "step": 42150 }, { "epoch": 0.08075676907797262, "grad_norm": 1.4724488258361816, "learning_rate": 8.073349976695036e-05, "loss": 4.7895, "step": 42160 }, { "epoch": 0.08077592390934785, "grad_norm": 1.4663848876953125, "learning_rate": 
8.075265453105944e-05, "loss": 4.8384, "step": 42170 }, { "epoch": 0.08079507874072307, "grad_norm": 1.3628623485565186, "learning_rate": 8.076989381875761e-05, "loss": 4.8405, "step": 42180 }, { "epoch": 0.08081423357209831, "grad_norm": 1.4216082096099854, "learning_rate": 8.07890485828667e-05, "loss": 4.7956, "step": 42190 }, { "epoch": 0.08083338840347354, "grad_norm": 1.4466660022735596, "learning_rate": 8.080820334697578e-05, "loss": 4.8987, "step": 42200 }, { "epoch": 0.08085254323484876, "grad_norm": 1.4631038904190063, "learning_rate": 8.082735811108484e-05, "loss": 4.8673, "step": 42210 }, { "epoch": 0.080871698066224, "grad_norm": 1.3827608823776245, "learning_rate": 8.084651287519394e-05, "loss": 4.8546, "step": 42220 }, { "epoch": 0.08089085289759923, "grad_norm": 1.4129616022109985, "learning_rate": 8.086566763930302e-05, "loss": 4.8117, "step": 42230 }, { "epoch": 0.08091000772897446, "grad_norm": 1.3565971851348877, "learning_rate": 8.088482240341209e-05, "loss": 4.8769, "step": 42240 }, { "epoch": 0.0809291625603497, "grad_norm": 1.4321303367614746, "learning_rate": 8.090397716752118e-05, "loss": 4.7662, "step": 42250 }, { "epoch": 0.08094831739172492, "grad_norm": 1.8952915668487549, "learning_rate": 8.092313193163025e-05, "loss": 4.727, "step": 42260 }, { "epoch": 0.08096747222310015, "grad_norm": 1.428084135055542, "learning_rate": 8.094228669573933e-05, "loss": 4.9496, "step": 42270 }, { "epoch": 0.08098662705447539, "grad_norm": 1.4154072999954224, "learning_rate": 8.096144145984841e-05, "loss": 4.94, "step": 42280 }, { "epoch": 0.08100578188585061, "grad_norm": 1.428460717201233, "learning_rate": 8.09805962239575e-05, "loss": 4.9312, "step": 42290 }, { "epoch": 0.08102493671722584, "grad_norm": 1.7804378271102905, "learning_rate": 8.099975098806658e-05, "loss": 4.8351, "step": 42300 }, { "epoch": 0.08104409154860108, "grad_norm": 1.5213801860809326, "learning_rate": 8.101890575217566e-05, "loss": 4.9118, "step": 42310 }, { "epoch": 
0.0810632463799763, "grad_norm": 1.4797754287719727, "learning_rate": 8.103806051628472e-05, "loss": 4.761, "step": 42320 }, { "epoch": 0.08108240121135153, "grad_norm": 1.4987844228744507, "learning_rate": 8.105721528039382e-05, "loss": 4.7156, "step": 42330 }, { "epoch": 0.08110155604272677, "grad_norm": 1.3959523439407349, "learning_rate": 8.10763700445029e-05, "loss": 4.8782, "step": 42340 }, { "epoch": 0.081120710874102, "grad_norm": 1.4697489738464355, "learning_rate": 8.109552480861197e-05, "loss": 4.8599, "step": 42350 }, { "epoch": 0.08113986570547722, "grad_norm": 1.4059840440750122, "learning_rate": 8.111467957272106e-05, "loss": 4.7021, "step": 42360 }, { "epoch": 0.08115902053685246, "grad_norm": 1.5372895002365112, "learning_rate": 8.113191886041922e-05, "loss": 4.7404, "step": 42370 }, { "epoch": 0.08117817536822769, "grad_norm": 1.6131328344345093, "learning_rate": 8.115107362452832e-05, "loss": 4.9602, "step": 42380 }, { "epoch": 0.08119733019960292, "grad_norm": 1.5242347717285156, "learning_rate": 8.117022838863739e-05, "loss": 4.9771, "step": 42390 }, { "epoch": 0.08121648503097816, "grad_norm": 1.5321286916732788, "learning_rate": 8.118938315274647e-05, "loss": 4.8559, "step": 42400 }, { "epoch": 0.08123563986235338, "grad_norm": 1.4419623613357544, "learning_rate": 8.120853791685555e-05, "loss": 4.856, "step": 42410 }, { "epoch": 0.08125479469372861, "grad_norm": 1.415654182434082, "learning_rate": 8.122769268096463e-05, "loss": 4.8937, "step": 42420 }, { "epoch": 0.08127394952510385, "grad_norm": 1.3705178499221802, "learning_rate": 8.124684744507371e-05, "loss": 4.8929, "step": 42430 }, { "epoch": 0.08129310435647907, "grad_norm": 1.5295014381408691, "learning_rate": 8.126600220918278e-05, "loss": 4.889, "step": 42440 }, { "epoch": 0.0813122591878543, "grad_norm": 1.4415606260299683, "learning_rate": 8.128515697329186e-05, "loss": 4.9505, "step": 42450 }, { "epoch": 0.08133141401922954, "grad_norm": 1.4427614212036133, "learning_rate": 
8.130431173740095e-05, "loss": 4.8362, "step": 42460 }, { "epoch": 0.08135056885060477, "grad_norm": 1.412187933921814, "learning_rate": 8.132346650151002e-05, "loss": 4.8685, "step": 42470 }, { "epoch": 0.08136972368197999, "grad_norm": 1.4036500453948975, "learning_rate": 8.13426212656191e-05, "loss": 4.9988, "step": 42480 }, { "epoch": 0.08138887851335523, "grad_norm": 1.4243066310882568, "learning_rate": 8.13617760297282e-05, "loss": 5.0013, "step": 42490 }, { "epoch": 0.08140803334473046, "grad_norm": 1.381042242050171, "learning_rate": 8.138093079383727e-05, "loss": 4.7988, "step": 42500 }, { "epoch": 0.08142718817610568, "grad_norm": 1.4258421659469604, "learning_rate": 8.140008555794635e-05, "loss": 4.9376, "step": 42510 }, { "epoch": 0.08144634300748092, "grad_norm": 1.424717664718628, "learning_rate": 8.141924032205543e-05, "loss": 4.9194, "step": 42520 }, { "epoch": 0.08146549783885615, "grad_norm": 1.4019099473953247, "learning_rate": 8.143839508616451e-05, "loss": 4.8459, "step": 42530 }, { "epoch": 0.08148465267023138, "grad_norm": 1.429917812347412, "learning_rate": 8.145754985027359e-05, "loss": 4.9605, "step": 42540 }, { "epoch": 0.08150380750160661, "grad_norm": 1.5450875759124756, "learning_rate": 8.147670461438266e-05, "loss": 4.8995, "step": 42550 }, { "epoch": 0.08152296233298184, "grad_norm": 1.4583277702331543, "learning_rate": 8.149585937849174e-05, "loss": 4.7921, "step": 42560 }, { "epoch": 0.08154211716435707, "grad_norm": 1.4519954919815063, "learning_rate": 8.151501414260084e-05, "loss": 4.8251, "step": 42570 }, { "epoch": 0.0815612719957323, "grad_norm": 1.4517229795455933, "learning_rate": 8.15341689067099e-05, "loss": 4.7662, "step": 42580 }, { "epoch": 0.08158042682710753, "grad_norm": 1.4880417585372925, "learning_rate": 8.155332367081898e-05, "loss": 4.7935, "step": 42590 }, { "epoch": 0.08159958165848276, "grad_norm": 1.4564191102981567, "learning_rate": 8.157247843492808e-05, "loss": 4.8986, "step": 42600 }, { "epoch": 
0.081618736489858, "grad_norm": 1.3994944095611572, "learning_rate": 8.159163319903715e-05, "loss": 4.7552, "step": 42610 }, { "epoch": 0.08163789132123322, "grad_norm": 1.4759944677352905, "learning_rate": 8.161078796314623e-05, "loss": 4.8515, "step": 42620 }, { "epoch": 0.08165704615260845, "grad_norm": 1.500089406967163, "learning_rate": 8.16299427272553e-05, "loss": 4.8121, "step": 42630 }, { "epoch": 0.08167620098398369, "grad_norm": 1.43048894405365, "learning_rate": 8.164909749136439e-05, "loss": 4.7118, "step": 42640 }, { "epoch": 0.08169535581535892, "grad_norm": 1.440463662147522, "learning_rate": 8.166825225547347e-05, "loss": 4.9306, "step": 42650 }, { "epoch": 0.08171451064673414, "grad_norm": 1.4003148078918457, "learning_rate": 8.168740701958254e-05, "loss": 4.9194, "step": 42660 }, { "epoch": 0.08173366547810938, "grad_norm": 1.46210515499115, "learning_rate": 8.170656178369162e-05, "loss": 4.7897, "step": 42670 }, { "epoch": 0.08175282030948461, "grad_norm": 1.4420543909072876, "learning_rate": 8.172571654780072e-05, "loss": 4.9703, "step": 42680 }, { "epoch": 0.08177197514085983, "grad_norm": 1.4567903280258179, "learning_rate": 8.174487131190978e-05, "loss": 4.8558, "step": 42690 }, { "epoch": 0.08179112997223507, "grad_norm": 1.4472323656082153, "learning_rate": 8.176402607601887e-05, "loss": 4.8078, "step": 42700 }, { "epoch": 0.0818102848036103, "grad_norm": 1.51057767868042, "learning_rate": 8.178318084012796e-05, "loss": 4.8658, "step": 42710 }, { "epoch": 0.08182943963498554, "grad_norm": 1.411787986755371, "learning_rate": 8.180233560423703e-05, "loss": 4.8589, "step": 42720 }, { "epoch": 0.08184859446636077, "grad_norm": 1.430822491645813, "learning_rate": 8.182149036834611e-05, "loss": 4.8515, "step": 42730 }, { "epoch": 0.08186774929773599, "grad_norm": 1.4517470598220825, "learning_rate": 8.184064513245518e-05, "loss": 4.8963, "step": 42740 }, { "epoch": 0.08188690412911123, "grad_norm": 1.4967401027679443, "learning_rate": 
8.185979989656427e-05, "loss": 5.0053, "step": 42750 }, { "epoch": 0.08190605896048646, "grad_norm": 1.4715150594711304, "learning_rate": 8.187895466067335e-05, "loss": 4.8075, "step": 42760 }, { "epoch": 0.08192521379186168, "grad_norm": 1.5144003629684448, "learning_rate": 8.189810942478242e-05, "loss": 4.7749, "step": 42770 }, { "epoch": 0.08194436862323692, "grad_norm": 1.4407318830490112, "learning_rate": 8.19172641888915e-05, "loss": 4.8242, "step": 42780 }, { "epoch": 0.08196352345461215, "grad_norm": 1.456937313079834, "learning_rate": 8.19364189530006e-05, "loss": 4.8259, "step": 42790 }, { "epoch": 0.08198267828598738, "grad_norm": 1.4595277309417725, "learning_rate": 8.195557371710966e-05, "loss": 4.8667, "step": 42800 }, { "epoch": 0.08200183311736262, "grad_norm": 1.5223692655563354, "learning_rate": 8.197472848121875e-05, "loss": 5.0595, "step": 42810 }, { "epoch": 0.08202098794873784, "grad_norm": 1.4992905855178833, "learning_rate": 8.199388324532784e-05, "loss": 4.8654, "step": 42820 }, { "epoch": 0.08204014278011307, "grad_norm": 1.602926254272461, "learning_rate": 8.201303800943691e-05, "loss": 4.7839, "step": 42830 }, { "epoch": 0.08205929761148831, "grad_norm": 1.4441325664520264, "learning_rate": 8.203219277354599e-05, "loss": 4.7444, "step": 42840 }, { "epoch": 0.08207845244286353, "grad_norm": 1.4368436336517334, "learning_rate": 8.205134753765506e-05, "loss": 4.8523, "step": 42850 }, { "epoch": 0.08209760727423876, "grad_norm": 1.4254034757614136, "learning_rate": 8.207050230176415e-05, "loss": 4.8214, "step": 42860 }, { "epoch": 0.082116762105614, "grad_norm": 1.4914493560791016, "learning_rate": 8.208965706587323e-05, "loss": 4.841, "step": 42870 }, { "epoch": 0.08213591693698923, "grad_norm": 1.4966070652008057, "learning_rate": 8.21088118299823e-05, "loss": 4.9513, "step": 42880 }, { "epoch": 0.08215507176836445, "grad_norm": 1.4792038202285767, "learning_rate": 8.212796659409138e-05, "loss": 4.9306, "step": 42890 }, { "epoch": 
0.08217422659973969, "grad_norm": 1.4196738004684448, "learning_rate": 8.214712135820048e-05, "loss": 4.749, "step": 42900 }, { "epoch": 0.08219338143111492, "grad_norm": 1.4446382522583008, "learning_rate": 8.216627612230955e-05, "loss": 4.7095, "step": 42910 }, { "epoch": 0.08221253626249014, "grad_norm": 1.4902476072311401, "learning_rate": 8.218543088641863e-05, "loss": 4.7022, "step": 42920 }, { "epoch": 0.08223169109386538, "grad_norm": 1.5060783624649048, "learning_rate": 8.22045856505277e-05, "loss": 4.8626, "step": 42930 }, { "epoch": 0.08225084592524061, "grad_norm": 1.3852611780166626, "learning_rate": 8.222374041463679e-05, "loss": 4.83, "step": 42940 }, { "epoch": 0.08227000075661584, "grad_norm": 1.4527090787887573, "learning_rate": 8.224289517874587e-05, "loss": 4.8062, "step": 42950 }, { "epoch": 0.08228915558799108, "grad_norm": 1.4417473077774048, "learning_rate": 8.226204994285494e-05, "loss": 4.8618, "step": 42960 }, { "epoch": 0.0823083104193663, "grad_norm": 1.5171846151351929, "learning_rate": 8.228120470696402e-05, "loss": 4.8552, "step": 42970 }, { "epoch": 0.08232746525074153, "grad_norm": 1.5150361061096191, "learning_rate": 8.230035947107311e-05, "loss": 4.8087, "step": 42980 }, { "epoch": 0.08234662008211677, "grad_norm": 1.4290390014648438, "learning_rate": 8.231951423518218e-05, "loss": 4.7931, "step": 42990 }, { "epoch": 0.082365774913492, "grad_norm": 1.474460244178772, "learning_rate": 8.233866899929126e-05, "loss": 4.8261, "step": 43000 }, { "epoch": 0.08238492974486722, "grad_norm": 1.4991554021835327, "learning_rate": 8.235782376340036e-05, "loss": 4.7515, "step": 43010 }, { "epoch": 0.08240408457624246, "grad_norm": 1.4881041049957275, "learning_rate": 8.237697852750943e-05, "loss": 4.903, "step": 43020 }, { "epoch": 0.08242323940761768, "grad_norm": 1.3853962421417236, "learning_rate": 8.239613329161851e-05, "loss": 4.8761, "step": 43030 }, { "epoch": 0.08244239423899291, "grad_norm": 1.5955384969711304, "learning_rate": 
8.241528805572758e-05, "loss": 4.9931, "step": 43040 }, { "epoch": 0.08246154907036815, "grad_norm": 1.391139268875122, "learning_rate": 8.243444281983667e-05, "loss": 4.7277, "step": 43050 }, { "epoch": 0.08248070390174338, "grad_norm": 1.4977537393569946, "learning_rate": 8.245359758394575e-05, "loss": 4.8458, "step": 43060 }, { "epoch": 0.0824998587331186, "grad_norm": 1.428105354309082, "learning_rate": 8.247275234805482e-05, "loss": 4.6816, "step": 43070 }, { "epoch": 0.08251901356449384, "grad_norm": 1.521772027015686, "learning_rate": 8.24919071121639e-05, "loss": 4.7613, "step": 43080 }, { "epoch": 0.08253816839586907, "grad_norm": 1.467673420906067, "learning_rate": 8.2511061876273e-05, "loss": 4.9619, "step": 43090 }, { "epoch": 0.0825573232272443, "grad_norm": 1.3995466232299805, "learning_rate": 8.253021664038206e-05, "loss": 4.8712, "step": 43100 }, { "epoch": 0.08257647805861953, "grad_norm": 1.5247523784637451, "learning_rate": 8.254937140449114e-05, "loss": 4.8149, "step": 43110 }, { "epoch": 0.08259563288999476, "grad_norm": 1.5527738332748413, "learning_rate": 8.256852616860021e-05, "loss": 4.9886, "step": 43120 }, { "epoch": 0.08261478772136999, "grad_norm": 1.5292695760726929, "learning_rate": 8.258768093270931e-05, "loss": 4.8068, "step": 43130 }, { "epoch": 0.08263394255274523, "grad_norm": 1.613073706626892, "learning_rate": 8.260683569681839e-05, "loss": 4.8087, "step": 43140 }, { "epoch": 0.08265309738412045, "grad_norm": 1.4344489574432373, "learning_rate": 8.262599046092746e-05, "loss": 4.7719, "step": 43150 }, { "epoch": 0.08267225221549568, "grad_norm": 1.4408390522003174, "learning_rate": 8.264514522503655e-05, "loss": 4.8358, "step": 43160 }, { "epoch": 0.08269140704687092, "grad_norm": 1.4556934833526611, "learning_rate": 8.266429998914563e-05, "loss": 4.8389, "step": 43170 }, { "epoch": 0.08271056187824614, "grad_norm": 1.4555728435516357, "learning_rate": 8.26834547532547e-05, "loss": 4.8374, "step": 43180 }, { "epoch": 
0.08272971670962137, "grad_norm": 1.5243762731552124, "learning_rate": 8.270260951736378e-05, "loss": 4.8421, "step": 43190 }, { "epoch": 0.08274887154099661, "grad_norm": 1.4536899328231812, "learning_rate": 8.272176428147288e-05, "loss": 4.8676, "step": 43200 }, { "epoch": 0.08276802637237184, "grad_norm": 1.4388313293457031, "learning_rate": 8.274091904558194e-05, "loss": 4.8736, "step": 43210 }, { "epoch": 0.08278718120374706, "grad_norm": 1.375286340713501, "learning_rate": 8.276007380969103e-05, "loss": 4.8752, "step": 43220 }, { "epoch": 0.0828063360351223, "grad_norm": 1.426927924156189, "learning_rate": 8.27792285738001e-05, "loss": 4.7485, "step": 43230 }, { "epoch": 0.08282549086649753, "grad_norm": 1.4476927518844604, "learning_rate": 8.279838333790919e-05, "loss": 4.9124, "step": 43240 }, { "epoch": 0.08284464569787275, "grad_norm": 1.4241341352462769, "learning_rate": 8.281753810201827e-05, "loss": 4.6834, "step": 43250 }, { "epoch": 0.082863800529248, "grad_norm": 1.480594515800476, "learning_rate": 8.283669286612734e-05, "loss": 4.7164, "step": 43260 }, { "epoch": 0.08288295536062322, "grad_norm": 1.4606612920761108, "learning_rate": 8.285584763023643e-05, "loss": 4.8979, "step": 43270 }, { "epoch": 0.08290211019199845, "grad_norm": 1.4771054983139038, "learning_rate": 8.287500239434551e-05, "loss": 4.8093, "step": 43280 }, { "epoch": 0.08292126502337369, "grad_norm": 1.419952154159546, "learning_rate": 8.289415715845458e-05, "loss": 4.8187, "step": 43290 }, { "epoch": 0.08294041985474891, "grad_norm": 1.3686336278915405, "learning_rate": 8.291331192256366e-05, "loss": 4.9232, "step": 43300 }, { "epoch": 0.08295957468612414, "grad_norm": 1.4704691171646118, "learning_rate": 8.293246668667274e-05, "loss": 4.8814, "step": 43310 }, { "epoch": 0.08297872951749938, "grad_norm": 1.446851134300232, "learning_rate": 8.295162145078183e-05, "loss": 4.923, "step": 43320 }, { "epoch": 0.0829978843488746, "grad_norm": 1.3965815305709839, "learning_rate": 
8.29707762148909e-05, "loss": 4.9361, "step": 43330 }, { "epoch": 0.08301703918024983, "grad_norm": 1.4491024017333984, "learning_rate": 8.298993097899997e-05, "loss": 4.8153, "step": 43340 }, { "epoch": 0.08303619401162507, "grad_norm": 1.4221818447113037, "learning_rate": 8.300908574310907e-05, "loss": 4.871, "step": 43350 }, { "epoch": 0.0830553488430003, "grad_norm": 1.5874286890029907, "learning_rate": 8.302824050721815e-05, "loss": 4.9237, "step": 43360 }, { "epoch": 0.08307450367437552, "grad_norm": 1.5173193216323853, "learning_rate": 8.304739527132722e-05, "loss": 4.7181, "step": 43370 }, { "epoch": 0.08309365850575076, "grad_norm": 1.4193639755249023, "learning_rate": 8.306655003543631e-05, "loss": 4.9062, "step": 43380 }, { "epoch": 0.08311281333712599, "grad_norm": 1.5388034582138062, "learning_rate": 8.30857047995454e-05, "loss": 4.8114, "step": 43390 }, { "epoch": 0.08313196816850123, "grad_norm": 1.4609551429748535, "learning_rate": 8.310485956365446e-05, "loss": 4.6841, "step": 43400 }, { "epoch": 0.08315112299987645, "grad_norm": 1.4973706007003784, "learning_rate": 8.312401432776354e-05, "loss": 4.7879, "step": 43410 }, { "epoch": 0.08317027783125168, "grad_norm": 1.42003333568573, "learning_rate": 8.314316909187262e-05, "loss": 4.7977, "step": 43420 }, { "epoch": 0.08318943266262692, "grad_norm": 1.3818142414093018, "learning_rate": 8.31623238559817e-05, "loss": 4.8552, "step": 43430 }, { "epoch": 0.08320858749400215, "grad_norm": 1.3693602085113525, "learning_rate": 8.318147862009079e-05, "loss": 4.7341, "step": 43440 }, { "epoch": 0.08322774232537737, "grad_norm": 1.465531587600708, "learning_rate": 8.320063338419985e-05, "loss": 4.8822, "step": 43450 }, { "epoch": 0.08324689715675261, "grad_norm": 1.3812507390975952, "learning_rate": 8.321978814830895e-05, "loss": 4.7586, "step": 43460 }, { "epoch": 0.08326605198812784, "grad_norm": 1.506309151649475, "learning_rate": 8.323894291241803e-05, "loss": 4.8593, "step": 43470 }, { "epoch": 
0.08328520681950306, "grad_norm": 1.4155610799789429, "learning_rate": 8.32580976765271e-05, "loss": 4.8178, "step": 43480 }, { "epoch": 0.0833043616508783, "grad_norm": 1.5206128358840942, "learning_rate": 8.32772524406362e-05, "loss": 4.8312, "step": 43490 }, { "epoch": 0.08332351648225353, "grad_norm": 1.3803778886795044, "learning_rate": 8.329640720474526e-05, "loss": 4.789, "step": 43500 }, { "epoch": 0.08334267131362876, "grad_norm": 1.4144477844238281, "learning_rate": 8.331556196885434e-05, "loss": 4.7558, "step": 43510 }, { "epoch": 0.083361826145004, "grad_norm": 1.4130585193634033, "learning_rate": 8.333471673296342e-05, "loss": 4.88, "step": 43520 }, { "epoch": 0.08338098097637922, "grad_norm": 1.485567569732666, "learning_rate": 8.33538714970725e-05, "loss": 4.7529, "step": 43530 }, { "epoch": 0.08340013580775445, "grad_norm": 1.5052542686462402, "learning_rate": 8.337302626118159e-05, "loss": 4.7953, "step": 43540 }, { "epoch": 0.08341929063912969, "grad_norm": 1.429506540298462, "learning_rate": 8.339218102529067e-05, "loss": 4.8563, "step": 43550 }, { "epoch": 0.08343844547050491, "grad_norm": 1.4974552392959595, "learning_rate": 8.341133578939974e-05, "loss": 4.8112, "step": 43560 }, { "epoch": 0.08345760030188014, "grad_norm": 1.4378275871276855, "learning_rate": 8.343049055350883e-05, "loss": 4.6239, "step": 43570 }, { "epoch": 0.08347675513325538, "grad_norm": 1.4815781116485596, "learning_rate": 8.344964531761791e-05, "loss": 4.9198, "step": 43580 }, { "epoch": 0.0834959099646306, "grad_norm": 1.7675706148147583, "learning_rate": 8.346880008172698e-05, "loss": 4.8916, "step": 43590 }, { "epoch": 0.08351506479600583, "grad_norm": 1.4003231525421143, "learning_rate": 8.348795484583607e-05, "loss": 4.903, "step": 43600 }, { "epoch": 0.08353421962738107, "grad_norm": 1.5365790128707886, "learning_rate": 8.350710960994514e-05, "loss": 4.9108, "step": 43610 }, { "epoch": 0.0835533744587563, "grad_norm": 1.3722035884857178, "learning_rate": 
8.352626437405422e-05, "loss": 4.8114, "step": 43620 }, { "epoch": 0.08357252929013152, "grad_norm": 1.4676613807678223, "learning_rate": 8.35454191381633e-05, "loss": 4.7238, "step": 43630 }, { "epoch": 0.08359168412150676, "grad_norm": 1.3819142580032349, "learning_rate": 8.356457390227239e-05, "loss": 4.8048, "step": 43640 }, { "epoch": 0.08361083895288199, "grad_norm": 1.4564450979232788, "learning_rate": 8.358372866638147e-05, "loss": 4.8313, "step": 43650 }, { "epoch": 0.08362999378425721, "grad_norm": 1.471479892730713, "learning_rate": 8.360288343049055e-05, "loss": 4.73, "step": 43660 }, { "epoch": 0.08364914861563245, "grad_norm": 1.433857798576355, "learning_rate": 8.362203819459962e-05, "loss": 4.8596, "step": 43670 }, { "epoch": 0.08366830344700768, "grad_norm": 1.4098703861236572, "learning_rate": 8.364119295870871e-05, "loss": 4.8402, "step": 43680 }, { "epoch": 0.0836874582783829, "grad_norm": 1.3817682266235352, "learning_rate": 8.366034772281779e-05, "loss": 4.9387, "step": 43690 }, { "epoch": 0.08370661310975815, "grad_norm": 1.461368441581726, "learning_rate": 8.367950248692686e-05, "loss": 4.861, "step": 43700 }, { "epoch": 0.08372576794113337, "grad_norm": 1.378060221672058, "learning_rate": 8.369865725103596e-05, "loss": 4.8226, "step": 43710 }, { "epoch": 0.0837449227725086, "grad_norm": 1.4055941104888916, "learning_rate": 8.371781201514502e-05, "loss": 4.97, "step": 43720 }, { "epoch": 0.08376407760388384, "grad_norm": 1.4889180660247803, "learning_rate": 8.37369667792541e-05, "loss": 4.7821, "step": 43730 }, { "epoch": 0.08378323243525906, "grad_norm": 1.4604331254959106, "learning_rate": 8.375612154336319e-05, "loss": 4.8965, "step": 43740 }, { "epoch": 0.08380238726663429, "grad_norm": 1.4229297637939453, "learning_rate": 8.377527630747227e-05, "loss": 4.8638, "step": 43750 }, { "epoch": 0.08382154209800953, "grad_norm": 1.3994415998458862, "learning_rate": 8.379443107158135e-05, "loss": 4.8618, "step": 43760 }, { "epoch": 
0.08384069692938476, "grad_norm": 1.4443583488464355, "learning_rate": 8.381358583569043e-05, "loss": 4.7809, "step": 43770 }, { "epoch": 0.08385985176075998, "grad_norm": 1.4906423091888428, "learning_rate": 8.38327405997995e-05, "loss": 4.76, "step": 43780 }, { "epoch": 0.08387900659213522, "grad_norm": 1.4144043922424316, "learning_rate": 8.385189536390859e-05, "loss": 4.6231, "step": 43790 }, { "epoch": 0.08389816142351045, "grad_norm": 1.877510905265808, "learning_rate": 8.387105012801766e-05, "loss": 4.7684, "step": 43800 }, { "epoch": 0.08391731625488567, "grad_norm": 1.381956696510315, "learning_rate": 8.389020489212674e-05, "loss": 4.9097, "step": 43810 }, { "epoch": 0.08393647108626091, "grad_norm": 1.4789035320281982, "learning_rate": 8.390935965623584e-05, "loss": 4.8508, "step": 43820 }, { "epoch": 0.08395562591763614, "grad_norm": 1.445147156715393, "learning_rate": 8.39285144203449e-05, "loss": 4.7026, "step": 43830 }, { "epoch": 0.08397478074901137, "grad_norm": 1.452480673789978, "learning_rate": 8.394766918445399e-05, "loss": 4.7506, "step": 43840 }, { "epoch": 0.0839939355803866, "grad_norm": 1.463838815689087, "learning_rate": 8.396682394856307e-05, "loss": 4.7896, "step": 43850 }, { "epoch": 0.08401309041176183, "grad_norm": 1.4552454948425293, "learning_rate": 8.398597871267215e-05, "loss": 4.8248, "step": 43860 }, { "epoch": 0.08403224524313706, "grad_norm": 1.4726392030715942, "learning_rate": 8.400513347678123e-05, "loss": 4.8953, "step": 43870 }, { "epoch": 0.0840514000745123, "grad_norm": 1.4379756450653076, "learning_rate": 8.402428824089031e-05, "loss": 4.8346, "step": 43880 }, { "epoch": 0.08407055490588752, "grad_norm": 1.4202845096588135, "learning_rate": 8.404344300499938e-05, "loss": 4.6393, "step": 43890 }, { "epoch": 0.08408970973726275, "grad_norm": 1.396921157836914, "learning_rate": 8.406259776910847e-05, "loss": 4.7328, "step": 43900 }, { "epoch": 0.08410886456863799, "grad_norm": 1.399364948272705, "learning_rate": 
8.408175253321754e-05, "loss": 4.679, "step": 43910 }, { "epoch": 0.08412801940001322, "grad_norm": 1.4508119821548462, "learning_rate": 8.410090729732662e-05, "loss": 4.9876, "step": 43920 }, { "epoch": 0.08414717423138844, "grad_norm": 1.614356517791748, "learning_rate": 8.412006206143572e-05, "loss": 4.7584, "step": 43930 }, { "epoch": 0.08416632906276368, "grad_norm": 1.5551344156265259, "learning_rate": 8.413921682554478e-05, "loss": 4.8303, "step": 43940 }, { "epoch": 0.08418548389413891, "grad_norm": 1.406777024269104, "learning_rate": 8.415837158965387e-05, "loss": 4.8079, "step": 43950 }, { "epoch": 0.08420463872551413, "grad_norm": 1.4292268753051758, "learning_rate": 8.417752635376295e-05, "loss": 4.8461, "step": 43960 }, { "epoch": 0.08422379355688937, "grad_norm": 1.4832844734191895, "learning_rate": 8.419668111787203e-05, "loss": 4.8438, "step": 43970 }, { "epoch": 0.0842429483882646, "grad_norm": 1.3747233152389526, "learning_rate": 8.421583588198111e-05, "loss": 4.865, "step": 43980 }, { "epoch": 0.08426210321963983, "grad_norm": 1.4351423978805542, "learning_rate": 8.423499064609018e-05, "loss": 4.758, "step": 43990 }, { "epoch": 0.08428125805101506, "grad_norm": 1.4816956520080566, "learning_rate": 8.425414541019926e-05, "loss": 4.8592, "step": 44000 }, { "epoch": 0.08430041288239029, "grad_norm": 1.414373755455017, "learning_rate": 8.427330017430835e-05, "loss": 4.8399, "step": 44010 }, { "epoch": 0.08431956771376552, "grad_norm": 1.4246951341629028, "learning_rate": 8.429245493841742e-05, "loss": 4.7386, "step": 44020 }, { "epoch": 0.08433872254514076, "grad_norm": 1.4174445867538452, "learning_rate": 8.43116097025265e-05, "loss": 4.7232, "step": 44030 }, { "epoch": 0.08435787737651598, "grad_norm": 1.4357093572616577, "learning_rate": 8.43307644666356e-05, "loss": 4.8739, "step": 44040 }, { "epoch": 0.08437703220789122, "grad_norm": 1.4226477146148682, "learning_rate": 8.434991923074467e-05, "loss": 4.7304, "step": 44050 }, { "epoch": 
0.08439618703926645, "grad_norm": 1.3940640687942505, "learning_rate": 8.436907399485375e-05, "loss": 4.7041, "step": 44060 }, { "epoch": 0.08441534187064167, "grad_norm": 1.4650845527648926, "learning_rate": 8.438822875896283e-05, "loss": 4.6488, "step": 44070 }, { "epoch": 0.08443449670201691, "grad_norm": 1.4987393617630005, "learning_rate": 8.440738352307191e-05, "loss": 4.9076, "step": 44080 }, { "epoch": 0.08445365153339214, "grad_norm": 1.4481289386749268, "learning_rate": 8.442653828718099e-05, "loss": 4.7676, "step": 44090 }, { "epoch": 0.08447280636476737, "grad_norm": 1.5657302141189575, "learning_rate": 8.444569305129006e-05, "loss": 4.88, "step": 44100 }, { "epoch": 0.0844919611961426, "grad_norm": 1.3631422519683838, "learning_rate": 8.446484781539914e-05, "loss": 4.8225, "step": 44110 }, { "epoch": 0.08451111602751783, "grad_norm": 1.440001130104065, "learning_rate": 8.448400257950823e-05, "loss": 4.74, "step": 44120 }, { "epoch": 0.08453027085889306, "grad_norm": 1.4042503833770752, "learning_rate": 8.45031573436173e-05, "loss": 4.762, "step": 44130 }, { "epoch": 0.0845494256902683, "grad_norm": 1.4466279745101929, "learning_rate": 8.452231210772638e-05, "loss": 4.8858, "step": 44140 }, { "epoch": 0.08456858052164352, "grad_norm": 1.4568798542022705, "learning_rate": 8.454146687183548e-05, "loss": 4.7671, "step": 44150 }, { "epoch": 0.08458773535301875, "grad_norm": 1.422316312789917, "learning_rate": 8.456062163594455e-05, "loss": 4.829, "step": 44160 }, { "epoch": 0.08460689018439399, "grad_norm": 1.4463708400726318, "learning_rate": 8.457977640005363e-05, "loss": 4.8627, "step": 44170 }, { "epoch": 0.08462604501576922, "grad_norm": 1.39913809299469, "learning_rate": 8.45989311641627e-05, "loss": 4.8126, "step": 44180 }, { "epoch": 0.08464519984714444, "grad_norm": 1.4449965953826904, "learning_rate": 8.461808592827179e-05, "loss": 4.8047, "step": 44190 }, { "epoch": 0.08466435467851968, "grad_norm": 1.562962532043457, "learning_rate": 
8.463724069238087e-05, "loss": 4.6403, "step": 44200 }, { "epoch": 0.08468350950989491, "grad_norm": 1.4776841402053833, "learning_rate": 8.465639545648994e-05, "loss": 4.7236, "step": 44210 }, { "epoch": 0.08470266434127013, "grad_norm": 1.5020798444747925, "learning_rate": 8.467555022059902e-05, "loss": 4.6387, "step": 44220 }, { "epoch": 0.08472181917264537, "grad_norm": 1.431800127029419, "learning_rate": 8.469470498470812e-05, "loss": 4.679, "step": 44230 }, { "epoch": 0.0847409740040206, "grad_norm": 1.4187361001968384, "learning_rate": 8.471385974881718e-05, "loss": 4.659, "step": 44240 }, { "epoch": 0.08476012883539583, "grad_norm": 1.414541482925415, "learning_rate": 8.473301451292626e-05, "loss": 4.7874, "step": 44250 }, { "epoch": 0.08477928366677107, "grad_norm": 1.4127795696258545, "learning_rate": 8.475216927703536e-05, "loss": 4.7395, "step": 44260 }, { "epoch": 0.08479843849814629, "grad_norm": 1.419727087020874, "learning_rate": 8.477132404114443e-05, "loss": 4.6841, "step": 44270 }, { "epoch": 0.08481759332952152, "grad_norm": 1.4476014375686646, "learning_rate": 8.479047880525351e-05, "loss": 4.5454, "step": 44280 }, { "epoch": 0.08483674816089676, "grad_norm": 1.436048150062561, "learning_rate": 8.480963356936258e-05, "loss": 4.8175, "step": 44290 }, { "epoch": 0.08485590299227198, "grad_norm": 1.3750638961791992, "learning_rate": 8.482878833347167e-05, "loss": 4.9145, "step": 44300 }, { "epoch": 0.08487505782364721, "grad_norm": 1.4512684345245361, "learning_rate": 8.484794309758075e-05, "loss": 4.7076, "step": 44310 }, { "epoch": 0.08489421265502245, "grad_norm": 1.432146668434143, "learning_rate": 8.486709786168982e-05, "loss": 4.7669, "step": 44320 }, { "epoch": 0.08491336748639768, "grad_norm": 1.4275234937667847, "learning_rate": 8.48862526257989e-05, "loss": 4.7719, "step": 44330 }, { "epoch": 0.0849325223177729, "grad_norm": 1.4947090148925781, "learning_rate": 8.4905407389908e-05, "loss": 4.8869, "step": 44340 }, { "epoch": 
0.08495167714914814, "grad_norm": 1.4352604150772095, "learning_rate": 8.492456215401706e-05, "loss": 4.9577, "step": 44350 }, { "epoch": 0.08497083198052337, "grad_norm": 1.4721195697784424, "learning_rate": 8.494371691812615e-05, "loss": 4.9557, "step": 44360 }, { "epoch": 0.0849899868118986, "grad_norm": 1.4833437204360962, "learning_rate": 8.496287168223524e-05, "loss": 4.7954, "step": 44370 }, { "epoch": 0.08500914164327383, "grad_norm": 1.3472388982772827, "learning_rate": 8.498202644634431e-05, "loss": 4.8801, "step": 44380 }, { "epoch": 0.08502829647464906, "grad_norm": 1.5019843578338623, "learning_rate": 8.500118121045339e-05, "loss": 4.8823, "step": 44390 }, { "epoch": 0.08504745130602429, "grad_norm": 1.4136260747909546, "learning_rate": 8.502033597456246e-05, "loss": 4.6701, "step": 44400 }, { "epoch": 0.08506660613739953, "grad_norm": 1.46396803855896, "learning_rate": 8.503949073867155e-05, "loss": 4.8208, "step": 44410 }, { "epoch": 0.08508576096877475, "grad_norm": 1.4057378768920898, "learning_rate": 8.505864550278063e-05, "loss": 4.8658, "step": 44420 }, { "epoch": 0.08510491580014998, "grad_norm": 1.410547137260437, "learning_rate": 8.50778002668897e-05, "loss": 4.731, "step": 44430 }, { "epoch": 0.08512407063152522, "grad_norm": 1.5450544357299805, "learning_rate": 8.509695503099878e-05, "loss": 4.8503, "step": 44440 }, { "epoch": 0.08514322546290044, "grad_norm": 1.4452325105667114, "learning_rate": 8.511610979510788e-05, "loss": 4.7366, "step": 44450 }, { "epoch": 0.08516238029427567, "grad_norm": 1.3764326572418213, "learning_rate": 8.513526455921694e-05, "loss": 4.7227, "step": 44460 }, { "epoch": 0.08518153512565091, "grad_norm": 1.436355710029602, "learning_rate": 8.515441932332603e-05, "loss": 4.7405, "step": 44470 }, { "epoch": 0.08520068995702614, "grad_norm": 1.5194576978683472, "learning_rate": 8.51735740874351e-05, "loss": 4.7616, "step": 44480 }, { "epoch": 0.08521984478840136, "grad_norm": 1.4975838661193848, "learning_rate": 
8.519272885154419e-05, "loss": 4.7653, "step": 44490 }, { "epoch": 0.0852389996197766, "grad_norm": 1.4125895500183105, "learning_rate": 8.521188361565327e-05, "loss": 4.6743, "step": 44500 }, { "epoch": 0.08525815445115183, "grad_norm": 1.4634836912155151, "learning_rate": 8.523103837976234e-05, "loss": 4.7723, "step": 44510 }, { "epoch": 0.08527730928252705, "grad_norm": 1.4373213052749634, "learning_rate": 8.525019314387142e-05, "loss": 4.8181, "step": 44520 }, { "epoch": 0.08529646411390229, "grad_norm": 1.413446068763733, "learning_rate": 8.526934790798051e-05, "loss": 4.7949, "step": 44530 }, { "epoch": 0.08531561894527752, "grad_norm": 1.4301451444625854, "learning_rate": 8.528850267208958e-05, "loss": 4.8371, "step": 44540 }, { "epoch": 0.08533477377665274, "grad_norm": 1.4130271673202515, "learning_rate": 8.530765743619866e-05, "loss": 4.8285, "step": 44550 }, { "epoch": 0.08535392860802798, "grad_norm": 1.6288498640060425, "learning_rate": 8.532681220030776e-05, "loss": 4.7199, "step": 44560 }, { "epoch": 0.08537308343940321, "grad_norm": 1.4331181049346924, "learning_rate": 8.534596696441683e-05, "loss": 4.7212, "step": 44570 }, { "epoch": 0.08539223827077844, "grad_norm": 1.4834662675857544, "learning_rate": 8.536512172852591e-05, "loss": 4.7865, "step": 44580 }, { "epoch": 0.08541139310215368, "grad_norm": 1.4157270193099976, "learning_rate": 8.538427649263497e-05, "loss": 4.8029, "step": 44590 }, { "epoch": 0.0854305479335289, "grad_norm": 1.4084455966949463, "learning_rate": 8.540343125674407e-05, "loss": 4.8153, "step": 44600 }, { "epoch": 0.08544970276490413, "grad_norm": 1.4490222930908203, "learning_rate": 8.542258602085315e-05, "loss": 4.7231, "step": 44610 }, { "epoch": 0.08546885759627937, "grad_norm": 1.4548777341842651, "learning_rate": 8.544174078496222e-05, "loss": 4.7233, "step": 44620 }, { "epoch": 0.0854880124276546, "grad_norm": 1.549860954284668, "learning_rate": 8.54608955490713e-05, "loss": 4.8294, "step": 44630 }, { "epoch": 
0.08550716725902982, "grad_norm": 1.3897066116333008, "learning_rate": 8.54800503131804e-05, "loss": 4.8533, "step": 44640 }, { "epoch": 0.08552632209040506, "grad_norm": 1.4358159303665161, "learning_rate": 8.549920507728946e-05, "loss": 4.7354, "step": 44650 }, { "epoch": 0.08554547692178029, "grad_norm": 1.48371422290802, "learning_rate": 8.551835984139854e-05, "loss": 4.7924, "step": 44660 }, { "epoch": 0.08556463175315551, "grad_norm": 1.4414010047912598, "learning_rate": 8.553751460550761e-05, "loss": 4.825, "step": 44670 }, { "epoch": 0.08558378658453075, "grad_norm": 1.3740233182907104, "learning_rate": 8.55566693696167e-05, "loss": 4.8658, "step": 44680 }, { "epoch": 0.08560294141590598, "grad_norm": 1.3574693202972412, "learning_rate": 8.557582413372579e-05, "loss": 4.8284, "step": 44690 }, { "epoch": 0.08562209624728122, "grad_norm": 1.4186781644821167, "learning_rate": 8.559497889783486e-05, "loss": 4.9207, "step": 44700 }, { "epoch": 0.08564125107865644, "grad_norm": 1.3280258178710938, "learning_rate": 8.561413366194395e-05, "loss": 4.7917, "step": 44710 }, { "epoch": 0.08566040591003167, "grad_norm": 1.415539264678955, "learning_rate": 8.563328842605303e-05, "loss": 4.7297, "step": 44720 }, { "epoch": 0.08567956074140691, "grad_norm": 1.564907431602478, "learning_rate": 8.56524431901621e-05, "loss": 4.7285, "step": 44730 }, { "epoch": 0.08569871557278214, "grad_norm": 1.465888500213623, "learning_rate": 8.567159795427118e-05, "loss": 4.8033, "step": 44740 }, { "epoch": 0.08571787040415736, "grad_norm": 1.621904969215393, "learning_rate": 8.569075271838028e-05, "loss": 4.8751, "step": 44750 }, { "epoch": 0.0857370252355326, "grad_norm": 1.3810333013534546, "learning_rate": 8.570990748248934e-05, "loss": 4.8139, "step": 44760 }, { "epoch": 0.08575618006690783, "grad_norm": 1.3955885171890259, "learning_rate": 8.572906224659842e-05, "loss": 4.7724, "step": 44770 }, { "epoch": 0.08577533489828305, "grad_norm": 1.4708759784698486, "learning_rate": 
8.574821701070749e-05, "loss": 4.5903, "step": 44780 }, { "epoch": 0.0857944897296583, "grad_norm": 1.3772046566009521, "learning_rate": 8.576737177481659e-05, "loss": 4.7383, "step": 44790 }, { "epoch": 0.08581364456103352, "grad_norm": 1.4261690378189087, "learning_rate": 8.578652653892567e-05, "loss": 4.8105, "step": 44800 }, { "epoch": 0.08583279939240875, "grad_norm": 1.393783688545227, "learning_rate": 8.580568130303474e-05, "loss": 4.8934, "step": 44810 }, { "epoch": 0.08585195422378399, "grad_norm": 1.358963131904602, "learning_rate": 8.582483606714383e-05, "loss": 4.8803, "step": 44820 }, { "epoch": 0.08587110905515921, "grad_norm": 1.4107692241668701, "learning_rate": 8.584399083125291e-05, "loss": 4.8122, "step": 44830 }, { "epoch": 0.08589026388653444, "grad_norm": 1.487943410873413, "learning_rate": 8.586314559536198e-05, "loss": 4.7225, "step": 44840 }, { "epoch": 0.08590941871790968, "grad_norm": 1.4429444074630737, "learning_rate": 8.588230035947106e-05, "loss": 4.8463, "step": 44850 }, { "epoch": 0.0859285735492849, "grad_norm": 1.3925399780273438, "learning_rate": 8.590145512358014e-05, "loss": 4.7764, "step": 44860 }, { "epoch": 0.08594772838066013, "grad_norm": 1.4928791522979736, "learning_rate": 8.592060988768922e-05, "loss": 4.6197, "step": 44870 }, { "epoch": 0.08596688321203537, "grad_norm": 1.3843717575073242, "learning_rate": 8.59397646517983e-05, "loss": 4.7602, "step": 44880 }, { "epoch": 0.0859860380434106, "grad_norm": 1.509521722793579, "learning_rate": 8.595891941590737e-05, "loss": 4.8091, "step": 44890 }, { "epoch": 0.08600519287478582, "grad_norm": 1.3987787961959839, "learning_rate": 8.597807418001647e-05, "loss": 4.7563, "step": 44900 }, { "epoch": 0.08602434770616106, "grad_norm": 1.4782130718231201, "learning_rate": 8.599722894412555e-05, "loss": 4.7796, "step": 44910 }, { "epoch": 0.08604350253753629, "grad_norm": 1.4148861169815063, "learning_rate": 8.601638370823462e-05, "loss": 4.807, "step": 44920 }, { "epoch": 
0.08606265736891151, "grad_norm": 1.3835636377334595, "learning_rate": 8.603553847234371e-05, "loss": 4.6254, "step": 44930 }, { "epoch": 0.08608181220028675, "grad_norm": 1.3956069946289062, "learning_rate": 8.60546932364528e-05, "loss": 4.7983, "step": 44940 }, { "epoch": 0.08610096703166198, "grad_norm": 1.4052915573120117, "learning_rate": 8.607384800056186e-05, "loss": 4.8492, "step": 44950 }, { "epoch": 0.0861201218630372, "grad_norm": 1.4411592483520508, "learning_rate": 8.609300276467094e-05, "loss": 4.7793, "step": 44960 }, { "epoch": 0.08613927669441244, "grad_norm": 1.456068754196167, "learning_rate": 8.611215752878002e-05, "loss": 4.7198, "step": 44970 }, { "epoch": 0.08615843152578767, "grad_norm": 1.3323626518249512, "learning_rate": 8.61313122928891e-05, "loss": 4.7671, "step": 44980 }, { "epoch": 0.0861775863571629, "grad_norm": 1.4109631776809692, "learning_rate": 8.615046705699819e-05, "loss": 4.764, "step": 44990 }, { "epoch": 0.08619674118853814, "grad_norm": 1.4752519130706787, "learning_rate": 8.616962182110725e-05, "loss": 4.7728, "step": 45000 }, { "epoch": 0.08621589601991336, "grad_norm": 1.4042102098464966, "learning_rate": 8.618877658521635e-05, "loss": 4.9353, "step": 45010 }, { "epoch": 0.08623505085128859, "grad_norm": 1.3433798551559448, "learning_rate": 8.620793134932543e-05, "loss": 4.6809, "step": 45020 }, { "epoch": 0.08625420568266383, "grad_norm": 1.4285136461257935, "learning_rate": 8.62270861134345e-05, "loss": 4.9374, "step": 45030 }, { "epoch": 0.08627336051403905, "grad_norm": 1.39712655544281, "learning_rate": 8.624624087754359e-05, "loss": 4.9084, "step": 45040 }, { "epoch": 0.08629251534541428, "grad_norm": 1.4703861474990845, "learning_rate": 8.626539564165267e-05, "loss": 4.9588, "step": 45050 }, { "epoch": 0.08631167017678952, "grad_norm": 1.3377043008804321, "learning_rate": 8.628455040576174e-05, "loss": 4.8102, "step": 45060 }, { "epoch": 0.08633082500816475, "grad_norm": 1.3808543682098389, "learning_rate": 
8.630370516987082e-05, "loss": 4.8667, "step": 45070 }, { "epoch": 0.08634997983953997, "grad_norm": 1.4031574726104736, "learning_rate": 8.63228599339799e-05, "loss": 4.7703, "step": 45080 }, { "epoch": 0.08636913467091521, "grad_norm": 1.4326773881912231, "learning_rate": 8.634201469808899e-05, "loss": 4.8857, "step": 45090 }, { "epoch": 0.08638828950229044, "grad_norm": 1.4421435594558716, "learning_rate": 8.636116946219807e-05, "loss": 4.8863, "step": 45100 }, { "epoch": 0.08640744433366566, "grad_norm": 1.3759108781814575, "learning_rate": 8.638032422630713e-05, "loss": 4.7098, "step": 45110 }, { "epoch": 0.0864265991650409, "grad_norm": 1.4209620952606201, "learning_rate": 8.639947899041623e-05, "loss": 4.7437, "step": 45120 }, { "epoch": 0.08644575399641613, "grad_norm": 1.405121088027954, "learning_rate": 8.641863375452531e-05, "loss": 4.8801, "step": 45130 }, { "epoch": 0.08646490882779136, "grad_norm": 1.3988412618637085, "learning_rate": 8.643778851863438e-05, "loss": 4.8524, "step": 45140 }, { "epoch": 0.0864840636591666, "grad_norm": 1.4966953992843628, "learning_rate": 8.645694328274347e-05, "loss": 5.0331, "step": 45150 }, { "epoch": 0.08650321849054182, "grad_norm": 1.378580927848816, "learning_rate": 8.647609804685254e-05, "loss": 5.0293, "step": 45160 }, { "epoch": 0.08652237332191705, "grad_norm": 1.5056045055389404, "learning_rate": 8.649525281096162e-05, "loss": 4.7715, "step": 45170 }, { "epoch": 0.08654152815329229, "grad_norm": 1.8180301189422607, "learning_rate": 8.65144075750707e-05, "loss": 4.9226, "step": 45180 }, { "epoch": 0.08656068298466751, "grad_norm": 1.2939417362213135, "learning_rate": 8.653356233917979e-05, "loss": 4.9584, "step": 45190 }, { "epoch": 0.08657983781604274, "grad_norm": 1.3758410215377808, "learning_rate": 8.655271710328887e-05, "loss": 4.8693, "step": 45200 }, { "epoch": 0.08659899264741798, "grad_norm": 1.3704801797866821, "learning_rate": 8.657187186739795e-05, "loss": 4.7884, "step": 45210 }, { "epoch": 
0.0866181474787932, "grad_norm": 1.4579358100891113, "learning_rate": 8.659102663150702e-05, "loss": 4.8205, "step": 45220 }, { "epoch": 0.08663730231016843, "grad_norm": 1.461359977722168, "learning_rate": 8.661018139561611e-05, "loss": 4.7124, "step": 45230 }, { "epoch": 0.08665645714154367, "grad_norm": 1.394710659980774, "learning_rate": 8.662933615972519e-05, "loss": 4.794, "step": 45240 }, { "epoch": 0.0866756119729189, "grad_norm": 1.3911619186401367, "learning_rate": 8.664849092383426e-05, "loss": 4.9458, "step": 45250 }, { "epoch": 0.08669476680429412, "grad_norm": 1.4372962713241577, "learning_rate": 8.666764568794335e-05, "loss": 4.9115, "step": 45260 }, { "epoch": 0.08671392163566936, "grad_norm": 1.3897051811218262, "learning_rate": 8.668680045205242e-05, "loss": 4.8431, "step": 45270 }, { "epoch": 0.08673307646704459, "grad_norm": 1.396302342414856, "learning_rate": 8.67059552161615e-05, "loss": 4.8719, "step": 45280 }, { "epoch": 0.08675223129841982, "grad_norm": 1.3777714967727661, "learning_rate": 8.672510998027058e-05, "loss": 4.7903, "step": 45290 }, { "epoch": 0.08677138612979506, "grad_norm": 1.3824892044067383, "learning_rate": 8.674426474437967e-05, "loss": 4.6962, "step": 45300 }, { "epoch": 0.08679054096117028, "grad_norm": 1.3927521705627441, "learning_rate": 8.676341950848875e-05, "loss": 4.7626, "step": 45310 }, { "epoch": 0.08680969579254551, "grad_norm": 1.4015700817108154, "learning_rate": 8.678257427259783e-05, "loss": 4.8573, "step": 45320 }, { "epoch": 0.08682885062392075, "grad_norm": 1.476975440979004, "learning_rate": 8.68017290367069e-05, "loss": 4.8546, "step": 45330 }, { "epoch": 0.08684800545529597, "grad_norm": 1.401815414428711, "learning_rate": 8.682088380081599e-05, "loss": 4.7296, "step": 45340 }, { "epoch": 0.0868671602866712, "grad_norm": 1.4117472171783447, "learning_rate": 8.684003856492506e-05, "loss": 4.7217, "step": 45350 }, { "epoch": 0.08688631511804644, "grad_norm": 1.4105666875839233, "learning_rate": 
8.685919332903414e-05, "loss": 4.9, "step": 45360 }, { "epoch": 0.08690546994942167, "grad_norm": 1.3784151077270508, "learning_rate": 8.687834809314324e-05, "loss": 4.6704, "step": 45370 }, { "epoch": 0.0869246247807969, "grad_norm": 1.4066919088363647, "learning_rate": 8.68975028572523e-05, "loss": 4.8115, "step": 45380 }, { "epoch": 0.08694377961217213, "grad_norm": 1.3812111616134644, "learning_rate": 8.691665762136138e-05, "loss": 4.754, "step": 45390 }, { "epoch": 0.08696293444354736, "grad_norm": 1.4103741645812988, "learning_rate": 8.693581238547047e-05, "loss": 4.8333, "step": 45400 }, { "epoch": 0.0869820892749226, "grad_norm": 1.470163345336914, "learning_rate": 8.695496714957955e-05, "loss": 4.7771, "step": 45410 }, { "epoch": 0.08700124410629782, "grad_norm": 1.4888485670089722, "learning_rate": 8.697412191368863e-05, "loss": 4.9719, "step": 45420 }, { "epoch": 0.08702039893767305, "grad_norm": 1.3767510652542114, "learning_rate": 8.699327667779771e-05, "loss": 4.7431, "step": 45430 }, { "epoch": 0.08703955376904829, "grad_norm": 1.4161326885223389, "learning_rate": 8.701243144190678e-05, "loss": 4.7142, "step": 45440 }, { "epoch": 0.08705870860042352, "grad_norm": 1.4455310106277466, "learning_rate": 8.703158620601587e-05, "loss": 4.7128, "step": 45450 }, { "epoch": 0.08707786343179874, "grad_norm": 1.4989839792251587, "learning_rate": 8.705074097012494e-05, "loss": 4.7206, "step": 45460 }, { "epoch": 0.08709701826317398, "grad_norm": 1.457499384880066, "learning_rate": 8.706989573423402e-05, "loss": 4.8361, "step": 45470 }, { "epoch": 0.0871161730945492, "grad_norm": 1.4711096286773682, "learning_rate": 8.708905049834312e-05, "loss": 4.8873, "step": 45480 }, { "epoch": 0.08713532792592443, "grad_norm": 1.370545506477356, "learning_rate": 8.710820526245218e-05, "loss": 4.7044, "step": 45490 }, { "epoch": 0.08715448275729967, "grad_norm": 1.3500103950500488, "learning_rate": 8.712736002656127e-05, "loss": 4.7601, "step": 45500 }, { "epoch": 
0.0871736375886749, "grad_norm": 1.4493016004562378, "learning_rate": 8.714651479067035e-05, "loss": 4.7893, "step": 45510 }, { "epoch": 0.08719279242005012, "grad_norm": 1.3978508710861206, "learning_rate": 8.716566955477943e-05, "loss": 4.8053, "step": 45520 }, { "epoch": 0.08721194725142536, "grad_norm": 1.5539741516113281, "learning_rate": 8.718482431888851e-05, "loss": 4.6682, "step": 45530 }, { "epoch": 0.08723110208280059, "grad_norm": 1.4083367586135864, "learning_rate": 8.720397908299758e-05, "loss": 4.8267, "step": 45540 }, { "epoch": 0.08725025691417582, "grad_norm": 1.4016075134277344, "learning_rate": 8.722313384710666e-05, "loss": 4.7144, "step": 45550 }, { "epoch": 0.08726941174555106, "grad_norm": 1.3706454038619995, "learning_rate": 8.724228861121575e-05, "loss": 4.8223, "step": 45560 }, { "epoch": 0.08728856657692628, "grad_norm": 1.4776066541671753, "learning_rate": 8.726144337532482e-05, "loss": 4.8775, "step": 45570 }, { "epoch": 0.08730772140830151, "grad_norm": 1.4322172403335571, "learning_rate": 8.72805981394339e-05, "loss": 4.7916, "step": 45580 }, { "epoch": 0.08732687623967675, "grad_norm": 1.3509135246276855, "learning_rate": 8.7299752903543e-05, "loss": 4.8064, "step": 45590 }, { "epoch": 0.08734603107105197, "grad_norm": 1.4131057262420654, "learning_rate": 8.731890766765206e-05, "loss": 4.758, "step": 45600 }, { "epoch": 0.0873651859024272, "grad_norm": 1.3733634948730469, "learning_rate": 8.733806243176115e-05, "loss": 4.7773, "step": 45610 }, { "epoch": 0.08738434073380244, "grad_norm": 1.3812917470932007, "learning_rate": 8.735721719587023e-05, "loss": 4.9596, "step": 45620 }, { "epoch": 0.08740349556517767, "grad_norm": 1.4564592838287354, "learning_rate": 8.737637195997931e-05, "loss": 4.7484, "step": 45630 }, { "epoch": 0.08742265039655289, "grad_norm": 1.4398542642593384, "learning_rate": 8.739552672408839e-05, "loss": 4.7669, "step": 45640 }, { "epoch": 0.08744180522792813, "grad_norm": 1.3613312244415283, "learning_rate": 
8.741468148819746e-05, "loss": 4.7978, "step": 45650 }, { "epoch": 0.08746096005930336, "grad_norm": 1.3962265253067017, "learning_rate": 8.743383625230654e-05, "loss": 4.7077, "step": 45660 }, { "epoch": 0.08748011489067858, "grad_norm": 1.417515516281128, "learning_rate": 8.745299101641563e-05, "loss": 4.8171, "step": 45670 }, { "epoch": 0.08749926972205382, "grad_norm": 1.8214417695999146, "learning_rate": 8.74721457805247e-05, "loss": 4.6826, "step": 45680 }, { "epoch": 0.08751842455342905, "grad_norm": 1.3755415678024292, "learning_rate": 8.749130054463378e-05, "loss": 4.8103, "step": 45690 }, { "epoch": 0.08753757938480428, "grad_norm": 1.4201529026031494, "learning_rate": 8.751045530874288e-05, "loss": 4.7934, "step": 45700 }, { "epoch": 0.08755673421617952, "grad_norm": 1.427423119544983, "learning_rate": 8.752961007285195e-05, "loss": 4.8878, "step": 45710 }, { "epoch": 0.08757588904755474, "grad_norm": 1.407939076423645, "learning_rate": 8.754876483696103e-05, "loss": 4.8085, "step": 45720 }, { "epoch": 0.08759504387892997, "grad_norm": 1.4200596809387207, "learning_rate": 8.756791960107011e-05, "loss": 4.8531, "step": 45730 }, { "epoch": 0.08761419871030521, "grad_norm": 1.3869785070419312, "learning_rate": 8.758707436517919e-05, "loss": 4.7317, "step": 45740 }, { "epoch": 0.08763335354168043, "grad_norm": 1.3954834938049316, "learning_rate": 8.760622912928827e-05, "loss": 4.8317, "step": 45750 }, { "epoch": 0.08765250837305566, "grad_norm": 1.4115991592407227, "learning_rate": 8.762538389339734e-05, "loss": 4.8547, "step": 45760 }, { "epoch": 0.0876716632044309, "grad_norm": 1.4326201677322388, "learning_rate": 8.764453865750642e-05, "loss": 4.7993, "step": 45770 }, { "epoch": 0.08769081803580613, "grad_norm": 1.5086455345153809, "learning_rate": 8.766369342161551e-05, "loss": 4.7243, "step": 45780 }, { "epoch": 0.08770997286718135, "grad_norm": 1.3815971612930298, "learning_rate": 8.768284818572458e-05, "loss": 4.8587, "step": 45790 }, { "epoch": 
0.08772912769855659, "grad_norm": 1.4203269481658936, "learning_rate": 8.770200294983366e-05, "loss": 4.6634, "step": 45800 }, { "epoch": 0.08774828252993182, "grad_norm": 1.443149209022522, "learning_rate": 8.772115771394276e-05, "loss": 4.7326, "step": 45810 }, { "epoch": 0.08776743736130704, "grad_norm": 1.3733482360839844, "learning_rate": 8.774031247805183e-05, "loss": 4.8748, "step": 45820 }, { "epoch": 0.08778659219268228, "grad_norm": 1.388957142829895, "learning_rate": 8.775946724216091e-05, "loss": 4.8168, "step": 45830 }, { "epoch": 0.08780574702405751, "grad_norm": 1.4168225526809692, "learning_rate": 8.777862200626998e-05, "loss": 4.7133, "step": 45840 }, { "epoch": 0.08782490185543274, "grad_norm": 1.3986762762069702, "learning_rate": 8.779777677037907e-05, "loss": 4.7363, "step": 45850 }, { "epoch": 0.08784405668680798, "grad_norm": 1.4138811826705933, "learning_rate": 8.781693153448815e-05, "loss": 4.7846, "step": 45860 }, { "epoch": 0.0878632115181832, "grad_norm": 1.465927004814148, "learning_rate": 8.783608629859722e-05, "loss": 4.9321, "step": 45870 }, { "epoch": 0.08788236634955843, "grad_norm": 1.4193592071533203, "learning_rate": 8.78552410627063e-05, "loss": 4.7294, "step": 45880 }, { "epoch": 0.08790152118093367, "grad_norm": 1.390491008758545, "learning_rate": 8.78743958268154e-05, "loss": 4.7591, "step": 45890 }, { "epoch": 0.0879206760123089, "grad_norm": 1.3588693141937256, "learning_rate": 8.789355059092446e-05, "loss": 4.8077, "step": 45900 }, { "epoch": 0.08793983084368412, "grad_norm": 1.35686194896698, "learning_rate": 8.791270535503354e-05, "loss": 4.6682, "step": 45910 }, { "epoch": 0.08795898567505936, "grad_norm": 1.4032319784164429, "learning_rate": 8.793186011914264e-05, "loss": 4.8071, "step": 45920 }, { "epoch": 0.08797814050643459, "grad_norm": 1.861644983291626, "learning_rate": 8.795101488325171e-05, "loss": 4.6672, "step": 45930 }, { "epoch": 0.08799729533780981, "grad_norm": 1.4243172407150269, "learning_rate": 
8.797016964736079e-05, "loss": 4.767, "step": 45940 }, { "epoch": 0.08801645016918505, "grad_norm": 1.3501105308532715, "learning_rate": 8.798932441146986e-05, "loss": 4.7943, "step": 45950 }, { "epoch": 0.08803560500056028, "grad_norm": 1.3875521421432495, "learning_rate": 8.800847917557894e-05, "loss": 4.7777, "step": 45960 }, { "epoch": 0.0880547598319355, "grad_norm": 1.429850459098816, "learning_rate": 8.802763393968803e-05, "loss": 4.7726, "step": 45970 }, { "epoch": 0.08807391466331074, "grad_norm": 1.370583415031433, "learning_rate": 8.804487322738619e-05, "loss": 4.9026, "step": 45980 }, { "epoch": 0.08809306949468597, "grad_norm": 1.3577423095703125, "learning_rate": 8.806402799149529e-05, "loss": 4.8583, "step": 45990 }, { "epoch": 0.0881122243260612, "grad_norm": 1.4034165143966675, "learning_rate": 8.808318275560435e-05, "loss": 4.8829, "step": 46000 }, { "epoch": 0.08813137915743643, "grad_norm": 1.3497240543365479, "learning_rate": 8.810233751971344e-05, "loss": 4.6631, "step": 46010 }, { "epoch": 0.08815053398881166, "grad_norm": 1.5029760599136353, "learning_rate": 8.81214922838225e-05, "loss": 4.7571, "step": 46020 }, { "epoch": 0.0881696888201869, "grad_norm": 1.4266831874847412, "learning_rate": 8.81406470479316e-05, "loss": 4.6807, "step": 46030 }, { "epoch": 0.08818884365156213, "grad_norm": 1.3850480318069458, "learning_rate": 8.815980181204068e-05, "loss": 4.7811, "step": 46040 }, { "epoch": 0.08820799848293735, "grad_norm": 1.6327155828475952, "learning_rate": 8.817895657614975e-05, "loss": 4.892, "step": 46050 }, { "epoch": 0.08822715331431259, "grad_norm": 1.3539761304855347, "learning_rate": 8.819811134025884e-05, "loss": 4.6658, "step": 46060 }, { "epoch": 0.08824630814568782, "grad_norm": 1.3567302227020264, "learning_rate": 8.821726610436792e-05, "loss": 4.6656, "step": 46070 }, { "epoch": 0.08826546297706304, "grad_norm": 1.3577607870101929, "learning_rate": 8.823642086847699e-05, "loss": 4.8138, "step": 46080 }, { "epoch": 
0.08828461780843828, "grad_norm": 1.4080290794372559, "learning_rate": 8.825557563258607e-05, "loss": 4.8128, "step": 46090 }, { "epoch": 0.08830377263981351, "grad_norm": 1.384946346282959, "learning_rate": 8.827473039669517e-05, "loss": 4.6604, "step": 46100 }, { "epoch": 0.08832292747118874, "grad_norm": 1.4379843473434448, "learning_rate": 8.829388516080424e-05, "loss": 4.7601, "step": 46110 }, { "epoch": 0.08834208230256398, "grad_norm": 1.3904457092285156, "learning_rate": 8.831303992491332e-05, "loss": 4.6483, "step": 46120 }, { "epoch": 0.0883612371339392, "grad_norm": 1.399997591972351, "learning_rate": 8.833219468902238e-05, "loss": 4.8529, "step": 46130 }, { "epoch": 0.08838039196531443, "grad_norm": 1.4019984006881714, "learning_rate": 8.835134945313148e-05, "loss": 4.8653, "step": 46140 }, { "epoch": 0.08839954679668967, "grad_norm": 1.4886938333511353, "learning_rate": 8.837050421724056e-05, "loss": 4.8413, "step": 46150 }, { "epoch": 0.0884187016280649, "grad_norm": 1.4400527477264404, "learning_rate": 8.838965898134963e-05, "loss": 4.6577, "step": 46160 }, { "epoch": 0.08843785645944012, "grad_norm": 1.4387624263763428, "learning_rate": 8.840881374545872e-05, "loss": 4.7812, "step": 46170 }, { "epoch": 0.08845701129081536, "grad_norm": 1.5052504539489746, "learning_rate": 8.84279685095678e-05, "loss": 4.8707, "step": 46180 }, { "epoch": 0.08847616612219059, "grad_norm": 1.3832706212997437, "learning_rate": 8.844712327367687e-05, "loss": 4.7444, "step": 46190 }, { "epoch": 0.08849532095356581, "grad_norm": 1.4027447700500488, "learning_rate": 8.846627803778595e-05, "loss": 4.9105, "step": 46200 }, { "epoch": 0.08851447578494105, "grad_norm": 1.470378041267395, "learning_rate": 8.848543280189503e-05, "loss": 4.7524, "step": 46210 }, { "epoch": 0.08853363061631628, "grad_norm": 1.4110150337219238, "learning_rate": 8.850458756600412e-05, "loss": 4.8535, "step": 46220 }, { "epoch": 0.0885527854476915, "grad_norm": 1.406013011932373, "learning_rate": 
8.85237423301132e-05, "loss": 4.8789, "step": 46230 }, { "epoch": 0.08857194027906674, "grad_norm": 1.4439678192138672, "learning_rate": 8.854289709422227e-05, "loss": 4.851, "step": 46240 }, { "epoch": 0.08859109511044197, "grad_norm": 1.325079321861267, "learning_rate": 8.856205185833136e-05, "loss": 4.8106, "step": 46250 }, { "epoch": 0.0886102499418172, "grad_norm": 1.3663592338562012, "learning_rate": 8.858120662244044e-05, "loss": 4.8174, "step": 46260 }, { "epoch": 0.08862940477319244, "grad_norm": 1.3670724630355835, "learning_rate": 8.860036138654951e-05, "loss": 4.7262, "step": 46270 }, { "epoch": 0.08864855960456766, "grad_norm": 1.4033278226852417, "learning_rate": 8.86195161506586e-05, "loss": 4.6847, "step": 46280 }, { "epoch": 0.08866771443594289, "grad_norm": 1.3975887298583984, "learning_rate": 8.863867091476769e-05, "loss": 4.6811, "step": 46290 }, { "epoch": 0.08868686926731813, "grad_norm": 1.4650462865829468, "learning_rate": 8.865782567887675e-05, "loss": 4.6603, "step": 46300 }, { "epoch": 0.08870602409869335, "grad_norm": 1.3635320663452148, "learning_rate": 8.867698044298583e-05, "loss": 4.7455, "step": 46310 }, { "epoch": 0.08872517893006858, "grad_norm": 1.326536774635315, "learning_rate": 8.869613520709492e-05, "loss": 4.7848, "step": 46320 }, { "epoch": 0.08874433376144382, "grad_norm": 1.4010093212127686, "learning_rate": 8.8715289971204e-05, "loss": 4.8075, "step": 46330 }, { "epoch": 0.08876348859281905, "grad_norm": 1.444128155708313, "learning_rate": 8.873444473531308e-05, "loss": 4.8188, "step": 46340 }, { "epoch": 0.08878264342419427, "grad_norm": 1.4034647941589355, "learning_rate": 8.875359949942215e-05, "loss": 4.8445, "step": 46350 }, { "epoch": 0.08880179825556951, "grad_norm": 1.4313541650772095, "learning_rate": 8.877275426353124e-05, "loss": 4.8059, "step": 46360 }, { "epoch": 0.08882095308694474, "grad_norm": 1.4595192670822144, "learning_rate": 8.879190902764032e-05, "loss": 4.7136, "step": 46370 }, { "epoch": 
0.08884010791831996, "grad_norm": 1.4657174348831177, "learning_rate": 8.881106379174939e-05, "loss": 4.7001, "step": 46380 }, { "epoch": 0.0888592627496952, "grad_norm": 1.4502604007720947, "learning_rate": 8.883021855585848e-05, "loss": 4.7571, "step": 46390 }, { "epoch": 0.08887841758107043, "grad_norm": 1.4051878452301025, "learning_rate": 8.884937331996757e-05, "loss": 4.7329, "step": 46400 }, { "epoch": 0.08889757241244566, "grad_norm": 1.3963618278503418, "learning_rate": 8.886852808407663e-05, "loss": 4.6967, "step": 46410 }, { "epoch": 0.0889167272438209, "grad_norm": 1.3536020517349243, "learning_rate": 8.888768284818572e-05, "loss": 4.6852, "step": 46420 }, { "epoch": 0.08893588207519612, "grad_norm": 1.4173961877822876, "learning_rate": 8.89068376122948e-05, "loss": 4.8013, "step": 46430 }, { "epoch": 0.08895503690657135, "grad_norm": 1.4778718948364258, "learning_rate": 8.892599237640388e-05, "loss": 4.8625, "step": 46440 }, { "epoch": 0.08897419173794659, "grad_norm": 1.4598140716552734, "learning_rate": 8.894514714051296e-05, "loss": 4.8598, "step": 46450 }, { "epoch": 0.08899334656932181, "grad_norm": 1.3961775302886963, "learning_rate": 8.896430190462203e-05, "loss": 4.7213, "step": 46460 }, { "epoch": 0.08901250140069704, "grad_norm": 1.4420050382614136, "learning_rate": 8.898345666873112e-05, "loss": 4.7392, "step": 46470 }, { "epoch": 0.08903165623207228, "grad_norm": 1.4112519025802612, "learning_rate": 8.90026114328402e-05, "loss": 4.8764, "step": 46480 }, { "epoch": 0.0890508110634475, "grad_norm": 1.4269424676895142, "learning_rate": 8.902176619694927e-05, "loss": 4.6852, "step": 46490 }, { "epoch": 0.08906996589482273, "grad_norm": 1.5511339902877808, "learning_rate": 8.904092096105837e-05, "loss": 4.7602, "step": 46500 }, { "epoch": 0.08908912072619797, "grad_norm": 1.5718532800674438, "learning_rate": 8.906007572516743e-05, "loss": 4.7087, "step": 46510 }, { "epoch": 0.0891082755575732, "grad_norm": 1.3830339908599854, "learning_rate": 
8.907923048927651e-05, "loss": 4.6909, "step": 46520 }, { "epoch": 0.08912743038894842, "grad_norm": 1.414452314376831, "learning_rate": 8.90983852533856e-05, "loss": 4.7468, "step": 46530 }, { "epoch": 0.08914658522032366, "grad_norm": 1.4102058410644531, "learning_rate": 8.911754001749468e-05, "loss": 4.9822, "step": 46540 }, { "epoch": 0.08916574005169889, "grad_norm": 1.4435497522354126, "learning_rate": 8.913669478160376e-05, "loss": 4.8305, "step": 46550 }, { "epoch": 0.08918489488307411, "grad_norm": 1.3511515855789185, "learning_rate": 8.915584954571284e-05, "loss": 4.6728, "step": 46560 }, { "epoch": 0.08920404971444935, "grad_norm": 1.436905860900879, "learning_rate": 8.917500430982191e-05, "loss": 4.8752, "step": 46570 }, { "epoch": 0.08922320454582458, "grad_norm": 1.622951626777649, "learning_rate": 8.9194159073931e-05, "loss": 4.9414, "step": 46580 }, { "epoch": 0.0892423593771998, "grad_norm": 1.4466837644577026, "learning_rate": 8.921331383804008e-05, "loss": 4.9165, "step": 46590 }, { "epoch": 0.08926151420857505, "grad_norm": 1.4151684045791626, "learning_rate": 8.923246860214915e-05, "loss": 4.7881, "step": 46600 }, { "epoch": 0.08928066903995027, "grad_norm": 1.372557282447815, "learning_rate": 8.925162336625825e-05, "loss": 4.6367, "step": 46610 }, { "epoch": 0.0892998238713255, "grad_norm": 1.4125925302505493, "learning_rate": 8.927077813036731e-05, "loss": 4.7408, "step": 46620 }, { "epoch": 0.08931897870270074, "grad_norm": 1.434394359588623, "learning_rate": 8.92899328944764e-05, "loss": 4.6002, "step": 46630 }, { "epoch": 0.08933813353407596, "grad_norm": 1.435966968536377, "learning_rate": 8.930908765858548e-05, "loss": 4.6759, "step": 46640 }, { "epoch": 0.08935728836545119, "grad_norm": 1.3629965782165527, "learning_rate": 8.932824242269456e-05, "loss": 4.6944, "step": 46650 }, { "epoch": 0.08937644319682643, "grad_norm": 1.3724173307418823, "learning_rate": 8.934739718680364e-05, "loss": 4.6973, "step": 46660 }, { "epoch": 
0.08939559802820166, "grad_norm": 1.3911265134811401, "learning_rate": 8.936655195091272e-05, "loss": 4.7746, "step": 46670 }, { "epoch": 0.08941475285957688, "grad_norm": 1.388973593711853, "learning_rate": 8.938570671502179e-05, "loss": 4.7259, "step": 46680 }, { "epoch": 0.08943390769095212, "grad_norm": 1.418444275856018, "learning_rate": 8.940486147913088e-05, "loss": 4.891, "step": 46690 }, { "epoch": 0.08945306252232735, "grad_norm": 1.3659740686416626, "learning_rate": 8.942401624323995e-05, "loss": 4.7869, "step": 46700 }, { "epoch": 0.08947221735370259, "grad_norm": 1.3995187282562256, "learning_rate": 8.944317100734903e-05, "loss": 4.8241, "step": 46710 }, { "epoch": 0.08949137218507781, "grad_norm": 1.7895562648773193, "learning_rate": 8.946232577145813e-05, "loss": 4.7195, "step": 46720 }, { "epoch": 0.08951052701645304, "grad_norm": 1.3827786445617676, "learning_rate": 8.94814805355672e-05, "loss": 4.747, "step": 46730 }, { "epoch": 0.08952968184782828, "grad_norm": 1.419040322303772, "learning_rate": 8.950063529967628e-05, "loss": 4.7336, "step": 46740 }, { "epoch": 0.0895488366792035, "grad_norm": 1.4321521520614624, "learning_rate": 8.951979006378536e-05, "loss": 4.6504, "step": 46750 }, { "epoch": 0.08956799151057873, "grad_norm": 1.4186482429504395, "learning_rate": 8.953894482789443e-05, "loss": 4.7013, "step": 46760 }, { "epoch": 0.08958714634195397, "grad_norm": 1.3944298028945923, "learning_rate": 8.955809959200352e-05, "loss": 4.8962, "step": 46770 }, { "epoch": 0.0896063011733292, "grad_norm": 1.390984058380127, "learning_rate": 8.95772543561126e-05, "loss": 4.8272, "step": 46780 }, { "epoch": 0.08962545600470442, "grad_norm": 1.3866407871246338, "learning_rate": 8.959640912022167e-05, "loss": 4.7511, "step": 46790 }, { "epoch": 0.08964461083607966, "grad_norm": 1.3453261852264404, "learning_rate": 8.961556388433076e-05, "loss": 4.8195, "step": 46800 }, { "epoch": 0.08966376566745489, "grad_norm": 1.3639774322509766, "learning_rate": 
8.963471864843983e-05, "loss": 4.6947, "step": 46810 }, { "epoch": 0.08968292049883012, "grad_norm": 1.4672845602035522, "learning_rate": 8.965387341254891e-05, "loss": 4.8743, "step": 46820 }, { "epoch": 0.08970207533020536, "grad_norm": 1.4272865056991577, "learning_rate": 8.967302817665801e-05, "loss": 4.7741, "step": 46830 }, { "epoch": 0.08972123016158058, "grad_norm": 1.3912279605865479, "learning_rate": 8.969218294076708e-05, "loss": 4.8033, "step": 46840 }, { "epoch": 0.08974038499295581, "grad_norm": 1.391364336013794, "learning_rate": 8.971133770487616e-05, "loss": 4.7636, "step": 46850 }, { "epoch": 0.08975953982433105, "grad_norm": 1.4095255136489868, "learning_rate": 8.973049246898524e-05, "loss": 4.8767, "step": 46860 }, { "epoch": 0.08977869465570627, "grad_norm": 1.3746130466461182, "learning_rate": 8.97496472330943e-05, "loss": 4.7424, "step": 46870 }, { "epoch": 0.0897978494870815, "grad_norm": 1.396385908126831, "learning_rate": 8.97688019972034e-05, "loss": 4.7796, "step": 46880 }, { "epoch": 0.08981700431845674, "grad_norm": 1.3828835487365723, "learning_rate": 8.978795676131247e-05, "loss": 4.7271, "step": 46890 }, { "epoch": 0.08983615914983197, "grad_norm": 1.4809107780456543, "learning_rate": 8.980711152542155e-05, "loss": 4.7444, "step": 46900 }, { "epoch": 0.08985531398120719, "grad_norm": 1.4333055019378662, "learning_rate": 8.982626628953064e-05, "loss": 4.7606, "step": 46910 }, { "epoch": 0.08987446881258243, "grad_norm": 1.4129692316055298, "learning_rate": 8.984542105363971e-05, "loss": 4.7341, "step": 46920 }, { "epoch": 0.08989362364395766, "grad_norm": 1.372816801071167, "learning_rate": 8.98645758177488e-05, "loss": 4.765, "step": 46930 }, { "epoch": 0.08991277847533288, "grad_norm": 1.3880393505096436, "learning_rate": 8.988373058185789e-05, "loss": 4.734, "step": 46940 }, { "epoch": 0.08993193330670812, "grad_norm": 1.423258900642395, "learning_rate": 8.990288534596696e-05, "loss": 4.7657, "step": 46950 }, { "epoch": 
0.08995108813808335, "grad_norm": 1.4154707193374634, "learning_rate": 8.992204011007604e-05, "loss": 4.7048, "step": 46960 }, { "epoch": 0.08997024296945857, "grad_norm": 1.3790630102157593, "learning_rate": 8.994119487418512e-05, "loss": 4.7839, "step": 46970 }, { "epoch": 0.08998939780083381, "grad_norm": 1.3899871110916138, "learning_rate": 8.996034963829419e-05, "loss": 4.8308, "step": 46980 }, { "epoch": 0.09000855263220904, "grad_norm": 1.3583037853240967, "learning_rate": 8.997950440240328e-05, "loss": 4.8272, "step": 46990 }, { "epoch": 0.09002770746358427, "grad_norm": 1.3513849973678589, "learning_rate": 8.999865916651235e-05, "loss": 4.8179, "step": 47000 }, { "epoch": 0.0900468622949595, "grad_norm": 1.5145565271377563, "learning_rate": 9.001781393062143e-05, "loss": 4.8292, "step": 47010 }, { "epoch": 0.09006601712633473, "grad_norm": 1.3458994626998901, "learning_rate": 9.003696869473053e-05, "loss": 4.8145, "step": 47020 }, { "epoch": 0.09008517195770996, "grad_norm": 1.4390901327133179, "learning_rate": 9.00561234588396e-05, "loss": 4.8197, "step": 47030 }, { "epoch": 0.0901043267890852, "grad_norm": 1.3541083335876465, "learning_rate": 9.007527822294867e-05, "loss": 4.8595, "step": 47040 }, { "epoch": 0.09012348162046042, "grad_norm": 1.4613083600997925, "learning_rate": 9.009443298705777e-05, "loss": 4.7731, "step": 47050 }, { "epoch": 0.09014263645183565, "grad_norm": 1.418802261352539, "learning_rate": 9.011358775116684e-05, "loss": 4.7551, "step": 47060 }, { "epoch": 0.09016179128321089, "grad_norm": 1.4131629467010498, "learning_rate": 9.013274251527592e-05, "loss": 4.7536, "step": 47070 }, { "epoch": 0.09018094611458612, "grad_norm": 1.3984856605529785, "learning_rate": 9.0151897279385e-05, "loss": 4.5517, "step": 47080 }, { "epoch": 0.09020010094596134, "grad_norm": 1.3994768857955933, "learning_rate": 9.017105204349407e-05, "loss": 4.7495, "step": 47090 }, { "epoch": 0.09021925577733658, "grad_norm": 1.3554494380950928, "learning_rate": 
9.019020680760316e-05, "loss": 4.8702, "step": 47100 }, { "epoch": 0.09023841060871181, "grad_norm": 1.3535126447677612, "learning_rate": 9.020936157171223e-05, "loss": 4.8092, "step": 47110 }, { "epoch": 0.09025756544008703, "grad_norm": 1.402559518814087, "learning_rate": 9.022851633582131e-05, "loss": 4.6493, "step": 47120 }, { "epoch": 0.09027672027146227, "grad_norm": 1.4135581254959106, "learning_rate": 9.02476710999304e-05, "loss": 4.8193, "step": 47130 }, { "epoch": 0.0902958751028375, "grad_norm": 1.4634182453155518, "learning_rate": 9.026682586403947e-05, "loss": 4.7973, "step": 47140 }, { "epoch": 0.09031502993421273, "grad_norm": 1.3644262552261353, "learning_rate": 9.028598062814856e-05, "loss": 4.7549, "step": 47150 }, { "epoch": 0.09033418476558797, "grad_norm": 1.4459362030029297, "learning_rate": 9.030513539225764e-05, "loss": 4.7378, "step": 47160 }, { "epoch": 0.09035333959696319, "grad_norm": 1.3697417974472046, "learning_rate": 9.032429015636672e-05, "loss": 4.7102, "step": 47170 }, { "epoch": 0.09037249442833842, "grad_norm": 1.388282299041748, "learning_rate": 9.03434449204758e-05, "loss": 4.851, "step": 47180 }, { "epoch": 0.09039164925971366, "grad_norm": 1.4227025508880615, "learning_rate": 9.036259968458487e-05, "loss": 4.9433, "step": 47190 }, { "epoch": 0.09041080409108888, "grad_norm": 1.3503704071044922, "learning_rate": 9.038175444869395e-05, "loss": 4.816, "step": 47200 }, { "epoch": 0.09042995892246411, "grad_norm": 1.3769819736480713, "learning_rate": 9.040090921280304e-05, "loss": 4.7211, "step": 47210 }, { "epoch": 0.09044911375383935, "grad_norm": 1.5664072036743164, "learning_rate": 9.042006397691211e-05, "loss": 4.7783, "step": 47220 }, { "epoch": 0.09046826858521458, "grad_norm": 1.3429899215698242, "learning_rate": 9.043921874102119e-05, "loss": 4.6922, "step": 47230 }, { "epoch": 0.0904874234165898, "grad_norm": 1.3809397220611572, "learning_rate": 9.045837350513029e-05, "loss": 4.825, "step": 47240 }, { "epoch": 
0.09050657824796504, "grad_norm": 1.4239683151245117, "learning_rate": 9.047752826923936e-05, "loss": 4.72, "step": 47250 }, { "epoch": 0.09052573307934027, "grad_norm": 1.443298578262329, "learning_rate": 9.049668303334844e-05, "loss": 4.7254, "step": 47260 }, { "epoch": 0.0905448879107155, "grad_norm": 1.393082618713379, "learning_rate": 9.051583779745752e-05, "loss": 4.7777, "step": 47270 }, { "epoch": 0.09056404274209073, "grad_norm": 1.4038954973220825, "learning_rate": 9.05349925615666e-05, "loss": 4.8214, "step": 47280 }, { "epoch": 0.09058319757346596, "grad_norm": 1.4299936294555664, "learning_rate": 9.055414732567568e-05, "loss": 4.6809, "step": 47290 }, { "epoch": 0.09060235240484119, "grad_norm": 1.4339942932128906, "learning_rate": 9.057330208978475e-05, "loss": 4.7441, "step": 47300 }, { "epoch": 0.09062150723621643, "grad_norm": 1.4323391914367676, "learning_rate": 9.059245685389383e-05, "loss": 4.8104, "step": 47310 }, { "epoch": 0.09064066206759165, "grad_norm": 1.4648890495300293, "learning_rate": 9.061161161800292e-05, "loss": 4.7856, "step": 47320 }, { "epoch": 0.09065981689896688, "grad_norm": 1.4097646474838257, "learning_rate": 9.063076638211199e-05, "loss": 4.8251, "step": 47330 }, { "epoch": 0.09067897173034212, "grad_norm": 1.3831278085708618, "learning_rate": 9.064992114622107e-05, "loss": 4.7531, "step": 47340 }, { "epoch": 0.09069812656171734, "grad_norm": 1.3562400341033936, "learning_rate": 9.066907591033017e-05, "loss": 4.8574, "step": 47350 }, { "epoch": 0.09071728139309258, "grad_norm": 1.4264369010925293, "learning_rate": 9.068823067443924e-05, "loss": 4.7187, "step": 47360 }, { "epoch": 0.09073643622446781, "grad_norm": 1.3797656297683716, "learning_rate": 9.070738543854832e-05, "loss": 4.6705, "step": 47370 }, { "epoch": 0.09075559105584304, "grad_norm": 1.382312536239624, "learning_rate": 9.072654020265739e-05, "loss": 4.7716, "step": 47380 }, { "epoch": 0.09077474588721827, "grad_norm": 2.091989755630493, "learning_rate": 
9.074569496676648e-05, "loss": 4.8332, "step": 47390 }, { "epoch": 0.0907939007185935, "grad_norm": 1.4812086820602417, "learning_rate": 9.076484973087556e-05, "loss": 4.7327, "step": 47400 }, { "epoch": 0.09081305554996873, "grad_norm": 1.3968316316604614, "learning_rate": 9.078400449498463e-05, "loss": 4.7758, "step": 47410 }, { "epoch": 0.09083221038134397, "grad_norm": 1.4017452001571655, "learning_rate": 9.080315925909371e-05, "loss": 4.7337, "step": 47420 }, { "epoch": 0.09085136521271919, "grad_norm": 1.375458836555481, "learning_rate": 9.08223140232028e-05, "loss": 4.6834, "step": 47430 }, { "epoch": 0.09087052004409442, "grad_norm": 1.4897749423980713, "learning_rate": 9.084146878731187e-05, "loss": 4.7911, "step": 47440 }, { "epoch": 0.09088967487546966, "grad_norm": 1.4549837112426758, "learning_rate": 9.086062355142095e-05, "loss": 4.724, "step": 47450 }, { "epoch": 0.09090882970684488, "grad_norm": 1.40140962600708, "learning_rate": 9.087977831553005e-05, "loss": 4.6629, "step": 47460 }, { "epoch": 0.09092798453822011, "grad_norm": 1.4246724843978882, "learning_rate": 9.089893307963912e-05, "loss": 4.7663, "step": 47470 }, { "epoch": 0.09094713936959535, "grad_norm": 1.3997756242752075, "learning_rate": 9.09180878437482e-05, "loss": 4.6803, "step": 47480 }, { "epoch": 0.09096629420097058, "grad_norm": 1.3991636037826538, "learning_rate": 9.093724260785727e-05, "loss": 4.6664, "step": 47490 }, { "epoch": 0.0909854490323458, "grad_norm": 1.4862329959869385, "learning_rate": 9.095639737196636e-05, "loss": 4.682, "step": 47500 }, { "epoch": 0.09100460386372104, "grad_norm": 1.349736213684082, "learning_rate": 9.097555213607544e-05, "loss": 4.9387, "step": 47510 }, { "epoch": 0.09102375869509627, "grad_norm": 1.372125506401062, "learning_rate": 9.099470690018451e-05, "loss": 4.6668, "step": 47520 }, { "epoch": 0.0910429135264715, "grad_norm": 1.3385118246078491, "learning_rate": 9.101386166429359e-05, "loss": 4.6906, "step": 47530 }, { "epoch": 
0.09106206835784673, "grad_norm": 1.3642656803131104, "learning_rate": 9.103301642840269e-05, "loss": 4.6185, "step": 47540 }, { "epoch": 0.09108122318922196, "grad_norm": 1.4953489303588867, "learning_rate": 9.105217119251175e-05, "loss": 4.8163, "step": 47550 }, { "epoch": 0.09110037802059719, "grad_norm": 1.4075466394424438, "learning_rate": 9.107132595662084e-05, "loss": 4.753, "step": 47560 }, { "epoch": 0.09111953285197243, "grad_norm": 1.3863592147827148, "learning_rate": 9.10904807207299e-05, "loss": 4.8252, "step": 47570 }, { "epoch": 0.09113868768334765, "grad_norm": 1.3645122051239014, "learning_rate": 9.1109635484839e-05, "loss": 4.696, "step": 47580 }, { "epoch": 0.09115784251472288, "grad_norm": 1.3995364904403687, "learning_rate": 9.112879024894808e-05, "loss": 4.7301, "step": 47590 }, { "epoch": 0.09117699734609812, "grad_norm": 1.583868145942688, "learning_rate": 9.114794501305715e-05, "loss": 4.8051, "step": 47600 }, { "epoch": 0.09119615217747334, "grad_norm": 1.3773834705352783, "learning_rate": 9.116709977716624e-05, "loss": 4.7053, "step": 47610 }, { "epoch": 0.09121530700884857, "grad_norm": 1.3794997930526733, "learning_rate": 9.118625454127532e-05, "loss": 4.6985, "step": 47620 }, { "epoch": 0.09123446184022381, "grad_norm": 1.3698521852493286, "learning_rate": 9.120540930538439e-05, "loss": 4.7462, "step": 47630 }, { "epoch": 0.09125361667159904, "grad_norm": 1.366410493850708, "learning_rate": 9.122456406949347e-05, "loss": 4.8437, "step": 47640 }, { "epoch": 0.09127277150297426, "grad_norm": 1.382519245147705, "learning_rate": 9.124371883360257e-05, "loss": 4.8588, "step": 47650 }, { "epoch": 0.0912919263343495, "grad_norm": 1.8829262256622314, "learning_rate": 9.126287359771163e-05, "loss": 4.6063, "step": 47660 }, { "epoch": 0.09131108116572473, "grad_norm": 1.3700200319290161, "learning_rate": 9.128202836182072e-05, "loss": 4.7262, "step": 47670 }, { "epoch": 0.09133023599709995, "grad_norm": 1.4045466184616089, "learning_rate": 
9.130118312592978e-05, "loss": 4.7593, "step": 47680 }, { "epoch": 0.0913493908284752, "grad_norm": 1.3834812641143799, "learning_rate": 9.132033789003888e-05, "loss": 4.6198, "step": 47690 }, { "epoch": 0.09136854565985042, "grad_norm": 1.3792296648025513, "learning_rate": 9.133949265414796e-05, "loss": 4.6844, "step": 47700 }, { "epoch": 0.09138770049122565, "grad_norm": 1.3833203315734863, "learning_rate": 9.135864741825703e-05, "loss": 4.797, "step": 47710 }, { "epoch": 0.09140685532260089, "grad_norm": 1.3289693593978882, "learning_rate": 9.137780218236612e-05, "loss": 4.9691, "step": 47720 }, { "epoch": 0.09142601015397611, "grad_norm": 1.3982821702957153, "learning_rate": 9.13969569464752e-05, "loss": 4.6426, "step": 47730 }, { "epoch": 0.09144516498535134, "grad_norm": 1.389595627784729, "learning_rate": 9.141611171058427e-05, "loss": 4.7929, "step": 47740 }, { "epoch": 0.09146431981672658, "grad_norm": 1.363433599472046, "learning_rate": 9.143526647469335e-05, "loss": 4.7683, "step": 47750 }, { "epoch": 0.0914834746481018, "grad_norm": 1.422227382659912, "learning_rate": 9.145442123880245e-05, "loss": 4.7184, "step": 47760 }, { "epoch": 0.09150262947947703, "grad_norm": 1.4272633790969849, "learning_rate": 9.147357600291152e-05, "loss": 4.7194, "step": 47770 }, { "epoch": 0.09152178431085227, "grad_norm": 1.3934152126312256, "learning_rate": 9.14927307670206e-05, "loss": 4.734, "step": 47780 }, { "epoch": 0.0915409391422275, "grad_norm": 1.4504672288894653, "learning_rate": 9.151188553112966e-05, "loss": 4.7398, "step": 47790 }, { "epoch": 0.09156009397360272, "grad_norm": 1.4614081382751465, "learning_rate": 9.153104029523876e-05, "loss": 4.7506, "step": 47800 }, { "epoch": 0.09157924880497796, "grad_norm": 1.346930980682373, "learning_rate": 9.155019505934784e-05, "loss": 4.8086, "step": 47810 }, { "epoch": 0.09159840363635319, "grad_norm": 1.385308027267456, "learning_rate": 9.156934982345691e-05, "loss": 4.7588, "step": 47820 }, { "epoch": 
0.09161755846772841, "grad_norm": 1.4091095924377441, "learning_rate": 9.1588504587566e-05, "loss": 4.7812, "step": 47830 }, { "epoch": 0.09163671329910365, "grad_norm": 1.3847780227661133, "learning_rate": 9.160765935167508e-05, "loss": 4.6738, "step": 47840 }, { "epoch": 0.09165586813047888, "grad_norm": 1.5791163444519043, "learning_rate": 9.162681411578415e-05, "loss": 4.707, "step": 47850 }, { "epoch": 0.0916750229618541, "grad_norm": 1.3892240524291992, "learning_rate": 9.164596887989323e-05, "loss": 4.7425, "step": 47860 }, { "epoch": 0.09169417779322935, "grad_norm": 1.4457523822784424, "learning_rate": 9.166512364400231e-05, "loss": 4.7789, "step": 47870 }, { "epoch": 0.09171333262460457, "grad_norm": 1.3964219093322754, "learning_rate": 9.16842784081114e-05, "loss": 4.7924, "step": 47880 }, { "epoch": 0.0917324874559798, "grad_norm": 1.3795204162597656, "learning_rate": 9.170343317222048e-05, "loss": 4.7188, "step": 47890 }, { "epoch": 0.09175164228735504, "grad_norm": 1.3879042863845825, "learning_rate": 9.172258793632955e-05, "loss": 4.5689, "step": 47900 }, { "epoch": 0.09177079711873026, "grad_norm": 1.3591488599777222, "learning_rate": 9.174174270043864e-05, "loss": 4.7179, "step": 47910 }, { "epoch": 0.09178995195010549, "grad_norm": 1.3451247215270996, "learning_rate": 9.176089746454772e-05, "loss": 4.7996, "step": 47920 }, { "epoch": 0.09180910678148073, "grad_norm": 1.3848050832748413, "learning_rate": 9.178005222865679e-05, "loss": 4.8055, "step": 47930 }, { "epoch": 0.09182826161285595, "grad_norm": 1.3456820249557495, "learning_rate": 9.179920699276588e-05, "loss": 4.7342, "step": 47940 }, { "epoch": 0.09184741644423118, "grad_norm": 1.3557285070419312, "learning_rate": 9.181836175687497e-05, "loss": 4.8616, "step": 47950 }, { "epoch": 0.09186657127560642, "grad_norm": 1.5505691766738892, "learning_rate": 9.183751652098403e-05, "loss": 4.8113, "step": 47960 }, { "epoch": 0.09188572610698165, "grad_norm": 1.3715450763702393, "learning_rate": 
9.185667128509311e-05, "loss": 4.7826, "step": 47970 }, { "epoch": 0.09190488093835687, "grad_norm": 1.3912606239318848, "learning_rate": 9.18758260492022e-05, "loss": 4.8504, "step": 47980 }, { "epoch": 0.09192403576973211, "grad_norm": 1.3750793933868408, "learning_rate": 9.189498081331128e-05, "loss": 4.7652, "step": 47990 }, { "epoch": 0.09194319060110734, "grad_norm": 1.4224668741226196, "learning_rate": 9.191413557742036e-05, "loss": 4.6959, "step": 48000 }, { "epoch": 0.09196234543248258, "grad_norm": 1.567799687385559, "learning_rate": 9.193329034152943e-05, "loss": 4.8598, "step": 48010 }, { "epoch": 0.0919815002638578, "grad_norm": 1.3311891555786133, "learning_rate": 9.195244510563852e-05, "loss": 4.7831, "step": 48020 }, { "epoch": 0.09200065509523303, "grad_norm": 1.3516042232513428, "learning_rate": 9.19715998697476e-05, "loss": 4.6806, "step": 48030 }, { "epoch": 0.09201980992660827, "grad_norm": 1.4133694171905518, "learning_rate": 9.199075463385667e-05, "loss": 4.8775, "step": 48040 }, { "epoch": 0.0920389647579835, "grad_norm": 1.392685890197754, "learning_rate": 9.200990939796576e-05, "loss": 4.8839, "step": 48050 }, { "epoch": 0.09205811958935872, "grad_norm": 1.3742413520812988, "learning_rate": 9.202906416207483e-05, "loss": 4.832, "step": 48060 }, { "epoch": 0.09207727442073396, "grad_norm": 1.3904979228973389, "learning_rate": 9.204821892618391e-05, "loss": 4.7353, "step": 48070 }, { "epoch": 0.09209642925210919, "grad_norm": 1.3680167198181152, "learning_rate": 9.2067373690293e-05, "loss": 4.6155, "step": 48080 }, { "epoch": 0.09211558408348441, "grad_norm": 1.4250091314315796, "learning_rate": 9.208652845440208e-05, "loss": 4.7369, "step": 48090 }, { "epoch": 0.09213473891485965, "grad_norm": 1.4295660257339478, "learning_rate": 9.210568321851116e-05, "loss": 4.7833, "step": 48100 }, { "epoch": 0.09215389374623488, "grad_norm": 1.3575221300125122, "learning_rate": 9.212483798262024e-05, "loss": 4.9098, "step": 48110 }, { "epoch": 
0.0921730485776101, "grad_norm": 1.3817567825317383, "learning_rate": 9.214399274672931e-05, "loss": 4.7012, "step": 48120 }, { "epoch": 0.09219220340898535, "grad_norm": 1.3774840831756592, "learning_rate": 9.21631475108384e-05, "loss": 4.7211, "step": 48130 }, { "epoch": 0.09221135824036057, "grad_norm": 1.3356540203094482, "learning_rate": 9.218230227494748e-05, "loss": 4.7254, "step": 48140 }, { "epoch": 0.0922305130717358, "grad_norm": 1.3598302602767944, "learning_rate": 9.220145703905655e-05, "loss": 4.9334, "step": 48150 }, { "epoch": 0.09224966790311104, "grad_norm": 1.371427297592163, "learning_rate": 9.222061180316565e-05, "loss": 4.895, "step": 48160 }, { "epoch": 0.09226882273448626, "grad_norm": 1.3765586614608765, "learning_rate": 9.223976656727471e-05, "loss": 4.7272, "step": 48170 }, { "epoch": 0.09228797756586149, "grad_norm": 1.410029411315918, "learning_rate": 9.22589213313838e-05, "loss": 4.7022, "step": 48180 }, { "epoch": 0.09230713239723673, "grad_norm": 1.313057541847229, "learning_rate": 9.227807609549288e-05, "loss": 4.8083, "step": 48190 }, { "epoch": 0.09232628722861196, "grad_norm": 1.4010387659072876, "learning_rate": 9.229723085960196e-05, "loss": 4.7437, "step": 48200 }, { "epoch": 0.09234544205998718, "grad_norm": 1.4080561399459839, "learning_rate": 9.231638562371104e-05, "loss": 4.7149, "step": 48210 }, { "epoch": 0.09236459689136242, "grad_norm": 1.3205965757369995, "learning_rate": 9.233554038782012e-05, "loss": 4.6777, "step": 48220 }, { "epoch": 0.09238375172273765, "grad_norm": 1.44107186794281, "learning_rate": 9.235469515192919e-05, "loss": 4.7891, "step": 48230 }, { "epoch": 0.09240290655411287, "grad_norm": 1.347032070159912, "learning_rate": 9.237384991603828e-05, "loss": 4.7961, "step": 48240 }, { "epoch": 0.09242206138548811, "grad_norm": 1.3681221008300781, "learning_rate": 9.239300468014735e-05, "loss": 4.7654, "step": 48250 }, { "epoch": 0.09244121621686334, "grad_norm": 1.3683264255523682, "learning_rate": 
9.241215944425643e-05, "loss": 4.7218, "step": 48260 }, { "epoch": 0.09246037104823857, "grad_norm": 1.3730536699295044, "learning_rate": 9.243131420836553e-05, "loss": 4.7523, "step": 48270 }, { "epoch": 0.0924795258796138, "grad_norm": 1.3551069498062134, "learning_rate": 9.24504689724746e-05, "loss": 4.783, "step": 48280 }, { "epoch": 0.09249868071098903, "grad_norm": 1.4055185317993164, "learning_rate": 9.246962373658368e-05, "loss": 4.6964, "step": 48290 }, { "epoch": 0.09251783554236426, "grad_norm": 1.405328392982483, "learning_rate": 9.248877850069276e-05, "loss": 4.721, "step": 48300 }, { "epoch": 0.0925369903737395, "grad_norm": 1.3872007131576538, "learning_rate": 9.250793326480182e-05, "loss": 4.681, "step": 48310 }, { "epoch": 0.09255614520511472, "grad_norm": 1.3865582942962646, "learning_rate": 9.252708802891092e-05, "loss": 4.7748, "step": 48320 }, { "epoch": 0.09257530003648995, "grad_norm": 1.369808316230774, "learning_rate": 9.254624279302e-05, "loss": 4.7223, "step": 48330 }, { "epoch": 0.09259445486786519, "grad_norm": 1.4518011808395386, "learning_rate": 9.256539755712907e-05, "loss": 4.857, "step": 48340 }, { "epoch": 0.09261360969924042, "grad_norm": 1.5316110849380493, "learning_rate": 9.258455232123816e-05, "loss": 4.6289, "step": 48350 }, { "epoch": 0.09263276453061564, "grad_norm": 1.3923298120498657, "learning_rate": 9.260370708534723e-05, "loss": 4.7844, "step": 48360 }, { "epoch": 0.09265191936199088, "grad_norm": 1.3609154224395752, "learning_rate": 9.262286184945631e-05, "loss": 4.7688, "step": 48370 }, { "epoch": 0.09267107419336611, "grad_norm": 1.3769718408584595, "learning_rate": 9.264201661356541e-05, "loss": 4.6208, "step": 48380 }, { "epoch": 0.09269022902474133, "grad_norm": 1.3598862886428833, "learning_rate": 9.266117137767447e-05, "loss": 4.7356, "step": 48390 }, { "epoch": 0.09270938385611657, "grad_norm": 1.5792220830917358, "learning_rate": 9.268032614178356e-05, "loss": 4.6775, "step": 48400 }, { "epoch": 
0.0927285386874918, "grad_norm": 1.383853554725647, "learning_rate": 9.269948090589264e-05, "loss": 4.6695, "step": 48410 }, { "epoch": 0.09274769351886702, "grad_norm": 1.4364268779754639, "learning_rate": 9.27186356700017e-05, "loss": 4.7665, "step": 48420 }, { "epoch": 0.09276684835024226, "grad_norm": 1.3932865858078003, "learning_rate": 9.27377904341108e-05, "loss": 4.6424, "step": 48430 }, { "epoch": 0.09278600318161749, "grad_norm": 1.3792088031768799, "learning_rate": 9.275694519821987e-05, "loss": 4.6923, "step": 48440 }, { "epoch": 0.09280515801299272, "grad_norm": 1.4665398597717285, "learning_rate": 9.277609996232895e-05, "loss": 4.7744, "step": 48450 }, { "epoch": 0.09282431284436796, "grad_norm": 1.4384685754776, "learning_rate": 9.279525472643804e-05, "loss": 4.6664, "step": 48460 }, { "epoch": 0.09284346767574318, "grad_norm": 1.4388368129730225, "learning_rate": 9.281440949054711e-05, "loss": 4.8388, "step": 48470 }, { "epoch": 0.09286262250711841, "grad_norm": 1.3475655317306519, "learning_rate": 9.283356425465619e-05, "loss": 4.7113, "step": 48480 }, { "epoch": 0.09288177733849365, "grad_norm": 1.3862429857254028, "learning_rate": 9.285271901876529e-05, "loss": 4.8423, "step": 48490 }, { "epoch": 0.09290093216986887, "grad_norm": 1.3965777158737183, "learning_rate": 9.287187378287436e-05, "loss": 4.7459, "step": 48500 }, { "epoch": 0.0929200870012441, "grad_norm": 1.399373173713684, "learning_rate": 9.289102854698344e-05, "loss": 4.6582, "step": 48510 }, { "epoch": 0.09293924183261934, "grad_norm": 1.4196988344192505, "learning_rate": 9.291018331109252e-05, "loss": 4.7674, "step": 48520 }, { "epoch": 0.09295839666399457, "grad_norm": 1.4363102912902832, "learning_rate": 9.292933807520159e-05, "loss": 4.6793, "step": 48530 }, { "epoch": 0.09297755149536979, "grad_norm": 1.4237618446350098, "learning_rate": 9.294849283931068e-05, "loss": 4.7437, "step": 48540 }, { "epoch": 0.09299670632674503, "grad_norm": 1.6010955572128296, "learning_rate": 
9.296764760341975e-05, "loss": 4.7745, "step": 48550 }, { "epoch": 0.09301586115812026, "grad_norm": 1.4429731369018555, "learning_rate": 9.298680236752883e-05, "loss": 4.6786, "step": 48560 }, { "epoch": 0.09303501598949548, "grad_norm": 1.3146508932113647, "learning_rate": 9.300595713163792e-05, "loss": 4.7608, "step": 48570 }, { "epoch": 0.09305417082087072, "grad_norm": 1.4532893896102905, "learning_rate": 9.302511189574699e-05, "loss": 4.7267, "step": 48580 }, { "epoch": 0.09307332565224595, "grad_norm": 1.938466191291809, "learning_rate": 9.304426665985607e-05, "loss": 4.7071, "step": 48590 }, { "epoch": 0.09309248048362118, "grad_norm": 1.3599040508270264, "learning_rate": 9.306342142396517e-05, "loss": 4.7486, "step": 48600 }, { "epoch": 0.09311163531499642, "grad_norm": 1.3376383781433105, "learning_rate": 9.308257618807424e-05, "loss": 4.6233, "step": 48610 }, { "epoch": 0.09313079014637164, "grad_norm": 1.3634939193725586, "learning_rate": 9.310173095218332e-05, "loss": 4.7378, "step": 48620 }, { "epoch": 0.09314994497774687, "grad_norm": 1.3350297212600708, "learning_rate": 9.31208857162924e-05, "loss": 4.7497, "step": 48630 }, { "epoch": 0.09316909980912211, "grad_norm": 1.345228672027588, "learning_rate": 9.314004048040147e-05, "loss": 4.7491, "step": 48640 }, { "epoch": 0.09318825464049733, "grad_norm": 1.3280819654464722, "learning_rate": 9.315919524451056e-05, "loss": 4.6977, "step": 48650 }, { "epoch": 0.09320740947187256, "grad_norm": 1.3881261348724365, "learning_rate": 9.317835000861963e-05, "loss": 4.6701, "step": 48660 }, { "epoch": 0.0932265643032478, "grad_norm": 1.3516284227371216, "learning_rate": 9.319750477272871e-05, "loss": 4.7267, "step": 48670 }, { "epoch": 0.09324571913462303, "grad_norm": 1.389846920967102, "learning_rate": 9.32166595368378e-05, "loss": 4.8319, "step": 48680 }, { "epoch": 0.09326487396599827, "grad_norm": 1.4505181312561035, "learning_rate": 9.323581430094687e-05, "loss": 4.7803, "step": 48690 }, { "epoch": 
0.09328402879737349, "grad_norm": 1.4672017097473145, "learning_rate": 9.325496906505595e-05, "loss": 4.6986, "step": 48700 }, { "epoch": 0.09330318362874872, "grad_norm": 1.379573106765747, "learning_rate": 9.327412382916504e-05, "loss": 4.7654, "step": 48710 }, { "epoch": 0.09332233846012396, "grad_norm": 1.3674262762069702, "learning_rate": 9.329327859327412e-05, "loss": 4.7519, "step": 48720 }, { "epoch": 0.09334149329149918, "grad_norm": 1.35512375831604, "learning_rate": 9.33124333573832e-05, "loss": 4.7571, "step": 48730 }, { "epoch": 0.09336064812287441, "grad_norm": 1.3719295263290405, "learning_rate": 9.333158812149227e-05, "loss": 4.7149, "step": 48740 }, { "epoch": 0.09337980295424965, "grad_norm": 1.3434962034225464, "learning_rate": 9.335074288560135e-05, "loss": 4.6606, "step": 48750 }, { "epoch": 0.09339895778562488, "grad_norm": 1.4482673406600952, "learning_rate": 9.336989764971044e-05, "loss": 4.6829, "step": 48760 }, { "epoch": 0.0934181126170001, "grad_norm": 1.3452105522155762, "learning_rate": 9.338905241381951e-05, "loss": 4.8156, "step": 48770 }, { "epoch": 0.09343726744837534, "grad_norm": 1.3618543148040771, "learning_rate": 9.340820717792859e-05, "loss": 4.7679, "step": 48780 }, { "epoch": 0.09345642227975057, "grad_norm": 1.3128389120101929, "learning_rate": 9.342736194203769e-05, "loss": 4.8147, "step": 48790 }, { "epoch": 0.0934755771111258, "grad_norm": 1.4110361337661743, "learning_rate": 9.344651670614675e-05, "loss": 4.6509, "step": 48800 }, { "epoch": 0.09349473194250103, "grad_norm": 1.3454995155334473, "learning_rate": 9.346567147025584e-05, "loss": 4.7883, "step": 48810 }, { "epoch": 0.09351388677387626, "grad_norm": 1.4288069009780884, "learning_rate": 9.348482623436492e-05, "loss": 4.8528, "step": 48820 }, { "epoch": 0.09353304160525149, "grad_norm": 1.3677358627319336, "learning_rate": 9.3503980998474e-05, "loss": 4.7122, "step": 48830 }, { "epoch": 0.09355219643662673, "grad_norm": 1.3725892305374146, "learning_rate": 
9.352313576258308e-05, "loss": 4.7051, "step": 48840 }, { "epoch": 0.09357135126800195, "grad_norm": 1.392262578010559, "learning_rate": 9.354229052669215e-05, "loss": 4.8099, "step": 48850 }, { "epoch": 0.09359050609937718, "grad_norm": 1.354305386543274, "learning_rate": 9.356144529080123e-05, "loss": 4.7223, "step": 48860 }, { "epoch": 0.09360966093075242, "grad_norm": 1.3399834632873535, "learning_rate": 9.358060005491032e-05, "loss": 4.5885, "step": 48870 }, { "epoch": 0.09362881576212764, "grad_norm": 1.534500002861023, "learning_rate": 9.359975481901939e-05, "loss": 4.9055, "step": 48880 }, { "epoch": 0.09364797059350287, "grad_norm": 1.435516357421875, "learning_rate": 9.361890958312847e-05, "loss": 4.855, "step": 48890 }, { "epoch": 0.09366712542487811, "grad_norm": 1.3722987174987793, "learning_rate": 9.363806434723757e-05, "loss": 4.6261, "step": 48900 }, { "epoch": 0.09368628025625333, "grad_norm": 1.3815157413482666, "learning_rate": 9.365721911134664e-05, "loss": 4.7022, "step": 48910 }, { "epoch": 0.09370543508762856, "grad_norm": 1.3939836025238037, "learning_rate": 9.367637387545572e-05, "loss": 4.8969, "step": 48920 }, { "epoch": 0.0937245899190038, "grad_norm": 1.4203996658325195, "learning_rate": 9.369552863956478e-05, "loss": 4.7628, "step": 48930 }, { "epoch": 0.09374374475037903, "grad_norm": 1.4170281887054443, "learning_rate": 9.371468340367388e-05, "loss": 4.776, "step": 48940 }, { "epoch": 0.09376289958175425, "grad_norm": 1.3403466939926147, "learning_rate": 9.373383816778296e-05, "loss": 4.6761, "step": 48950 }, { "epoch": 0.09378205441312949, "grad_norm": 1.3819115161895752, "learning_rate": 9.375299293189203e-05, "loss": 4.771, "step": 48960 }, { "epoch": 0.09380120924450472, "grad_norm": 1.3892849683761597, "learning_rate": 9.377214769600111e-05, "loss": 4.7154, "step": 48970 }, { "epoch": 0.09382036407587994, "grad_norm": 1.279750943183899, "learning_rate": 9.37913024601102e-05, "loss": 4.7199, "step": 48980 }, { "epoch": 
0.09383951890725518, "grad_norm": 1.382900595664978, "learning_rate": 9.381045722421927e-05, "loss": 4.7356, "step": 48990 }, { "epoch": 0.09385867373863041, "grad_norm": 1.5447001457214355, "learning_rate": 9.382961198832835e-05, "loss": 4.7325, "step": 49000 }, { "epoch": 0.09387782857000564, "grad_norm": 1.4891729354858398, "learning_rate": 9.384876675243745e-05, "loss": 4.8537, "step": 49010 }, { "epoch": 0.09389698340138088, "grad_norm": 1.409818410873413, "learning_rate": 9.386600604013561e-05, "loss": 4.7795, "step": 49020 }, { "epoch": 0.0939161382327561, "grad_norm": 1.360300064086914, "learning_rate": 9.388516080424468e-05, "loss": 4.6579, "step": 49030 }, { "epoch": 0.09393529306413133, "grad_norm": 1.372948408126831, "learning_rate": 9.390431556835377e-05, "loss": 4.731, "step": 49040 }, { "epoch": 0.09395444789550657, "grad_norm": 1.357609748840332, "learning_rate": 9.392347033246285e-05, "loss": 4.635, "step": 49050 }, { "epoch": 0.0939736027268818, "grad_norm": 1.3706117868423462, "learning_rate": 9.394262509657192e-05, "loss": 4.8665, "step": 49060 }, { "epoch": 0.09399275755825702, "grad_norm": 1.3428876399993896, "learning_rate": 9.396177986068101e-05, "loss": 4.8031, "step": 49070 }, { "epoch": 0.09401191238963226, "grad_norm": 1.4620274305343628, "learning_rate": 9.39809346247901e-05, "loss": 4.8345, "step": 49080 }, { "epoch": 0.09403106722100749, "grad_norm": 1.4061650037765503, "learning_rate": 9.400008938889916e-05, "loss": 4.7988, "step": 49090 }, { "epoch": 0.09405022205238271, "grad_norm": 1.3659344911575317, "learning_rate": 9.401924415300824e-05, "loss": 4.7798, "step": 49100 }, { "epoch": 0.09406937688375795, "grad_norm": 1.3615154027938843, "learning_rate": 9.403839891711734e-05, "loss": 4.888, "step": 49110 }, { "epoch": 0.09408853171513318, "grad_norm": 1.389667272567749, "learning_rate": 9.405755368122641e-05, "loss": 4.6777, "step": 49120 }, { "epoch": 0.0941076865465084, "grad_norm": 1.3374396562576294, "learning_rate": 
9.407670844533549e-05, "loss": 4.7139, "step": 49130 }, { "epoch": 0.09412684137788364, "grad_norm": 1.399621605873108, "learning_rate": 9.409586320944456e-05, "loss": 4.7443, "step": 49140 }, { "epoch": 0.09414599620925887, "grad_norm": 1.4638746976852417, "learning_rate": 9.411501797355365e-05, "loss": 4.8629, "step": 49150 }, { "epoch": 0.0941651510406341, "grad_norm": 1.4138646125793457, "learning_rate": 9.413417273766273e-05, "loss": 4.8183, "step": 49160 }, { "epoch": 0.09418430587200934, "grad_norm": 1.4263916015625, "learning_rate": 9.41533275017718e-05, "loss": 4.8571, "step": 49170 }, { "epoch": 0.09420346070338456, "grad_norm": 1.4400008916854858, "learning_rate": 9.41724822658809e-05, "loss": 4.7676, "step": 49180 }, { "epoch": 0.09422261553475979, "grad_norm": 1.37529718875885, "learning_rate": 9.419163702998998e-05, "loss": 4.7842, "step": 49190 }, { "epoch": 0.09424177036613503, "grad_norm": 1.4363459348678589, "learning_rate": 9.421079179409904e-05, "loss": 4.654, "step": 49200 }, { "epoch": 0.09426092519751025, "grad_norm": 1.3706142902374268, "learning_rate": 9.422994655820813e-05, "loss": 4.7975, "step": 49210 }, { "epoch": 0.09428008002888548, "grad_norm": 1.3901840448379517, "learning_rate": 9.42491013223172e-05, "loss": 4.8321, "step": 49220 }, { "epoch": 0.09429923486026072, "grad_norm": 1.4534364938735962, "learning_rate": 9.426825608642629e-05, "loss": 4.8061, "step": 49230 }, { "epoch": 0.09431838969163595, "grad_norm": 1.3904685974121094, "learning_rate": 9.428741085053537e-05, "loss": 4.7747, "step": 49240 }, { "epoch": 0.09433754452301117, "grad_norm": 1.3352069854736328, "learning_rate": 9.430656561464444e-05, "loss": 4.7916, "step": 49250 }, { "epoch": 0.09435669935438641, "grad_norm": 1.353621244430542, "learning_rate": 9.432572037875353e-05, "loss": 4.6845, "step": 49260 }, { "epoch": 0.09437585418576164, "grad_norm": 1.4590704441070557, "learning_rate": 9.434487514286261e-05, "loss": 4.7487, "step": 49270 }, { "epoch": 
0.09439500901713686, "grad_norm": 1.4072057008743286, "learning_rate": 9.436402990697168e-05, "loss": 4.8993, "step": 49280 }, { "epoch": 0.0944141638485121, "grad_norm": 1.3965203762054443, "learning_rate": 9.438318467108078e-05, "loss": 4.747, "step": 49290 }, { "epoch": 0.09443331867988733, "grad_norm": 1.3803738355636597, "learning_rate": 9.440233943518986e-05, "loss": 4.6279, "step": 49300 }, { "epoch": 0.09445247351126256, "grad_norm": 1.3630592823028564, "learning_rate": 9.442149419929893e-05, "loss": 4.66, "step": 49310 }, { "epoch": 0.0944716283426378, "grad_norm": 1.3339600563049316, "learning_rate": 9.4440648963408e-05, "loss": 4.6455, "step": 49320 }, { "epoch": 0.09449078317401302, "grad_norm": 1.4003163576126099, "learning_rate": 9.445980372751707e-05, "loss": 4.7971, "step": 49330 }, { "epoch": 0.09450993800538826, "grad_norm": 1.330847978591919, "learning_rate": 9.447895849162617e-05, "loss": 4.8169, "step": 49340 }, { "epoch": 0.09452909283676349, "grad_norm": 1.4855409860610962, "learning_rate": 9.449811325573525e-05, "loss": 4.7519, "step": 49350 }, { "epoch": 0.09454824766813871, "grad_norm": 1.3603652715682983, "learning_rate": 9.451726801984432e-05, "loss": 4.7068, "step": 49360 }, { "epoch": 0.09456740249951395, "grad_norm": 1.3553357124328613, "learning_rate": 9.453642278395341e-05, "loss": 4.853, "step": 49370 }, { "epoch": 0.09458655733088918, "grad_norm": 1.4034639596939087, "learning_rate": 9.45555775480625e-05, "loss": 4.702, "step": 49380 }, { "epoch": 0.0946057121622644, "grad_norm": 1.37429678440094, "learning_rate": 9.457473231217156e-05, "loss": 4.8675, "step": 49390 }, { "epoch": 0.09462486699363964, "grad_norm": 1.3330354690551758, "learning_rate": 9.459388707628064e-05, "loss": 4.7657, "step": 49400 }, { "epoch": 0.09464402182501487, "grad_norm": 1.5536843538284302, "learning_rate": 9.461304184038972e-05, "loss": 4.7635, "step": 49410 }, { "epoch": 0.0946631766563901, "grad_norm": 1.4272024631500244, "learning_rate": 
9.46321966044988e-05, "loss": 4.775, "step": 49420 }, { "epoch": 0.09468233148776534, "grad_norm": 1.4444549083709717, "learning_rate": 9.465135136860789e-05, "loss": 4.7085, "step": 49430 }, { "epoch": 0.09470148631914056, "grad_norm": 1.4474903345108032, "learning_rate": 9.467050613271695e-05, "loss": 4.7584, "step": 49440 }, { "epoch": 0.09472064115051579, "grad_norm": 1.3830697536468506, "learning_rate": 9.468966089682605e-05, "loss": 4.6874, "step": 49450 }, { "epoch": 0.09473979598189103, "grad_norm": 1.325923204421997, "learning_rate": 9.470881566093513e-05, "loss": 4.8858, "step": 49460 }, { "epoch": 0.09475895081326625, "grad_norm": 1.3901718854904175, "learning_rate": 9.47279704250442e-05, "loss": 4.7205, "step": 49470 }, { "epoch": 0.09477810564464148, "grad_norm": 1.356909155845642, "learning_rate": 9.47471251891533e-05, "loss": 4.7957, "step": 49480 }, { "epoch": 0.09479726047601672, "grad_norm": 1.4155876636505127, "learning_rate": 9.476627995326237e-05, "loss": 4.8375, "step": 49490 }, { "epoch": 0.09481641530739195, "grad_norm": 1.3802940845489502, "learning_rate": 9.478543471737144e-05, "loss": 4.7434, "step": 49500 }, { "epoch": 0.09483557013876717, "grad_norm": 1.3626307249069214, "learning_rate": 9.480458948148052e-05, "loss": 4.673, "step": 49510 }, { "epoch": 0.09485472497014241, "grad_norm": 1.2892351150512695, "learning_rate": 9.48237442455896e-05, "loss": 4.8766, "step": 49520 }, { "epoch": 0.09487387980151764, "grad_norm": 1.3725956678390503, "learning_rate": 9.484289900969869e-05, "loss": 4.6483, "step": 49530 }, { "epoch": 0.09489303463289286, "grad_norm": 1.3127771615982056, "learning_rate": 9.486205377380777e-05, "loss": 4.7926, "step": 49540 }, { "epoch": 0.0949121894642681, "grad_norm": 1.336655855178833, "learning_rate": 9.488120853791684e-05, "loss": 4.7794, "step": 49550 }, { "epoch": 0.09493134429564333, "grad_norm": 1.3805631399154663, "learning_rate": 9.490036330202593e-05, "loss": 4.8096, "step": 49560 }, { "epoch": 
0.09495049912701856, "grad_norm": 1.3205533027648926, "learning_rate": 9.491951806613501e-05, "loss": 4.7418, "step": 49570 }, { "epoch": 0.0949696539583938, "grad_norm": 1.3049122095108032, "learning_rate": 9.493867283024408e-05, "loss": 4.6977, "step": 49580 }, { "epoch": 0.09498880878976902, "grad_norm": 1.3748726844787598, "learning_rate": 9.495782759435317e-05, "loss": 4.7469, "step": 49590 }, { "epoch": 0.09500796362114425, "grad_norm": 1.4090001583099365, "learning_rate": 9.497698235846224e-05, "loss": 4.858, "step": 49600 }, { "epoch": 0.09502711845251949, "grad_norm": 1.4403120279312134, "learning_rate": 9.499613712257132e-05, "loss": 4.744, "step": 49610 }, { "epoch": 0.09504627328389471, "grad_norm": 1.3651349544525146, "learning_rate": 9.50152918866804e-05, "loss": 4.773, "step": 49620 }, { "epoch": 0.09506542811526994, "grad_norm": 1.3487915992736816, "learning_rate": 9.503444665078949e-05, "loss": 4.6814, "step": 49630 }, { "epoch": 0.09508458294664518, "grad_norm": 1.320332646369934, "learning_rate": 9.505360141489857e-05, "loss": 4.66, "step": 49640 }, { "epoch": 0.0951037377780204, "grad_norm": 1.3622534275054932, "learning_rate": 9.507275617900765e-05, "loss": 4.8186, "step": 49650 }, { "epoch": 0.09512289260939563, "grad_norm": 1.4071978330612183, "learning_rate": 9.509191094311672e-05, "loss": 4.6473, "step": 49660 }, { "epoch": 0.09514204744077087, "grad_norm": 1.5017428398132324, "learning_rate": 9.511106570722581e-05, "loss": 4.6119, "step": 49670 }, { "epoch": 0.0951612022721461, "grad_norm": 1.400241732597351, "learning_rate": 9.513022047133489e-05, "loss": 4.7223, "step": 49680 }, { "epoch": 0.09518035710352132, "grad_norm": 1.3671317100524902, "learning_rate": 9.514937523544396e-05, "loss": 4.7084, "step": 49690 }, { "epoch": 0.09519951193489656, "grad_norm": 1.342231035232544, "learning_rate": 9.516852999955306e-05, "loss": 4.8288, "step": 49700 }, { "epoch": 0.09521866676627179, "grad_norm": 1.371870517730713, "learning_rate": 
9.518768476366212e-05, "loss": 4.7228, "step": 49710 }, { "epoch": 0.09523782159764702, "grad_norm": 1.4129059314727783, "learning_rate": 9.52068395277712e-05, "loss": 4.8045, "step": 49720 }, { "epoch": 0.09525697642902226, "grad_norm": 1.3281855583190918, "learning_rate": 9.522599429188029e-05, "loss": 4.6968, "step": 49730 }, { "epoch": 0.09527613126039748, "grad_norm": 1.3656001091003418, "learning_rate": 9.524514905598937e-05, "loss": 4.6984, "step": 49740 }, { "epoch": 0.09529528609177271, "grad_norm": 1.3900939226150513, "learning_rate": 9.526430382009845e-05, "loss": 4.8609, "step": 49750 }, { "epoch": 0.09531444092314795, "grad_norm": 1.4355370998382568, "learning_rate": 9.528345858420753e-05, "loss": 4.6617, "step": 49760 }, { "epoch": 0.09533359575452317, "grad_norm": 1.4123173952102661, "learning_rate": 9.53026133483166e-05, "loss": 4.7562, "step": 49770 }, { "epoch": 0.0953527505858984, "grad_norm": 1.3478201627731323, "learning_rate": 9.532176811242569e-05, "loss": 4.896, "step": 49780 }, { "epoch": 0.09537190541727364, "grad_norm": 1.354347586631775, "learning_rate": 9.534092287653476e-05, "loss": 4.7601, "step": 49790 }, { "epoch": 0.09539106024864887, "grad_norm": 1.3763188123703003, "learning_rate": 9.536007764064384e-05, "loss": 4.5771, "step": 49800 }, { "epoch": 0.09541021508002409, "grad_norm": 1.3440216779708862, "learning_rate": 9.537923240475294e-05, "loss": 4.6345, "step": 49810 }, { "epoch": 0.09542936991139933, "grad_norm": 1.3283487558364868, "learning_rate": 9.5398387168862e-05, "loss": 4.7225, "step": 49820 }, { "epoch": 0.09544852474277456, "grad_norm": 1.3885273933410645, "learning_rate": 9.541754193297109e-05, "loss": 4.8438, "step": 49830 }, { "epoch": 0.09546767957414978, "grad_norm": 1.3852876424789429, "learning_rate": 9.543669669708017e-05, "loss": 4.6458, "step": 49840 }, { "epoch": 0.09548683440552502, "grad_norm": 1.348909616470337, "learning_rate": 9.545585146118925e-05, "loss": 4.7366, "step": 49850 }, { "epoch": 
0.09550598923690025, "grad_norm": 1.3322457075119019, "learning_rate": 9.547500622529833e-05, "loss": 4.6626, "step": 49860 }, { "epoch": 0.09552514406827547, "grad_norm": 1.3277690410614014, "learning_rate": 9.549416098940741e-05, "loss": 4.7129, "step": 49870 }, { "epoch": 0.09554429889965071, "grad_norm": 1.3043651580810547, "learning_rate": 9.551331575351648e-05, "loss": 4.6795, "step": 49880 }, { "epoch": 0.09556345373102594, "grad_norm": 1.3385263681411743, "learning_rate": 9.553247051762557e-05, "loss": 4.8204, "step": 49890 }, { "epoch": 0.09558260856240117, "grad_norm": 1.291907548904419, "learning_rate": 9.555162528173464e-05, "loss": 4.8675, "step": 49900 }, { "epoch": 0.0956017633937764, "grad_norm": 1.351219654083252, "learning_rate": 9.557078004584372e-05, "loss": 4.6981, "step": 49910 }, { "epoch": 0.09562091822515163, "grad_norm": 1.4171017408370972, "learning_rate": 9.558993480995282e-05, "loss": 4.6853, "step": 49920 }, { "epoch": 0.09564007305652686, "grad_norm": 1.4263583421707153, "learning_rate": 9.560908957406188e-05, "loss": 4.7726, "step": 49930 }, { "epoch": 0.0956592278879021, "grad_norm": 1.322769045829773, "learning_rate": 9.562824433817097e-05, "loss": 4.7051, "step": 49940 }, { "epoch": 0.09567838271927732, "grad_norm": 1.365640640258789, "learning_rate": 9.564739910228005e-05, "loss": 4.6864, "step": 49950 }, { "epoch": 0.09569753755065255, "grad_norm": 1.3774160146713257, "learning_rate": 9.566655386638913e-05, "loss": 4.5984, "step": 49960 }, { "epoch": 0.09571669238202779, "grad_norm": 1.4020557403564453, "learning_rate": 9.568570863049821e-05, "loss": 4.6565, "step": 49970 }, { "epoch": 0.09573584721340302, "grad_norm": 1.3423123359680176, "learning_rate": 9.570486339460729e-05, "loss": 4.8718, "step": 49980 }, { "epoch": 0.09575500204477824, "grad_norm": 1.3852951526641846, "learning_rate": 9.572401815871636e-05, "loss": 4.676, "step": 49990 }, { "epoch": 0.09577415687615348, "grad_norm": 1.3427033424377441, "learning_rate": 
9.574317292282545e-05, "loss": 4.7926, "step": 50000 }, { "epoch": 0.09579331170752871, "grad_norm": 1.318526268005371, "learning_rate": 9.576232768693452e-05, "loss": 4.7288, "step": 50010 }, { "epoch": 0.09581246653890395, "grad_norm": 1.4891771078109741, "learning_rate": 9.57814824510436e-05, "loss": 4.6378, "step": 50020 }, { "epoch": 0.09583162137027917, "grad_norm": 1.3585609197616577, "learning_rate": 9.58006372151527e-05, "loss": 4.5751, "step": 50030 }, { "epoch": 0.0958507762016544, "grad_norm": 1.5285625457763672, "learning_rate": 9.581979197926177e-05, "loss": 4.7256, "step": 50040 }, { "epoch": 0.09586993103302964, "grad_norm": 1.432433009147644, "learning_rate": 9.583894674337085e-05, "loss": 4.7883, "step": 50050 }, { "epoch": 0.09588908586440487, "grad_norm": 1.3806523084640503, "learning_rate": 9.585810150747993e-05, "loss": 4.7376, "step": 50060 }, { "epoch": 0.09590824069578009, "grad_norm": 1.3666738271713257, "learning_rate": 9.587725627158901e-05, "loss": 4.8818, "step": 50070 }, { "epoch": 0.09592739552715533, "grad_norm": 1.3192880153656006, "learning_rate": 9.589641103569809e-05, "loss": 4.6986, "step": 50080 }, { "epoch": 0.09594655035853056, "grad_norm": 1.369114875793457, "learning_rate": 9.591556579980716e-05, "loss": 4.692, "step": 50090 }, { "epoch": 0.09596570518990578, "grad_norm": 1.334564447402954, "learning_rate": 9.593472056391624e-05, "loss": 4.6759, "step": 50100 }, { "epoch": 0.09598486002128102, "grad_norm": 1.4350274801254272, "learning_rate": 9.595387532802533e-05, "loss": 4.6736, "step": 50110 }, { "epoch": 0.09600401485265625, "grad_norm": 1.3391247987747192, "learning_rate": 9.59730300921344e-05, "loss": 4.7849, "step": 50120 }, { "epoch": 0.09602316968403148, "grad_norm": 1.3702077865600586, "learning_rate": 9.599218485624348e-05, "loss": 4.7708, "step": 50130 }, { "epoch": 0.09604232451540672, "grad_norm": 1.440189003944397, "learning_rate": 9.601133962035258e-05, "loss": 4.6937, "step": 50140 }, { "epoch": 
0.09606147934678194, "grad_norm": 1.3980194330215454, "learning_rate": 9.603049438446165e-05, "loss": 4.701, "step": 50150 }, { "epoch": 0.09608063417815717, "grad_norm": 1.4100255966186523, "learning_rate": 9.604964914857073e-05, "loss": 4.7616, "step": 50160 }, { "epoch": 0.09609978900953241, "grad_norm": 1.4238066673278809, "learning_rate": 9.606880391267981e-05, "loss": 4.6586, "step": 50170 }, { "epoch": 0.09611894384090763, "grad_norm": 1.347027063369751, "learning_rate": 9.608795867678889e-05, "loss": 4.5815, "step": 50180 }, { "epoch": 0.09613809867228286, "grad_norm": 1.376680612564087, "learning_rate": 9.610711344089797e-05, "loss": 4.7394, "step": 50190 }, { "epoch": 0.0961572535036581, "grad_norm": 1.361847996711731, "learning_rate": 9.612626820500704e-05, "loss": 4.7134, "step": 50200 }, { "epoch": 0.09617640833503333, "grad_norm": 1.3559350967407227, "learning_rate": 9.614542296911612e-05, "loss": 4.6801, "step": 50210 }, { "epoch": 0.09619556316640855, "grad_norm": 1.376112699508667, "learning_rate": 9.616457773322522e-05, "loss": 4.7262, "step": 50220 }, { "epoch": 0.09621471799778379, "grad_norm": 1.3359720706939697, "learning_rate": 9.618373249733428e-05, "loss": 4.7951, "step": 50230 }, { "epoch": 0.09623387282915902, "grad_norm": 1.3442566394805908, "learning_rate": 9.620288726144336e-05, "loss": 4.7378, "step": 50240 }, { "epoch": 0.09625302766053424, "grad_norm": 1.3073774576187134, "learning_rate": 9.622204202555246e-05, "loss": 4.6806, "step": 50250 }, { "epoch": 0.09627218249190948, "grad_norm": 1.2718061208724976, "learning_rate": 9.624119678966153e-05, "loss": 4.7377, "step": 50260 }, { "epoch": 0.09629133732328471, "grad_norm": 1.3493980169296265, "learning_rate": 9.626035155377061e-05, "loss": 4.7092, "step": 50270 }, { "epoch": 0.09631049215465994, "grad_norm": 1.3704211711883545, "learning_rate": 9.627950631787968e-05, "loss": 4.6194, "step": 50280 }, { "epoch": 0.09632964698603518, "grad_norm": 1.3715112209320068, "learning_rate": 
9.629866108198877e-05, "loss": 4.7681, "step": 50290 }, { "epoch": 0.0963488018174104, "grad_norm": 1.3702778816223145, "learning_rate": 9.631781584609785e-05, "loss": 4.6852, "step": 50300 }, { "epoch": 0.09636795664878563, "grad_norm": 1.3621195554733276, "learning_rate": 9.633697061020692e-05, "loss": 4.8696, "step": 50310 }, { "epoch": 0.09638711148016087, "grad_norm": 1.4196066856384277, "learning_rate": 9.6356125374316e-05, "loss": 4.7183, "step": 50320 }, { "epoch": 0.09640626631153609, "grad_norm": 1.3553181886672974, "learning_rate": 9.63752801384251e-05, "loss": 4.7251, "step": 50330 }, { "epoch": 0.09642542114291132, "grad_norm": 1.3216866254806519, "learning_rate": 9.639443490253416e-05, "loss": 4.7502, "step": 50340 }, { "epoch": 0.09644457597428656, "grad_norm": 1.3218406438827515, "learning_rate": 9.641358966664325e-05, "loss": 4.7365, "step": 50350 }, { "epoch": 0.09646373080566178, "grad_norm": 1.3934478759765625, "learning_rate": 9.643274443075234e-05, "loss": 4.6809, "step": 50360 }, { "epoch": 0.09648288563703701, "grad_norm": 1.3908674716949463, "learning_rate": 9.645189919486141e-05, "loss": 4.7033, "step": 50370 }, { "epoch": 0.09650204046841225, "grad_norm": 1.3784260749816895, "learning_rate": 9.647105395897049e-05, "loss": 4.643, "step": 50380 }, { "epoch": 0.09652119529978748, "grad_norm": 1.3243238925933838, "learning_rate": 9.649020872307956e-05, "loss": 4.7698, "step": 50390 }, { "epoch": 0.0965403501311627, "grad_norm": 1.3100920915603638, "learning_rate": 9.650936348718865e-05, "loss": 4.7144, "step": 50400 }, { "epoch": 0.09655950496253794, "grad_norm": 1.3935980796813965, "learning_rate": 9.652851825129773e-05, "loss": 4.6656, "step": 50410 }, { "epoch": 0.09657865979391317, "grad_norm": 1.3244277238845825, "learning_rate": 9.65476730154068e-05, "loss": 4.8132, "step": 50420 }, { "epoch": 0.0965978146252884, "grad_norm": 1.3690894842147827, "learning_rate": 9.656682777951588e-05, "loss": 4.6887, "step": 50430 }, { "epoch": 
0.09661696945666363, "grad_norm": 1.3053579330444336, "learning_rate": 9.658598254362498e-05, "loss": 4.7278, "step": 50440 }, { "epoch": 0.09663612428803886, "grad_norm": 1.3243657350540161, "learning_rate": 9.660513730773404e-05, "loss": 4.7083, "step": 50450 }, { "epoch": 0.09665527911941409, "grad_norm": 1.3846008777618408, "learning_rate": 9.662429207184313e-05, "loss": 4.8945, "step": 50460 }, { "epoch": 0.09667443395078933, "grad_norm": 1.3633641004562378, "learning_rate": 9.66434468359522e-05, "loss": 4.7429, "step": 50470 }, { "epoch": 0.09669358878216455, "grad_norm": 1.3476134538650513, "learning_rate": 9.666260160006129e-05, "loss": 4.7658, "step": 50480 }, { "epoch": 0.09671274361353978, "grad_norm": 1.4067411422729492, "learning_rate": 9.668175636417037e-05, "loss": 4.6335, "step": 50490 }, { "epoch": 0.09673189844491502, "grad_norm": 1.3694214820861816, "learning_rate": 9.670091112827944e-05, "loss": 4.8012, "step": 50500 }, { "epoch": 0.09675105327629024, "grad_norm": 1.3032987117767334, "learning_rate": 9.672006589238853e-05, "loss": 4.6975, "step": 50510 }, { "epoch": 0.09677020810766547, "grad_norm": 1.3015508651733398, "learning_rate": 9.673922065649761e-05, "loss": 4.8309, "step": 50520 }, { "epoch": 0.09678936293904071, "grad_norm": 1.3597595691680908, "learning_rate": 9.675837542060668e-05, "loss": 4.7613, "step": 50530 }, { "epoch": 0.09680851777041594, "grad_norm": 1.341257095336914, "learning_rate": 9.677753018471576e-05, "loss": 4.8488, "step": 50540 }, { "epoch": 0.09682767260179116, "grad_norm": 1.376255750656128, "learning_rate": 9.679668494882486e-05, "loss": 4.7661, "step": 50550 }, { "epoch": 0.0968468274331664, "grad_norm": 1.3640053272247314, "learning_rate": 9.681583971293393e-05, "loss": 4.6602, "step": 50560 }, { "epoch": 0.09686598226454163, "grad_norm": 1.6416468620300293, "learning_rate": 9.683499447704301e-05, "loss": 4.7321, "step": 50570 }, { "epoch": 0.09688513709591685, "grad_norm": 1.4285880327224731, "learning_rate": 
9.685414924115207e-05, "loss": 4.6964, "step": 50580 }, { "epoch": 0.0969042919272921, "grad_norm": 1.3671401739120483, "learning_rate": 9.687330400526117e-05, "loss": 4.6725, "step": 50590 }, { "epoch": 0.09692344675866732, "grad_norm": 1.4426840543746948, "learning_rate": 9.689245876937025e-05, "loss": 4.5436, "step": 50600 }, { "epoch": 0.09694260159004255, "grad_norm": 1.3754140138626099, "learning_rate": 9.691161353347932e-05, "loss": 4.685, "step": 50610 }, { "epoch": 0.09696175642141779, "grad_norm": 1.3206219673156738, "learning_rate": 9.693076829758841e-05, "loss": 4.7431, "step": 50620 }, { "epoch": 0.09698091125279301, "grad_norm": 1.418336033821106, "learning_rate": 9.69499230616975e-05, "loss": 4.8763, "step": 50630 }, { "epoch": 0.09700006608416824, "grad_norm": 1.403748631477356, "learning_rate": 9.696907782580656e-05, "loss": 4.8049, "step": 50640 }, { "epoch": 0.09701922091554348, "grad_norm": 1.4397448301315308, "learning_rate": 9.698823258991564e-05, "loss": 4.6611, "step": 50650 }, { "epoch": 0.0970383757469187, "grad_norm": 1.4181759357452393, "learning_rate": 9.700738735402474e-05, "loss": 4.6472, "step": 50660 }, { "epoch": 0.09705753057829394, "grad_norm": 1.3507914543151855, "learning_rate": 9.70265421181338e-05, "loss": 4.6662, "step": 50670 }, { "epoch": 0.09707668540966917, "grad_norm": 1.4053415060043335, "learning_rate": 9.704569688224289e-05, "loss": 4.8088, "step": 50680 }, { "epoch": 0.0970958402410444, "grad_norm": 1.3845113515853882, "learning_rate": 9.706485164635196e-05, "loss": 4.6842, "step": 50690 }, { "epoch": 0.09711499507241964, "grad_norm": 1.439717411994934, "learning_rate": 9.708400641046105e-05, "loss": 4.7469, "step": 50700 }, { "epoch": 0.09713414990379486, "grad_norm": 1.34816575050354, "learning_rate": 9.710316117457013e-05, "loss": 4.7205, "step": 50710 }, { "epoch": 0.09715330473517009, "grad_norm": 1.4015429019927979, "learning_rate": 9.71223159386792e-05, "loss": 4.6416, "step": 50720 }, { "epoch": 
0.09717245956654533, "grad_norm": 1.393739104270935, "learning_rate": 9.71414707027883e-05, "loss": 4.6303, "step": 50730 }, { "epoch": 0.09719161439792055, "grad_norm": 1.345677137374878, "learning_rate": 9.716062546689738e-05, "loss": 4.7147, "step": 50740 }, { "epoch": 0.09721076922929578, "grad_norm": 1.3154494762420654, "learning_rate": 9.717978023100644e-05, "loss": 4.682, "step": 50750 }, { "epoch": 0.09722992406067102, "grad_norm": 1.383957028388977, "learning_rate": 9.719893499511552e-05, "loss": 4.5766, "step": 50760 }, { "epoch": 0.09724907889204625, "grad_norm": 1.3651074171066284, "learning_rate": 9.721808975922459e-05, "loss": 4.6484, "step": 50770 }, { "epoch": 0.09726823372342147, "grad_norm": 1.3350085020065308, "learning_rate": 9.723724452333369e-05, "loss": 4.7719, "step": 50780 }, { "epoch": 0.09728738855479671, "grad_norm": 1.353078007698059, "learning_rate": 9.725639928744277e-05, "loss": 4.6914, "step": 50790 }, { "epoch": 0.09730654338617194, "grad_norm": 1.5243057012557983, "learning_rate": 9.727555405155184e-05, "loss": 4.7474, "step": 50800 }, { "epoch": 0.09732569821754716, "grad_norm": 1.3691949844360352, "learning_rate": 9.729470881566093e-05, "loss": 4.8542, "step": 50810 }, { "epoch": 0.0973448530489224, "grad_norm": 1.2872930765151978, "learning_rate": 9.731386357977001e-05, "loss": 4.7795, "step": 50820 }, { "epoch": 0.09736400788029763, "grad_norm": 1.379819393157959, "learning_rate": 9.733301834387908e-05, "loss": 4.762, "step": 50830 }, { "epoch": 0.09738316271167285, "grad_norm": 1.3621827363967896, "learning_rate": 9.735217310798818e-05, "loss": 4.5594, "step": 50840 }, { "epoch": 0.0974023175430481, "grad_norm": 1.375242829322815, "learning_rate": 9.737132787209726e-05, "loss": 4.737, "step": 50850 }, { "epoch": 0.09742147237442332, "grad_norm": 1.4240175485610962, "learning_rate": 9.739048263620632e-05, "loss": 4.7671, "step": 50860 }, { "epoch": 0.09744062720579855, "grad_norm": 1.3971261978149414, "learning_rate": 
9.74096374003154e-05, "loss": 4.7225, "step": 50870 }, { "epoch": 0.09745978203717379, "grad_norm": 1.3232749700546265, "learning_rate": 9.742879216442447e-05, "loss": 4.8043, "step": 50880 }, { "epoch": 0.09747893686854901, "grad_norm": 1.369922161102295, "learning_rate": 9.744794692853357e-05, "loss": 4.5934, "step": 50890 }, { "epoch": 0.09749809169992424, "grad_norm": 1.4727966785430908, "learning_rate": 9.746710169264265e-05, "loss": 4.6176, "step": 50900 }, { "epoch": 0.09751724653129948, "grad_norm": 1.4283541440963745, "learning_rate": 9.748625645675172e-05, "loss": 4.672, "step": 50910 }, { "epoch": 0.0975364013626747, "grad_norm": 1.3892543315887451, "learning_rate": 9.750541122086081e-05, "loss": 4.698, "step": 50920 }, { "epoch": 0.09755555619404993, "grad_norm": 1.3691277503967285, "learning_rate": 9.75245659849699e-05, "loss": 4.7027, "step": 50930 }, { "epoch": 0.09757471102542517, "grad_norm": 1.4079878330230713, "learning_rate": 9.754372074907896e-05, "loss": 4.7147, "step": 50940 }, { "epoch": 0.0975938658568004, "grad_norm": 1.3640788793563843, "learning_rate": 9.756287551318804e-05, "loss": 4.7847, "step": 50950 }, { "epoch": 0.09761302068817562, "grad_norm": 1.3251665830612183, "learning_rate": 9.758203027729712e-05, "loss": 4.7596, "step": 50960 }, { "epoch": 0.09763217551955086, "grad_norm": 1.427051067352295, "learning_rate": 9.76011850414062e-05, "loss": 4.6088, "step": 50970 }, { "epoch": 0.09765133035092609, "grad_norm": 1.3688870668411255, "learning_rate": 9.762033980551529e-05, "loss": 4.6893, "step": 50980 }, { "epoch": 0.09767048518230131, "grad_norm": 1.3604735136032104, "learning_rate": 9.763949456962435e-05, "loss": 4.7666, "step": 50990 }, { "epoch": 0.09768964001367655, "grad_norm": 1.3755180835723877, "learning_rate": 9.765864933373345e-05, "loss": 4.8105, "step": 51000 }, { "epoch": 0.09770879484505178, "grad_norm": 1.4015287160873413, "learning_rate": 9.767780409784253e-05, "loss": 4.6524, "step": 51010 }, { "epoch": 
0.097727949676427, "grad_norm": 1.4125709533691406, "learning_rate": 9.76969588619516e-05, "loss": 4.7943, "step": 51020 }, { "epoch": 0.09774710450780225, "grad_norm": 1.3912853002548218, "learning_rate": 9.771611362606069e-05, "loss": 4.691, "step": 51030 }, { "epoch": 0.09776625933917747, "grad_norm": 1.4376938343048096, "learning_rate": 9.773526839016977e-05, "loss": 4.6679, "step": 51040 }, { "epoch": 0.0977854141705527, "grad_norm": 1.3502020835876465, "learning_rate": 9.775442315427884e-05, "loss": 4.8198, "step": 51050 }, { "epoch": 0.09780456900192794, "grad_norm": 1.4190479516983032, "learning_rate": 9.777357791838792e-05, "loss": 4.6963, "step": 51060 }, { "epoch": 0.09782372383330316, "grad_norm": 1.336432933807373, "learning_rate": 9.7792732682497e-05, "loss": 4.7598, "step": 51070 }, { "epoch": 0.09784287866467839, "grad_norm": 1.3014966249465942, "learning_rate": 9.781188744660609e-05, "loss": 4.5666, "step": 51080 }, { "epoch": 0.09786203349605363, "grad_norm": 1.3439863920211792, "learning_rate": 9.783104221071517e-05, "loss": 4.7724, "step": 51090 }, { "epoch": 0.09788118832742886, "grad_norm": 1.3376171588897705, "learning_rate": 9.785019697482423e-05, "loss": 4.744, "step": 51100 }, { "epoch": 0.09790034315880408, "grad_norm": 1.3638728857040405, "learning_rate": 9.786935173893333e-05, "loss": 4.5458, "step": 51110 }, { "epoch": 0.09791949799017932, "grad_norm": 1.466318130493164, "learning_rate": 9.788850650304241e-05, "loss": 4.6945, "step": 51120 }, { "epoch": 0.09793865282155455, "grad_norm": 1.3120317459106445, "learning_rate": 9.790766126715148e-05, "loss": 4.7035, "step": 51130 }, { "epoch": 0.09795780765292977, "grad_norm": 1.2997790575027466, "learning_rate": 9.792681603126057e-05, "loss": 4.7302, "step": 51140 }, { "epoch": 0.09797696248430501, "grad_norm": 1.3423248529434204, "learning_rate": 9.794597079536964e-05, "loss": 4.6943, "step": 51150 }, { "epoch": 0.09799611731568024, "grad_norm": 1.3484611511230469, "learning_rate": 
9.796512555947872e-05, "loss": 4.8277, "step": 51160 }, { "epoch": 0.09801527214705547, "grad_norm": 1.3510799407958984, "learning_rate": 9.79842803235878e-05, "loss": 4.6539, "step": 51170 }, { "epoch": 0.0980344269784307, "grad_norm": 1.3212435245513916, "learning_rate": 9.800343508769689e-05, "loss": 4.7048, "step": 51180 }, { "epoch": 0.09805358180980593, "grad_norm": 1.3573731184005737, "learning_rate": 9.802258985180597e-05, "loss": 4.7031, "step": 51190 }, { "epoch": 0.09807273664118116, "grad_norm": 1.3520554304122925, "learning_rate": 9.804174461591505e-05, "loss": 4.6451, "step": 51200 }, { "epoch": 0.0980918914725564, "grad_norm": 1.2686622142791748, "learning_rate": 9.806089938002412e-05, "loss": 4.9004, "step": 51210 }, { "epoch": 0.09811104630393162, "grad_norm": 1.324004054069519, "learning_rate": 9.808005414413321e-05, "loss": 4.7412, "step": 51220 }, { "epoch": 0.09813020113530685, "grad_norm": 1.4484742879867554, "learning_rate": 9.809920890824229e-05, "loss": 4.5407, "step": 51230 }, { "epoch": 0.09814935596668209, "grad_norm": 1.3412829637527466, "learning_rate": 9.811836367235136e-05, "loss": 4.7711, "step": 51240 }, { "epoch": 0.09816851079805732, "grad_norm": 1.2978147268295288, "learning_rate": 9.813751843646045e-05, "loss": 4.7835, "step": 51250 }, { "epoch": 0.09818766562943254, "grad_norm": 1.3615690469741821, "learning_rate": 9.815667320056952e-05, "loss": 4.8712, "step": 51260 }, { "epoch": 0.09820682046080778, "grad_norm": 1.271457552909851, "learning_rate": 9.81758279646786e-05, "loss": 4.7715, "step": 51270 }, { "epoch": 0.09822597529218301, "grad_norm": 1.3466389179229736, "learning_rate": 9.819498272878768e-05, "loss": 4.7522, "step": 51280 }, { "epoch": 0.09824513012355823, "grad_norm": 1.3904211521148682, "learning_rate": 9.821413749289677e-05, "loss": 4.8131, "step": 51290 }, { "epoch": 0.09826428495493347, "grad_norm": 1.4862390756607056, "learning_rate": 9.823329225700585e-05, "loss": 4.7112, "step": 51300 }, { "epoch": 
0.0982834397863087, "grad_norm": 1.396736741065979, "learning_rate": 9.825244702111493e-05, "loss": 4.6992, "step": 51310 }, { "epoch": 0.09830259461768394, "grad_norm": 1.4093706607818604, "learning_rate": 9.8271601785224e-05, "loss": 4.6788, "step": 51320 }, { "epoch": 0.09832174944905916, "grad_norm": 1.3242746591567993, "learning_rate": 9.829075654933309e-05, "loss": 4.6823, "step": 51330 }, { "epoch": 0.09834090428043439, "grad_norm": 1.3281745910644531, "learning_rate": 9.830991131344217e-05, "loss": 4.665, "step": 51340 }, { "epoch": 0.09836005911180963, "grad_norm": 1.401903510093689, "learning_rate": 9.832906607755124e-05, "loss": 4.7622, "step": 51350 }, { "epoch": 0.09837921394318486, "grad_norm": 1.3654485940933228, "learning_rate": 9.834822084166034e-05, "loss": 4.7711, "step": 51360 }, { "epoch": 0.09839836877456008, "grad_norm": 1.3227753639221191, "learning_rate": 9.83673756057694e-05, "loss": 4.6438, "step": 51370 }, { "epoch": 0.09841752360593532, "grad_norm": 1.4020674228668213, "learning_rate": 9.838653036987848e-05, "loss": 4.6839, "step": 51380 }, { "epoch": 0.09843667843731055, "grad_norm": 1.317630410194397, "learning_rate": 9.840568513398757e-05, "loss": 4.6619, "step": 51390 }, { "epoch": 0.09845583326868577, "grad_norm": 1.400559902191162, "learning_rate": 9.842483989809665e-05, "loss": 4.716, "step": 51400 }, { "epoch": 0.09847498810006101, "grad_norm": 1.3617757558822632, "learning_rate": 9.844399466220573e-05, "loss": 4.685, "step": 51410 }, { "epoch": 0.09849414293143624, "grad_norm": 1.3709936141967773, "learning_rate": 9.846314942631481e-05, "loss": 4.8706, "step": 51420 }, { "epoch": 0.09851329776281147, "grad_norm": 1.391937017440796, "learning_rate": 9.848230419042388e-05, "loss": 4.6519, "step": 51430 }, { "epoch": 0.0985324525941867, "grad_norm": 1.3389774560928345, "learning_rate": 9.850145895453297e-05, "loss": 4.676, "step": 51440 }, { "epoch": 0.09855160742556193, "grad_norm": 1.3742398023605347, "learning_rate": 
9.852061371864204e-05, "loss": 4.7341, "step": 51450 }, { "epoch": 0.09857076225693716, "grad_norm": 1.2974375486373901, "learning_rate": 9.853976848275112e-05, "loss": 4.7304, "step": 51460 }, { "epoch": 0.0985899170883124, "grad_norm": 1.3228226900100708, "learning_rate": 9.855892324686022e-05, "loss": 4.6642, "step": 51470 }, { "epoch": 0.09860907191968762, "grad_norm": 1.3799364566802979, "learning_rate": 9.857807801096928e-05, "loss": 4.6145, "step": 51480 }, { "epoch": 0.09862822675106285, "grad_norm": 1.333537220954895, "learning_rate": 9.859723277507837e-05, "loss": 4.7068, "step": 51490 }, { "epoch": 0.09864738158243809, "grad_norm": 1.3256338834762573, "learning_rate": 9.861638753918745e-05, "loss": 4.7609, "step": 51500 }, { "epoch": 0.09866653641381332, "grad_norm": 1.3582978248596191, "learning_rate": 9.863554230329653e-05, "loss": 4.5793, "step": 51510 }, { "epoch": 0.09868569124518854, "grad_norm": 1.3862402439117432, "learning_rate": 9.865469706740561e-05, "loss": 4.6464, "step": 51520 }, { "epoch": 0.09870484607656378, "grad_norm": 1.411257028579712, "learning_rate": 9.867385183151469e-05, "loss": 4.6134, "step": 51530 }, { "epoch": 0.09872400090793901, "grad_norm": 1.356938362121582, "learning_rate": 9.869300659562376e-05, "loss": 4.6963, "step": 51540 }, { "epoch": 0.09874315573931423, "grad_norm": 1.3139930963516235, "learning_rate": 9.871216135973285e-05, "loss": 4.7483, "step": 51550 }, { "epoch": 0.09876231057068947, "grad_norm": 1.3694247007369995, "learning_rate": 9.873131612384192e-05, "loss": 4.7668, "step": 51560 }, { "epoch": 0.0987814654020647, "grad_norm": 1.4006544351577759, "learning_rate": 9.8750470887951e-05, "loss": 4.6549, "step": 51570 }, { "epoch": 0.09880062023343993, "grad_norm": 1.3639189004898071, "learning_rate": 9.87696256520601e-05, "loss": 4.6982, "step": 51580 }, { "epoch": 0.09881977506481517, "grad_norm": 1.3863887786865234, "learning_rate": 9.878878041616916e-05, "loss": 4.7032, "step": 51590 }, { "epoch": 
0.09883892989619039, "grad_norm": 1.363623857498169, "learning_rate": 9.880793518027825e-05, "loss": 4.6975, "step": 51600 }, { "epoch": 0.09885808472756562, "grad_norm": 1.3719348907470703, "learning_rate": 9.882708994438733e-05, "loss": 4.6855, "step": 51610 }, { "epoch": 0.09887723955894086, "grad_norm": 1.3114986419677734, "learning_rate": 9.884624470849641e-05, "loss": 4.789, "step": 51620 }, { "epoch": 0.09889639439031608, "grad_norm": 1.4232876300811768, "learning_rate": 9.886539947260549e-05, "loss": 4.6133, "step": 51630 }, { "epoch": 0.09891554922169131, "grad_norm": 1.3781508207321167, "learning_rate": 9.888455423671456e-05, "loss": 4.6792, "step": 51640 }, { "epoch": 0.09893470405306655, "grad_norm": 1.304282546043396, "learning_rate": 9.890370900082364e-05, "loss": 4.9452, "step": 51650 }, { "epoch": 0.09895385888444178, "grad_norm": 1.3347512483596802, "learning_rate": 9.892286376493273e-05, "loss": 4.5997, "step": 51660 }, { "epoch": 0.098973013715817, "grad_norm": 1.359505295753479, "learning_rate": 9.89420185290418e-05, "loss": 4.7258, "step": 51670 }, { "epoch": 0.09899216854719224, "grad_norm": 1.3002221584320068, "learning_rate": 9.896117329315088e-05, "loss": 4.7958, "step": 51680 }, { "epoch": 0.09901132337856747, "grad_norm": 1.3231754302978516, "learning_rate": 9.898032805725998e-05, "loss": 4.7663, "step": 51690 }, { "epoch": 0.0990304782099427, "grad_norm": 1.317831039428711, "learning_rate": 9.899948282136905e-05, "loss": 4.783, "step": 51700 }, { "epoch": 0.09904963304131793, "grad_norm": 1.4793739318847656, "learning_rate": 9.901863758547813e-05, "loss": 4.6717, "step": 51710 }, { "epoch": 0.09906878787269316, "grad_norm": 1.2620106935501099, "learning_rate": 9.903779234958721e-05, "loss": 4.7283, "step": 51720 }, { "epoch": 0.09908794270406839, "grad_norm": 1.34036123752594, "learning_rate": 9.905694711369629e-05, "loss": 4.6573, "step": 51730 }, { "epoch": 0.09910709753544363, "grad_norm": 1.272940754890442, "learning_rate": 
9.907610187780537e-05, "loss": 4.7968, "step": 51740 }, { "epoch": 0.09912625236681885, "grad_norm": 1.3052668571472168, "learning_rate": 9.909525664191444e-05, "loss": 4.7031, "step": 51750 }, { "epoch": 0.09914540719819408, "grad_norm": 1.5065456628799438, "learning_rate": 9.911441140602352e-05, "loss": 4.7799, "step": 51760 }, { "epoch": 0.09916456202956932, "grad_norm": 1.4536709785461426, "learning_rate": 9.913356617013261e-05, "loss": 4.5551, "step": 51770 }, { "epoch": 0.09918371686094454, "grad_norm": 1.347120761871338, "learning_rate": 9.915272093424168e-05, "loss": 4.6992, "step": 51780 }, { "epoch": 0.09920287169231977, "grad_norm": 1.3221629858016968, "learning_rate": 9.917187569835076e-05, "loss": 4.7416, "step": 51790 }, { "epoch": 0.09922202652369501, "grad_norm": 1.3355473279953003, "learning_rate": 9.919103046245986e-05, "loss": 4.7668, "step": 51800 }, { "epoch": 0.09924118135507023, "grad_norm": 1.3673996925354004, "learning_rate": 9.921018522656893e-05, "loss": 4.7512, "step": 51810 }, { "epoch": 0.09926033618644546, "grad_norm": 1.3121310472488403, "learning_rate": 9.922933999067801e-05, "loss": 4.7719, "step": 51820 }, { "epoch": 0.0992794910178207, "grad_norm": 1.3228622674942017, "learning_rate": 9.924849475478708e-05, "loss": 4.7025, "step": 51830 }, { "epoch": 0.09929864584919593, "grad_norm": 1.343088150024414, "learning_rate": 9.926764951889617e-05, "loss": 4.658, "step": 51840 }, { "epoch": 0.09931780068057115, "grad_norm": 1.5643994808197021, "learning_rate": 9.928680428300525e-05, "loss": 4.7638, "step": 51850 }, { "epoch": 0.09933695551194639, "grad_norm": 1.303972601890564, "learning_rate": 9.930595904711432e-05, "loss": 4.671, "step": 51860 }, { "epoch": 0.09935611034332162, "grad_norm": 1.2834932804107666, "learning_rate": 9.93251138112234e-05, "loss": 4.6649, "step": 51870 }, { "epoch": 0.09937526517469684, "grad_norm": 1.4178205728530884, "learning_rate": 9.93442685753325e-05, "loss": 4.7822, "step": 51880 }, { "epoch": 
0.09939442000607208, "grad_norm": 1.3283543586730957, "learning_rate": 9.936342333944156e-05, "loss": 4.7073, "step": 51890 }, { "epoch": 0.09941357483744731, "grad_norm": 1.3639085292816162, "learning_rate": 9.938257810355064e-05, "loss": 4.6238, "step": 51900 }, { "epoch": 0.09943272966882254, "grad_norm": 1.367584466934204, "learning_rate": 9.940173286765974e-05, "loss": 4.6975, "step": 51910 }, { "epoch": 0.09945188450019778, "grad_norm": 1.3474247455596924, "learning_rate": 9.942088763176881e-05, "loss": 4.5927, "step": 51920 }, { "epoch": 0.099471039331573, "grad_norm": 1.3313472270965576, "learning_rate": 9.944004239587789e-05, "loss": 4.6536, "step": 51930 }, { "epoch": 0.09949019416294823, "grad_norm": 1.4005131721496582, "learning_rate": 9.945919715998696e-05, "loss": 4.7617, "step": 51940 }, { "epoch": 0.09950934899432347, "grad_norm": 1.3606749773025513, "learning_rate": 9.947835192409605e-05, "loss": 4.8196, "step": 51950 }, { "epoch": 0.0995285038256987, "grad_norm": 1.3119771480560303, "learning_rate": 9.949750668820513e-05, "loss": 4.6737, "step": 51960 }, { "epoch": 0.09954765865707392, "grad_norm": 1.3395514488220215, "learning_rate": 9.95166614523142e-05, "loss": 4.7076, "step": 51970 }, { "epoch": 0.09956681348844916, "grad_norm": 1.301755666732788, "learning_rate": 9.953581621642328e-05, "loss": 4.7857, "step": 51980 }, { "epoch": 0.09958596831982439, "grad_norm": 1.3458449840545654, "learning_rate": 9.955497098053238e-05, "loss": 4.8172, "step": 51990 }, { "epoch": 0.09960512315119963, "grad_norm": 1.3213655948638916, "learning_rate": 9.957412574464144e-05, "loss": 4.8922, "step": 52000 }, { "epoch": 0.09962427798257485, "grad_norm": 1.3635951280593872, "learning_rate": 9.959328050875053e-05, "loss": 4.6663, "step": 52010 }, { "epoch": 0.09964343281395008, "grad_norm": 1.3909101486206055, "learning_rate": 9.961243527285962e-05, "loss": 4.7134, "step": 52020 }, { "epoch": 0.09966258764532532, "grad_norm": 1.291198492050171, "learning_rate": 
9.963159003696869e-05, "loss": 4.7857, "step": 52030 }, { "epoch": 0.09968174247670054, "grad_norm": 1.3598668575286865, "learning_rate": 9.965074480107777e-05, "loss": 4.4123, "step": 52040 }, { "epoch": 0.09970089730807577, "grad_norm": 1.33322012424469, "learning_rate": 9.966989956518684e-05, "loss": 4.6878, "step": 52050 }, { "epoch": 0.09972005213945101, "grad_norm": 1.2845888137817383, "learning_rate": 9.968905432929593e-05, "loss": 4.6625, "step": 52060 }, { "epoch": 0.09973920697082624, "grad_norm": 1.3188937902450562, "learning_rate": 9.970820909340501e-05, "loss": 4.7607, "step": 52070 }, { "epoch": 0.09975836180220146, "grad_norm": 1.3482906818389893, "learning_rate": 9.972736385751408e-05, "loss": 4.6377, "step": 52080 }, { "epoch": 0.0997775166335767, "grad_norm": 1.3858680725097656, "learning_rate": 9.974651862162316e-05, "loss": 4.6914, "step": 52090 }, { "epoch": 0.09979667146495193, "grad_norm": 1.4308487176895142, "learning_rate": 9.976567338573226e-05, "loss": 4.6905, "step": 52100 }, { "epoch": 0.09981582629632715, "grad_norm": 1.3185142278671265, "learning_rate": 9.978482814984132e-05, "loss": 4.8201, "step": 52110 }, { "epoch": 0.0998349811277024, "grad_norm": 1.3383179903030396, "learning_rate": 9.98039829139504e-05, "loss": 4.7418, "step": 52120 }, { "epoch": 0.09985413595907762, "grad_norm": 1.327594518661499, "learning_rate": 9.982313767805947e-05, "loss": 4.7183, "step": 52130 }, { "epoch": 0.09987329079045285, "grad_norm": 1.3475794792175293, "learning_rate": 9.984229244216857e-05, "loss": 4.8421, "step": 52140 }, { "epoch": 0.09989244562182809, "grad_norm": 1.3464694023132324, "learning_rate": 9.986144720627765e-05, "loss": 4.7055, "step": 52150 }, { "epoch": 0.09991160045320331, "grad_norm": 1.314220666885376, "learning_rate": 9.988060197038672e-05, "loss": 4.8985, "step": 52160 }, { "epoch": 0.09993075528457854, "grad_norm": 1.382858157157898, "learning_rate": 9.989975673449581e-05, "loss": 4.6086, "step": 52170 }, { "epoch": 
0.09994991011595378, "grad_norm": 1.45159912109375, "learning_rate": 9.99189114986049e-05, "loss": 4.77, "step": 52180 }, { "epoch": 0.099969064947329, "grad_norm": 1.3732869625091553, "learning_rate": 9.993806626271396e-05, "loss": 4.8437, "step": 52190 }, { "epoch": 0.09998821977870423, "grad_norm": 1.3516600131988525, "learning_rate": 9.995722102682304e-05, "loss": 4.7848, "step": 52200 }, { "epoch": 0.10000737461007947, "grad_norm": 1.3695429563522339, "learning_rate": 9.997637579093214e-05, "loss": 4.6251, "step": 52210 }, { "epoch": 0.1000265294414547, "grad_norm": 1.3050168752670288, "learning_rate": 9.99955305550412e-05, "loss": 4.8538, "step": 52220 }, { "epoch": 0.10004568427282992, "grad_norm": 1.3085564374923706, "learning_rate": 0.00010001468531915029, "loss": 4.6862, "step": 52230 }, { "epoch": 0.10006483910420516, "grad_norm": 1.309739112854004, "learning_rate": 0.00010003384008325935, "loss": 4.6909, "step": 52240 }, { "epoch": 0.10008399393558039, "grad_norm": 1.306302785873413, "learning_rate": 0.00010005299484736845, "loss": 4.6546, "step": 52250 }, { "epoch": 0.10010314876695561, "grad_norm": 1.340957522392273, "learning_rate": 0.00010007214961147753, "loss": 4.7251, "step": 52260 }, { "epoch": 0.10012230359833085, "grad_norm": 1.3761372566223145, "learning_rate": 0.0001000913043755866, "loss": 4.64, "step": 52270 }, { "epoch": 0.10014145842970608, "grad_norm": 1.3223772048950195, "learning_rate": 0.0001001104591396957, "loss": 4.7216, "step": 52280 }, { "epoch": 0.1001606132610813, "grad_norm": 1.3674492835998535, "learning_rate": 0.00010012961390380477, "loss": 4.6053, "step": 52290 }, { "epoch": 0.10017976809245654, "grad_norm": 1.4233357906341553, "learning_rate": 0.00010014876866791384, "loss": 4.7367, "step": 52300 }, { "epoch": 0.10019892292383177, "grad_norm": 1.3269696235656738, "learning_rate": 0.00010016792343202292, "loss": 4.6875, "step": 52310 }, { "epoch": 0.100218077755207, "grad_norm": 1.3561944961547852, "learning_rate": 
0.00010018707819613199, "loss": 4.572, "step": 52320 }, { "epoch": 0.10023723258658224, "grad_norm": 1.3359557390213013, "learning_rate": 0.00010020623296024109, "loss": 4.7197, "step": 52330 }, { "epoch": 0.10025638741795746, "grad_norm": 1.3476176261901855, "learning_rate": 0.00010022538772435017, "loss": 4.7536, "step": 52340 }, { "epoch": 0.10027554224933269, "grad_norm": 1.3348287343978882, "learning_rate": 0.00010024454248845924, "loss": 4.6192, "step": 52350 }, { "epoch": 0.10029469708070793, "grad_norm": 1.3032103776931763, "learning_rate": 0.00010026369725256833, "loss": 4.7411, "step": 52360 }, { "epoch": 0.10031385191208315, "grad_norm": 1.3301479816436768, "learning_rate": 0.00010028285201667741, "loss": 4.7719, "step": 52370 }, { "epoch": 0.10033300674345838, "grad_norm": 1.3177825212478638, "learning_rate": 0.00010030200678078648, "loss": 4.7451, "step": 52380 }, { "epoch": 0.10035216157483362, "grad_norm": 1.32858145236969, "learning_rate": 0.00010032116154489556, "loss": 4.7489, "step": 52390 }, { "epoch": 0.10037131640620885, "grad_norm": 1.2836302518844604, "learning_rate": 0.00010034031630900466, "loss": 4.7434, "step": 52400 }, { "epoch": 0.10039047123758407, "grad_norm": 1.4177005290985107, "learning_rate": 0.00010035947107311372, "loss": 4.6476, "step": 52410 }, { "epoch": 0.10040962606895931, "grad_norm": 1.3201580047607422, "learning_rate": 0.0001003786258372228, "loss": 4.683, "step": 52420 }, { "epoch": 0.10042878090033454, "grad_norm": 1.3244755268096924, "learning_rate": 0.00010039778060133187, "loss": 4.6384, "step": 52430 }, { "epoch": 0.10044793573170976, "grad_norm": 1.3147908449172974, "learning_rate": 0.00010041693536544097, "loss": 4.7215, "step": 52440 }, { "epoch": 0.100467090563085, "grad_norm": 1.293439507484436, "learning_rate": 0.00010043609012955005, "loss": 4.7034, "step": 52450 }, { "epoch": 0.10048624539446023, "grad_norm": 1.33495032787323, "learning_rate": 0.00010045524489365912, "loss": 4.7038, "step": 52460 }, { 
"epoch": 0.10050540022583546, "grad_norm": 1.333815336227417, "learning_rate": 0.00010047439965776821, "loss": 4.63, "step": 52470 }, { "epoch": 0.1005245550572107, "grad_norm": 1.324779987335205, "learning_rate": 0.00010049355442187729, "loss": 4.7247, "step": 52480 }, { "epoch": 0.10054370988858592, "grad_norm": 1.3687775135040283, "learning_rate": 0.00010051270918598636, "loss": 4.713, "step": 52490 }, { "epoch": 0.10056286471996115, "grad_norm": 1.341680645942688, "learning_rate": 0.00010053186395009544, "loss": 4.6775, "step": 52500 }, { "epoch": 0.10058201955133639, "grad_norm": 1.3361458778381348, "learning_rate": 0.00010055101871420452, "loss": 4.774, "step": 52510 }, { "epoch": 0.10060117438271161, "grad_norm": 1.3405221700668335, "learning_rate": 0.0001005701734783136, "loss": 4.713, "step": 52520 }, { "epoch": 0.10062032921408684, "grad_norm": 1.3213481903076172, "learning_rate": 0.00010058932824242269, "loss": 4.7564, "step": 52530 }, { "epoch": 0.10063948404546208, "grad_norm": 1.2967973947525024, "learning_rate": 0.00010060848300653175, "loss": 4.7237, "step": 52540 }, { "epoch": 0.1006586388768373, "grad_norm": 1.3996351957321167, "learning_rate": 0.00010062763777064085, "loss": 4.6507, "step": 52550 }, { "epoch": 0.10067779370821253, "grad_norm": 1.3348891735076904, "learning_rate": 0.00010064679253474993, "loss": 4.6488, "step": 52560 }, { "epoch": 0.10069694853958777, "grad_norm": 1.3414251804351807, "learning_rate": 0.000100665947298859, "loss": 4.8305, "step": 52570 }, { "epoch": 0.100716103370963, "grad_norm": 1.243887186050415, "learning_rate": 0.00010068510206296809, "loss": 4.6888, "step": 52580 }, { "epoch": 0.10073525820233822, "grad_norm": 1.3000569343566895, "learning_rate": 0.00010070425682707717, "loss": 4.5713, "step": 52590 }, { "epoch": 0.10075441303371346, "grad_norm": 1.3322842121124268, "learning_rate": 0.00010072341159118624, "loss": 4.7539, "step": 52600 }, { "epoch": 0.10077356786508869, "grad_norm": 1.378294825553894, 
"learning_rate": 0.00010074256635529532, "loss": 4.6161, "step": 52610 }, { "epoch": 0.10079272269646392, "grad_norm": 1.4292715787887573, "learning_rate": 0.0001007617211194044, "loss": 4.8331, "step": 52620 }, { "epoch": 0.10081187752783916, "grad_norm": 1.7142126560211182, "learning_rate": 0.00010078087588351348, "loss": 4.7296, "step": 52630 }, { "epoch": 0.10083103235921438, "grad_norm": 1.4164992570877075, "learning_rate": 0.00010080003064762257, "loss": 4.694, "step": 52640 }, { "epoch": 0.10085018719058962, "grad_norm": 1.30662202835083, "learning_rate": 0.00010081918541173163, "loss": 4.6339, "step": 52650 }, { "epoch": 0.10086934202196485, "grad_norm": 1.3443894386291504, "learning_rate": 0.00010083834017584073, "loss": 4.6751, "step": 52660 }, { "epoch": 0.10088849685334007, "grad_norm": 1.2661476135253906, "learning_rate": 0.00010085749493994981, "loss": 4.5939, "step": 52670 }, { "epoch": 0.10090765168471531, "grad_norm": 1.682453989982605, "learning_rate": 0.00010087664970405888, "loss": 4.7327, "step": 52680 }, { "epoch": 0.10092680651609054, "grad_norm": 1.3372018337249756, "learning_rate": 0.00010089580446816797, "loss": 4.7509, "step": 52690 }, { "epoch": 0.10094596134746577, "grad_norm": 1.3832266330718994, "learning_rate": 0.00010091495923227705, "loss": 4.7249, "step": 52700 }, { "epoch": 0.100965116178841, "grad_norm": 1.3661999702453613, "learning_rate": 0.00010093411399638612, "loss": 4.7218, "step": 52710 }, { "epoch": 0.10098427101021623, "grad_norm": 1.3218859434127808, "learning_rate": 0.0001009532687604952, "loss": 4.6872, "step": 52720 }, { "epoch": 0.10100342584159146, "grad_norm": 1.3098077774047852, "learning_rate": 0.00010097242352460428, "loss": 4.7973, "step": 52730 }, { "epoch": 0.1010225806729667, "grad_norm": 1.3670237064361572, "learning_rate": 0.00010099157828871337, "loss": 4.7011, "step": 52740 }, { "epoch": 0.10104173550434192, "grad_norm": 1.3784016370773315, "learning_rate": 0.00010101073305282245, "loss": 4.707, 
"step": 52750 }, { "epoch": 0.10106089033571715, "grad_norm": 1.3412470817565918, "learning_rate": 0.00010102988781693151, "loss": 4.6455, "step": 52760 }, { "epoch": 0.10108004516709239, "grad_norm": 1.260037899017334, "learning_rate": 0.00010104904258104061, "loss": 4.8701, "step": 52770 }, { "epoch": 0.10109919999846761, "grad_norm": 1.3968236446380615, "learning_rate": 0.00010106819734514969, "loss": 4.7671, "step": 52780 }, { "epoch": 0.10111835482984284, "grad_norm": 1.3341283798217773, "learning_rate": 0.00010108735210925876, "loss": 4.7552, "step": 52790 }, { "epoch": 0.10113750966121808, "grad_norm": 1.3277404308319092, "learning_rate": 0.00010110650687336785, "loss": 4.7247, "step": 52800 }, { "epoch": 0.1011566644925933, "grad_norm": 1.305694341659546, "learning_rate": 0.00010112566163747692, "loss": 4.6509, "step": 52810 }, { "epoch": 0.10117581932396853, "grad_norm": 1.3120568990707397, "learning_rate": 0.000101144816401586, "loss": 4.7034, "step": 52820 }, { "epoch": 0.10119497415534377, "grad_norm": 1.3110222816467285, "learning_rate": 0.00010116397116569508, "loss": 4.6537, "step": 52830 }, { "epoch": 0.101214128986719, "grad_norm": 1.336370587348938, "learning_rate": 0.00010118312592980417, "loss": 4.5256, "step": 52840 }, { "epoch": 0.10123328381809422, "grad_norm": 1.3285346031188965, "learning_rate": 0.00010120228069391325, "loss": 4.8478, "step": 52850 }, { "epoch": 0.10125243864946946, "grad_norm": 1.3282783031463623, "learning_rate": 0.00010122143545802233, "loss": 4.742, "step": 52860 }, { "epoch": 0.10127159348084469, "grad_norm": 1.3463093042373657, "learning_rate": 0.0001012405902221314, "loss": 4.5427, "step": 52870 }, { "epoch": 0.10129074831221992, "grad_norm": 1.3230502605438232, "learning_rate": 0.00010125974498624049, "loss": 4.7948, "step": 52880 }, { "epoch": 0.10130990314359516, "grad_norm": 1.4444152116775513, "learning_rate": 0.00010127889975034957, "loss": 4.613, "step": 52890 }, { "epoch": 0.10132905797497038, "grad_norm": 
1.3412771224975586, "learning_rate": 0.00010129805451445864, "loss": 4.6457, "step": 52900 }, { "epoch": 0.10134821280634561, "grad_norm": 1.3432120084762573, "learning_rate": 0.00010131720927856773, "loss": 4.8266, "step": 52910 }, { "epoch": 0.10136736763772085, "grad_norm": 1.341667890548706, "learning_rate": 0.0001013363640426768, "loss": 4.6795, "step": 52920 }, { "epoch": 0.10138652246909607, "grad_norm": 1.320428729057312, "learning_rate": 0.00010135551880678588, "loss": 4.6984, "step": 52930 }, { "epoch": 0.1014056773004713, "grad_norm": 1.326418161392212, "learning_rate": 0.00010137467357089496, "loss": 4.6908, "step": 52940 }, { "epoch": 0.10142483213184654, "grad_norm": 1.3340116739273071, "learning_rate": 0.00010139382833500405, "loss": 4.7296, "step": 52950 }, { "epoch": 0.10144398696322177, "grad_norm": 1.320558786392212, "learning_rate": 0.00010141298309911313, "loss": 4.7008, "step": 52960 }, { "epoch": 0.10146314179459699, "grad_norm": 1.3497774600982666, "learning_rate": 0.00010143213786322221, "loss": 4.7042, "step": 52970 }, { "epoch": 0.10148229662597223, "grad_norm": 1.3801624774932861, "learning_rate": 0.00010145129262733128, "loss": 4.6355, "step": 52980 }, { "epoch": 0.10150145145734746, "grad_norm": 1.3946421146392822, "learning_rate": 0.00010147044739144037, "loss": 4.7565, "step": 52990 }, { "epoch": 0.10152060628872268, "grad_norm": 1.3110567331314087, "learning_rate": 0.00010148960215554944, "loss": 4.7319, "step": 53000 }, { "epoch": 0.10153976112009792, "grad_norm": 1.4221009016036987, "learning_rate": 0.00010150875691965852, "loss": 4.6535, "step": 53010 }, { "epoch": 0.10155891595147315, "grad_norm": 1.310524344444275, "learning_rate": 0.00010152791168376762, "loss": 4.7321, "step": 53020 }, { "epoch": 0.10157807078284838, "grad_norm": 1.3126144409179688, "learning_rate": 0.00010154515097146577, "loss": 4.7432, "step": 53030 }, { "epoch": 0.10159722561422362, "grad_norm": 1.9482882022857666, "learning_rate": 0.00010156430573557487, 
"loss": 4.6234, "step": 53040 }, { "epoch": 0.10161638044559884, "grad_norm": 1.381924033164978, "learning_rate": 0.00010158346049968394, "loss": 4.6453, "step": 53050 }, { "epoch": 0.10163553527697407, "grad_norm": 1.3594129085540771, "learning_rate": 0.00010160261526379302, "loss": 4.7002, "step": 53060 }, { "epoch": 0.10165469010834931, "grad_norm": 1.2997534275054932, "learning_rate": 0.0001016217700279021, "loss": 4.6884, "step": 53070 }, { "epoch": 0.10167384493972453, "grad_norm": 1.3962135314941406, "learning_rate": 0.00010164092479201118, "loss": 4.709, "step": 53080 }, { "epoch": 0.10169299977109976, "grad_norm": 1.3421226739883423, "learning_rate": 0.00010166007955612026, "loss": 4.7046, "step": 53090 }, { "epoch": 0.101712154602475, "grad_norm": 1.3395580053329468, "learning_rate": 0.00010167923432022933, "loss": 4.5966, "step": 53100 }, { "epoch": 0.10173130943385023, "grad_norm": 1.3550868034362793, "learning_rate": 0.00010169838908433841, "loss": 4.7428, "step": 53110 }, { "epoch": 0.10175046426522545, "grad_norm": 1.31368887424469, "learning_rate": 0.0001017175438484475, "loss": 4.7483, "step": 53120 }, { "epoch": 0.10176961909660069, "grad_norm": 1.3498928546905518, "learning_rate": 0.00010173669861255657, "loss": 4.5425, "step": 53130 }, { "epoch": 0.10178877392797592, "grad_norm": 1.3321865797042847, "learning_rate": 0.00010175585337666566, "loss": 4.6404, "step": 53140 }, { "epoch": 0.10180792875935114, "grad_norm": 1.3564116954803467, "learning_rate": 0.00010177500814077475, "loss": 4.6057, "step": 53150 }, { "epoch": 0.10182708359072638, "grad_norm": 1.312509536743164, "learning_rate": 0.00010179416290488382, "loss": 4.6493, "step": 53160 }, { "epoch": 0.10184623842210161, "grad_norm": 1.2764321565628052, "learning_rate": 0.0001018133176689929, "loss": 4.5988, "step": 53170 }, { "epoch": 0.10186539325347684, "grad_norm": 1.333714485168457, "learning_rate": 0.00010183247243310197, "loss": 4.719, "step": 53180 }, { "epoch": 0.10188454808485208, 
"grad_norm": 1.344409704208374, "learning_rate": 0.00010185162719721105, "loss": 4.6643, "step": 53190 }, { "epoch": 0.1019037029162273, "grad_norm": 1.3030341863632202, "learning_rate": 0.00010187078196132014, "loss": 4.7497, "step": 53200 }, { "epoch": 0.10192285774760253, "grad_norm": 1.3638391494750977, "learning_rate": 0.00010188993672542921, "loss": 4.8378, "step": 53210 }, { "epoch": 0.10194201257897777, "grad_norm": 1.3071706295013428, "learning_rate": 0.00010190909148953829, "loss": 4.7571, "step": 53220 }, { "epoch": 0.10196116741035299, "grad_norm": 1.392134666442871, "learning_rate": 0.00010192824625364739, "loss": 4.7332, "step": 53230 }, { "epoch": 0.10198032224172822, "grad_norm": 1.2759445905685425, "learning_rate": 0.00010194740101775646, "loss": 4.8025, "step": 53240 }, { "epoch": 0.10199947707310346, "grad_norm": 1.3681316375732422, "learning_rate": 0.00010196655578186554, "loss": 4.6396, "step": 53250 }, { "epoch": 0.10201863190447868, "grad_norm": 1.3440700769424438, "learning_rate": 0.00010198571054597463, "loss": 4.7001, "step": 53260 }, { "epoch": 0.10203778673585391, "grad_norm": 1.309860110282898, "learning_rate": 0.0001020048653100837, "loss": 4.608, "step": 53270 }, { "epoch": 0.10205694156722915, "grad_norm": 1.3204253911972046, "learning_rate": 0.00010202402007419278, "loss": 4.7034, "step": 53280 }, { "epoch": 0.10207609639860438, "grad_norm": 1.4057090282440186, "learning_rate": 0.00010204317483830185, "loss": 4.8307, "step": 53290 }, { "epoch": 0.1020952512299796, "grad_norm": 1.310391902923584, "learning_rate": 0.00010206232960241093, "loss": 4.8025, "step": 53300 }, { "epoch": 0.10211440606135484, "grad_norm": 1.3311119079589844, "learning_rate": 0.00010208148436652002, "loss": 4.5419, "step": 53310 }, { "epoch": 0.10213356089273007, "grad_norm": 1.3047043085098267, "learning_rate": 0.00010210063913062909, "loss": 4.6783, "step": 53320 }, { "epoch": 0.10215271572410531, "grad_norm": 1.3349230289459229, "learning_rate": 
0.00010211979389473817, "loss": 4.7192, "step": 53330 }, { "epoch": 0.10217187055548053, "grad_norm": 1.3950556516647339, "learning_rate": 0.00010213894865884727, "loss": 4.6949, "step": 53340 }, { "epoch": 0.10219102538685576, "grad_norm": 1.316072940826416, "learning_rate": 0.00010215810342295634, "loss": 4.7955, "step": 53350 }, { "epoch": 0.102210180218231, "grad_norm": 1.3630450963974, "learning_rate": 0.00010217725818706542, "loss": 4.7069, "step": 53360 }, { "epoch": 0.10222933504960623, "grad_norm": 1.3643375635147095, "learning_rate": 0.00010219641295117451, "loss": 4.6754, "step": 53370 }, { "epoch": 0.10224848988098145, "grad_norm": 1.3580586910247803, "learning_rate": 0.00010221556771528358, "loss": 4.6492, "step": 53380 }, { "epoch": 0.10226764471235669, "grad_norm": 1.5105218887329102, "learning_rate": 0.00010223472247939266, "loss": 4.7271, "step": 53390 }, { "epoch": 0.10228679954373192, "grad_norm": 1.3153632879257202, "learning_rate": 0.00010225387724350173, "loss": 4.6317, "step": 53400 }, { "epoch": 0.10230595437510714, "grad_norm": 1.3495945930480957, "learning_rate": 0.00010227303200761081, "loss": 4.7458, "step": 53410 }, { "epoch": 0.10232510920648238, "grad_norm": 1.2801017761230469, "learning_rate": 0.0001022921867717199, "loss": 4.7304, "step": 53420 }, { "epoch": 0.10234426403785761, "grad_norm": 1.383142352104187, "learning_rate": 0.00010231134153582897, "loss": 4.7752, "step": 53430 }, { "epoch": 0.10236341886923284, "grad_norm": 1.2503067255020142, "learning_rate": 0.00010233049629993805, "loss": 4.5989, "step": 53440 }, { "epoch": 0.10238257370060808, "grad_norm": 1.3752326965332031, "learning_rate": 0.00010234965106404715, "loss": 4.6897, "step": 53450 }, { "epoch": 0.1024017285319833, "grad_norm": 1.2887846231460571, "learning_rate": 0.00010236880582815622, "loss": 4.6686, "step": 53460 }, { "epoch": 0.10242088336335853, "grad_norm": 1.3244069814682007, "learning_rate": 0.0001023879605922653, "loss": 4.5556, "step": 53470 }, { 
"epoch": 0.10244003819473377, "grad_norm": 1.3316686153411865, "learning_rate": 0.00010240711535637437, "loss": 4.6776, "step": 53480 }, { "epoch": 0.102459193026109, "grad_norm": 1.4522991180419922, "learning_rate": 0.00010242627012048346, "loss": 4.6808, "step": 53490 }, { "epoch": 0.10247834785748422, "grad_norm": 1.3275575637817383, "learning_rate": 0.00010244542488459254, "loss": 4.6815, "step": 53500 }, { "epoch": 0.10249750268885946, "grad_norm": 1.3197304010391235, "learning_rate": 0.00010246457964870161, "loss": 4.6712, "step": 53510 }, { "epoch": 0.10251665752023469, "grad_norm": 1.3033510446548462, "learning_rate": 0.00010248373441281069, "loss": 4.6794, "step": 53520 }, { "epoch": 0.10253581235160991, "grad_norm": 1.2560510635375977, "learning_rate": 0.00010250288917691979, "loss": 4.7422, "step": 53530 }, { "epoch": 0.10255496718298515, "grad_norm": 1.3607871532440186, "learning_rate": 0.00010252204394102885, "loss": 4.6665, "step": 53540 }, { "epoch": 0.10257412201436038, "grad_norm": 1.2959697246551514, "learning_rate": 0.00010254119870513793, "loss": 4.6851, "step": 53550 }, { "epoch": 0.1025932768457356, "grad_norm": 1.3116365671157837, "learning_rate": 0.00010256035346924703, "loss": 4.6841, "step": 53560 }, { "epoch": 0.10261243167711084, "grad_norm": 1.3051116466522217, "learning_rate": 0.0001025795082333561, "loss": 4.7063, "step": 53570 }, { "epoch": 0.10263158650848607, "grad_norm": 1.3278496265411377, "learning_rate": 0.00010259866299746518, "loss": 4.7129, "step": 53580 }, { "epoch": 0.1026507413398613, "grad_norm": 1.297516942024231, "learning_rate": 0.00010261781776157425, "loss": 4.7841, "step": 53590 }, { "epoch": 0.10266989617123654, "grad_norm": 1.3550217151641846, "learning_rate": 0.00010263697252568334, "loss": 4.6622, "step": 53600 }, { "epoch": 0.10268905100261176, "grad_norm": 1.3122133016586304, "learning_rate": 0.00010265612728979242, "loss": 4.8717, "step": 53610 }, { "epoch": 0.10270820583398699, "grad_norm": 
1.3475168943405151, "learning_rate": 0.00010267528205390149, "loss": 4.6569, "step": 53620 }, { "epoch": 0.10272736066536223, "grad_norm": 1.3833260536193848, "learning_rate": 0.00010269443681801057, "loss": 4.5693, "step": 53630 }, { "epoch": 0.10274651549673745, "grad_norm": 1.3277696371078491, "learning_rate": 0.00010271359158211967, "loss": 4.7708, "step": 53640 }, { "epoch": 0.10276567032811268, "grad_norm": 1.3823679685592651, "learning_rate": 0.00010273274634622873, "loss": 4.5848, "step": 53650 }, { "epoch": 0.10278482515948792, "grad_norm": 1.4058994054794312, "learning_rate": 0.00010275190111033782, "loss": 4.7082, "step": 53660 }, { "epoch": 0.10280397999086315, "grad_norm": 1.2422490119934082, "learning_rate": 0.00010277105587444688, "loss": 4.7499, "step": 53670 }, { "epoch": 0.10282313482223837, "grad_norm": 1.3372108936309814, "learning_rate": 0.00010279021063855598, "loss": 4.695, "step": 53680 }, { "epoch": 0.10284228965361361, "grad_norm": 1.700449824333191, "learning_rate": 0.00010280936540266506, "loss": 4.6261, "step": 53690 }, { "epoch": 0.10286144448498884, "grad_norm": 1.3300645351409912, "learning_rate": 0.00010282852016677413, "loss": 4.8052, "step": 53700 }, { "epoch": 0.10288059931636406, "grad_norm": 1.3249683380126953, "learning_rate": 0.00010284767493088322, "loss": 4.7375, "step": 53710 }, { "epoch": 0.1028997541477393, "grad_norm": 1.4193439483642578, "learning_rate": 0.0001028668296949923, "loss": 4.8352, "step": 53720 }, { "epoch": 0.10291890897911453, "grad_norm": 1.3582125902175903, "learning_rate": 0.00010288598445910137, "loss": 4.6815, "step": 53730 }, { "epoch": 0.10293806381048975, "grad_norm": 1.3638339042663574, "learning_rate": 0.00010290513922321045, "loss": 4.583, "step": 53740 }, { "epoch": 0.102957218641865, "grad_norm": 1.3471522331237793, "learning_rate": 0.00010292429398731955, "loss": 4.5942, "step": 53750 }, { "epoch": 0.10297637347324022, "grad_norm": 1.2927995920181274, "learning_rate": 0.00010294344875142862, 
"loss": 4.7055, "step": 53760 }, { "epoch": 0.10299552830461545, "grad_norm": 1.301961064338684, "learning_rate": 0.0001029626035155377, "loss": 4.6997, "step": 53770 }, { "epoch": 0.10301468313599069, "grad_norm": 1.3345681428909302, "learning_rate": 0.00010298175827964676, "loss": 4.6852, "step": 53780 }, { "epoch": 0.10303383796736591, "grad_norm": 1.3260849714279175, "learning_rate": 0.00010300091304375586, "loss": 4.698, "step": 53790 }, { "epoch": 0.10305299279874114, "grad_norm": 1.3771642446517944, "learning_rate": 0.00010302006780786494, "loss": 4.5722, "step": 53800 }, { "epoch": 0.10307214763011638, "grad_norm": 1.2427948713302612, "learning_rate": 0.00010303922257197401, "loss": 4.6853, "step": 53810 }, { "epoch": 0.1030913024614916, "grad_norm": 1.3652215003967285, "learning_rate": 0.0001030583773360831, "loss": 4.7523, "step": 53820 }, { "epoch": 0.10311045729286683, "grad_norm": 1.297537922859192, "learning_rate": 0.00010307753210019218, "loss": 4.8129, "step": 53830 }, { "epoch": 0.10312961212424207, "grad_norm": 1.3287800550460815, "learning_rate": 0.00010309668686430125, "loss": 4.7582, "step": 53840 }, { "epoch": 0.1031487669556173, "grad_norm": 1.3017098903656006, "learning_rate": 0.00010311584162841033, "loss": 4.6985, "step": 53850 }, { "epoch": 0.10316792178699252, "grad_norm": 1.399429440498352, "learning_rate": 0.00010313499639251941, "loss": 4.7191, "step": 53860 }, { "epoch": 0.10318707661836776, "grad_norm": 1.2415404319763184, "learning_rate": 0.0001031541511566285, "loss": 4.6178, "step": 53870 }, { "epoch": 0.10320623144974299, "grad_norm": 1.4752947092056274, "learning_rate": 0.00010317330592073758, "loss": 4.7235, "step": 53880 }, { "epoch": 0.10322538628111821, "grad_norm": 1.3846434354782104, "learning_rate": 0.00010319246068484665, "loss": 4.6595, "step": 53890 }, { "epoch": 0.10324454111249345, "grad_norm": 1.3055144548416138, "learning_rate": 0.00010321161544895574, "loss": 4.6449, "step": 53900 }, { "epoch": 
0.10326369594386868, "grad_norm": 1.2972303628921509, "learning_rate": 0.00010323077021306482, "loss": 4.6603, "step": 53910 }, { "epoch": 0.1032828507752439, "grad_norm": 1.3039177656173706, "learning_rate": 0.00010324992497717389, "loss": 4.6904, "step": 53920 }, { "epoch": 0.10330200560661915, "grad_norm": 1.4283157587051392, "learning_rate": 0.00010326907974128298, "loss": 4.6497, "step": 53930 }, { "epoch": 0.10332116043799437, "grad_norm": 1.332516074180603, "learning_rate": 0.00010328823450539207, "loss": 4.7153, "step": 53940 }, { "epoch": 0.1033403152693696, "grad_norm": 1.3610767126083374, "learning_rate": 0.00010330738926950113, "loss": 4.5939, "step": 53950 }, { "epoch": 0.10335947010074484, "grad_norm": 1.3287259340286255, "learning_rate": 0.00010332654403361021, "loss": 4.723, "step": 53960 }, { "epoch": 0.10337862493212006, "grad_norm": 1.332013487815857, "learning_rate": 0.0001033456987977193, "loss": 4.6888, "step": 53970 }, { "epoch": 0.1033977797634953, "grad_norm": 1.308135986328125, "learning_rate": 0.00010336485356182838, "loss": 4.7125, "step": 53980 }, { "epoch": 0.10341693459487053, "grad_norm": 1.3217864036560059, "learning_rate": 0.00010338400832593746, "loss": 4.8551, "step": 53990 }, { "epoch": 0.10343608942624576, "grad_norm": 1.2835456132888794, "learning_rate": 0.00010340316309004653, "loss": 4.7374, "step": 54000 }, { "epoch": 0.103455244257621, "grad_norm": 1.338334560394287, "learning_rate": 0.00010342231785415562, "loss": 4.6375, "step": 54010 }, { "epoch": 0.10347439908899622, "grad_norm": 1.33512544631958, "learning_rate": 0.0001034414726182647, "loss": 4.7491, "step": 54020 }, { "epoch": 0.10349355392037145, "grad_norm": 1.2835237979888916, "learning_rate": 0.00010346062738237377, "loss": 4.7168, "step": 54030 }, { "epoch": 0.10351270875174669, "grad_norm": 1.3273296356201172, "learning_rate": 0.00010347978214648286, "loss": 4.7102, "step": 54040 }, { "epoch": 0.10353186358312191, "grad_norm": 1.3247618675231934, 
"learning_rate": 0.00010349893691059195, "loss": 4.7914, "step": 54050 }, { "epoch": 0.10355101841449714, "grad_norm": 1.3323906660079956, "learning_rate": 0.00010351809167470101, "loss": 4.6888, "step": 54060 }, { "epoch": 0.10357017324587238, "grad_norm": 1.3297723531723022, "learning_rate": 0.00010353533096239919, "loss": 4.6803, "step": 54070 }, { "epoch": 0.1035893280772476, "grad_norm": 1.310154914855957, "learning_rate": 0.00010355448572650827, "loss": 4.6624, "step": 54080 }, { "epoch": 0.10360848290862283, "grad_norm": 1.4430155754089355, "learning_rate": 0.00010357364049061735, "loss": 4.6994, "step": 54090 }, { "epoch": 0.10362763773999807, "grad_norm": 1.3438258171081543, "learning_rate": 0.00010359279525472642, "loss": 4.712, "step": 54100 }, { "epoch": 0.1036467925713733, "grad_norm": 1.2883230447769165, "learning_rate": 0.00010361195001883551, "loss": 4.5852, "step": 54110 }, { "epoch": 0.10366594740274852, "grad_norm": 1.3339004516601562, "learning_rate": 0.0001036311047829446, "loss": 4.8381, "step": 54120 }, { "epoch": 0.10368510223412376, "grad_norm": 1.3481923341751099, "learning_rate": 0.00010365025954705366, "loss": 4.6451, "step": 54130 }, { "epoch": 0.10370425706549899, "grad_norm": 1.3266040086746216, "learning_rate": 0.00010366941431116276, "loss": 4.7949, "step": 54140 }, { "epoch": 0.10372341189687422, "grad_norm": 1.3177884817123413, "learning_rate": 0.00010368856907527182, "loss": 4.6502, "step": 54150 }, { "epoch": 0.10374256672824946, "grad_norm": 1.302548885345459, "learning_rate": 0.0001037077238393809, "loss": 4.7979, "step": 54160 }, { "epoch": 0.10376172155962468, "grad_norm": 1.3020585775375366, "learning_rate": 0.00010372687860349, "loss": 4.6848, "step": 54170 }, { "epoch": 0.10378087639099991, "grad_norm": 1.3081709146499634, "learning_rate": 0.00010374603336759907, "loss": 4.8288, "step": 54180 }, { "epoch": 0.10380003122237515, "grad_norm": 1.5016342401504517, "learning_rate": 0.00010376518813170815, "loss": 4.6466, 
"step": 54190 }, { "epoch": 0.10381918605375037, "grad_norm": 1.2834141254425049, "learning_rate": 0.00010378434289581723, "loss": 4.6443, "step": 54200 }, { "epoch": 0.1038383408851256, "grad_norm": 1.2721588611602783, "learning_rate": 0.0001038034976599263, "loss": 4.7971, "step": 54210 }, { "epoch": 0.10385749571650084, "grad_norm": 1.3465900421142578, "learning_rate": 0.00010382265242403539, "loss": 4.5805, "step": 54220 }, { "epoch": 0.10387665054787606, "grad_norm": 1.329437494277954, "learning_rate": 0.00010384180718814447, "loss": 4.7149, "step": 54230 }, { "epoch": 0.10389580537925129, "grad_norm": 1.3615245819091797, "learning_rate": 0.00010386096195225354, "loss": 4.6694, "step": 54240 }, { "epoch": 0.10391496021062653, "grad_norm": 1.3437007665634155, "learning_rate": 0.00010388011671636264, "loss": 4.7874, "step": 54250 }, { "epoch": 0.10393411504200176, "grad_norm": 1.3323057889938354, "learning_rate": 0.0001038992714804717, "loss": 4.6366, "step": 54260 }, { "epoch": 0.10395326987337698, "grad_norm": 1.312994122505188, "learning_rate": 0.00010391842624458079, "loss": 4.6181, "step": 54270 }, { "epoch": 0.10397242470475222, "grad_norm": 1.3413559198379517, "learning_rate": 0.00010393758100868988, "loss": 4.6201, "step": 54280 }, { "epoch": 0.10399157953612745, "grad_norm": 1.2831871509552002, "learning_rate": 0.00010395673577279895, "loss": 4.6322, "step": 54290 }, { "epoch": 0.10401073436750267, "grad_norm": 1.325691819190979, "learning_rate": 0.00010397589053690803, "loss": 4.6851, "step": 54300 }, { "epoch": 0.10402988919887791, "grad_norm": 1.4338678121566772, "learning_rate": 0.00010399504530101711, "loss": 4.5236, "step": 54310 }, { "epoch": 0.10404904403025314, "grad_norm": 1.3918673992156982, "learning_rate": 0.00010401420006512618, "loss": 4.7708, "step": 54320 }, { "epoch": 0.10406819886162837, "grad_norm": 1.3288899660110474, "learning_rate": 0.00010403335482923527, "loss": 4.6711, "step": 54330 }, { "epoch": 0.1040873536930036, 
"grad_norm": 1.3133606910705566, "learning_rate": 0.00010405250959334434, "loss": 4.6622, "step": 54340 }, { "epoch": 0.10410650852437883, "grad_norm": 1.3739681243896484, "learning_rate": 0.00010407166435745342, "loss": 4.7084, "step": 54350 }, { "epoch": 0.10412566335575406, "grad_norm": 1.2942262887954712, "learning_rate": 0.00010409081912156252, "loss": 4.7189, "step": 54360 }, { "epoch": 0.1041448181871293, "grad_norm": 1.2877027988433838, "learning_rate": 0.00010410997388567159, "loss": 4.5442, "step": 54370 }, { "epoch": 0.10416397301850452, "grad_norm": 1.4118093252182007, "learning_rate": 0.00010412912864978067, "loss": 4.4838, "step": 54380 }, { "epoch": 0.10418312784987975, "grad_norm": 1.4057185649871826, "learning_rate": 0.00010414828341388975, "loss": 4.685, "step": 54390 }, { "epoch": 0.10420228268125499, "grad_norm": 1.3559019565582275, "learning_rate": 0.00010416743817799883, "loss": 4.6794, "step": 54400 }, { "epoch": 0.10422143751263022, "grad_norm": 1.324845552444458, "learning_rate": 0.00010418659294210791, "loss": 4.6826, "step": 54410 }, { "epoch": 0.10424059234400544, "grad_norm": 1.3753399848937988, "learning_rate": 0.00010420574770621699, "loss": 4.6152, "step": 54420 }, { "epoch": 0.10425974717538068, "grad_norm": 1.3853156566619873, "learning_rate": 0.00010422490247032606, "loss": 4.5877, "step": 54430 }, { "epoch": 0.10427890200675591, "grad_norm": 1.3441659212112427, "learning_rate": 0.00010424405723443515, "loss": 4.6119, "step": 54440 }, { "epoch": 0.10429805683813113, "grad_norm": 1.3773330450057983, "learning_rate": 0.00010426321199854422, "loss": 4.6944, "step": 54450 }, { "epoch": 0.10431721166950637, "grad_norm": 1.2573204040527344, "learning_rate": 0.0001042823667626533, "loss": 4.6255, "step": 54460 }, { "epoch": 0.1043363665008816, "grad_norm": 1.3091269731521606, "learning_rate": 0.0001043015215267624, "loss": 4.7921, "step": 54470 }, { "epoch": 0.10435552133225683, "grad_norm": 1.3541735410690308, "learning_rate": 
0.00010432067629087147, "loss": 4.5912, "step": 54480 }, { "epoch": 0.10437467616363207, "grad_norm": 1.332929253578186, "learning_rate": 0.00010433983105498055, "loss": 4.6643, "step": 54490 }, { "epoch": 0.10439383099500729, "grad_norm": 1.3299607038497925, "learning_rate": 0.00010435898581908963, "loss": 4.6246, "step": 54500 }, { "epoch": 0.10441298582638252, "grad_norm": 1.2841581106185913, "learning_rate": 0.00010437814058319871, "loss": 4.5195, "step": 54510 }, { "epoch": 0.10443214065775776, "grad_norm": 1.3989887237548828, "learning_rate": 0.00010439729534730779, "loss": 4.8331, "step": 54520 }, { "epoch": 0.10445129548913298, "grad_norm": 1.3641111850738525, "learning_rate": 0.00010441645011141686, "loss": 4.6027, "step": 54530 }, { "epoch": 0.10447045032050821, "grad_norm": 1.2625788450241089, "learning_rate": 0.00010443560487552594, "loss": 4.6789, "step": 54540 }, { "epoch": 0.10448960515188345, "grad_norm": 1.2624125480651855, "learning_rate": 0.00010445475963963504, "loss": 4.7561, "step": 54550 }, { "epoch": 0.10450875998325868, "grad_norm": 1.3771920204162598, "learning_rate": 0.0001044739144037441, "loss": 4.6683, "step": 54560 }, { "epoch": 0.1045279148146339, "grad_norm": 1.273124098777771, "learning_rate": 0.00010449306916785318, "loss": 4.642, "step": 54570 }, { "epoch": 0.10454706964600914, "grad_norm": 1.3241597414016724, "learning_rate": 0.00010451222393196228, "loss": 4.7531, "step": 54580 }, { "epoch": 0.10456622447738437, "grad_norm": 1.3415873050689697, "learning_rate": 0.00010453137869607135, "loss": 4.4748, "step": 54590 }, { "epoch": 0.1045853793087596, "grad_norm": 1.3357963562011719, "learning_rate": 0.00010455053346018043, "loss": 4.7681, "step": 54600 }, { "epoch": 0.10460453414013483, "grad_norm": 1.3459300994873047, "learning_rate": 0.00010456968822428951, "loss": 4.753, "step": 54610 }, { "epoch": 0.10462368897151006, "grad_norm": 1.2656056880950928, "learning_rate": 0.00010458884298839859, "loss": 4.6818, "step": 54620 }, { 
"epoch": 0.1046428438028853, "grad_norm": 1.256028413772583, "learning_rate": 0.00010460799775250767, "loss": 4.6759, "step": 54630 }, { "epoch": 0.10466199863426053, "grad_norm": 1.343768835067749, "learning_rate": 0.00010462715251661674, "loss": 4.7478, "step": 54640 }, { "epoch": 0.10468115346563575, "grad_norm": 1.3639851808547974, "learning_rate": 0.00010464630728072582, "loss": 4.7526, "step": 54650 }, { "epoch": 0.10470030829701099, "grad_norm": 1.351242184638977, "learning_rate": 0.00010466546204483492, "loss": 4.585, "step": 54660 }, { "epoch": 0.10471946312838622, "grad_norm": 1.3368781805038452, "learning_rate": 0.00010468461680894398, "loss": 4.7201, "step": 54670 }, { "epoch": 0.10473861795976144, "grad_norm": 1.2841862440109253, "learning_rate": 0.00010470377157305307, "loss": 4.7114, "step": 54680 }, { "epoch": 0.10475777279113668, "grad_norm": 1.3337069749832153, "learning_rate": 0.00010472292633716216, "loss": 4.6779, "step": 54690 }, { "epoch": 0.10477692762251191, "grad_norm": 1.3404197692871094, "learning_rate": 0.00010474208110127123, "loss": 4.6734, "step": 54700 }, { "epoch": 0.10479608245388713, "grad_norm": 1.297909140586853, "learning_rate": 0.00010476123586538031, "loss": 4.6911, "step": 54710 }, { "epoch": 0.10481523728526237, "grad_norm": 1.3420295715332031, "learning_rate": 0.00010478039062948939, "loss": 4.6756, "step": 54720 }, { "epoch": 0.1048343921166376, "grad_norm": 1.326669454574585, "learning_rate": 0.00010479954539359847, "loss": 4.6815, "step": 54730 }, { "epoch": 0.10485354694801283, "grad_norm": 1.2911663055419922, "learning_rate": 0.00010481870015770755, "loss": 4.7506, "step": 54740 }, { "epoch": 0.10487270177938807, "grad_norm": 1.3178693056106567, "learning_rate": 0.00010483785492181662, "loss": 4.7155, "step": 54750 }, { "epoch": 0.10489185661076329, "grad_norm": 1.4572381973266602, "learning_rate": 0.0001048570096859257, "loss": 4.7648, "step": 54760 }, { "epoch": 0.10491101144213852, "grad_norm": 1.3659306764602661, 
"learning_rate": 0.0001048761644500348, "loss": 4.6228, "step": 54770 }, { "epoch": 0.10493016627351376, "grad_norm": 1.2884570360183716, "learning_rate": 0.00010489531921414386, "loss": 4.7685, "step": 54780 }, { "epoch": 0.10494932110488898, "grad_norm": 1.3208072185516357, "learning_rate": 0.00010491447397825295, "loss": 4.717, "step": 54790 }, { "epoch": 0.10496847593626421, "grad_norm": 1.3646596670150757, "learning_rate": 0.00010493362874236204, "loss": 4.8366, "step": 54800 }, { "epoch": 0.10498763076763945, "grad_norm": 1.4619430303573608, "learning_rate": 0.00010495278350647111, "loss": 4.7094, "step": 54810 }, { "epoch": 0.10500678559901468, "grad_norm": 1.3205856084823608, "learning_rate": 0.00010497193827058019, "loss": 4.719, "step": 54820 }, { "epoch": 0.1050259404303899, "grad_norm": 1.6469603776931763, "learning_rate": 0.00010499109303468926, "loss": 4.5419, "step": 54830 }, { "epoch": 0.10504509526176514, "grad_norm": 1.3402197360992432, "learning_rate": 0.00010501024779879835, "loss": 4.787, "step": 54840 }, { "epoch": 0.10506425009314037, "grad_norm": 1.2779390811920166, "learning_rate": 0.00010502940256290743, "loss": 4.7734, "step": 54850 }, { "epoch": 0.1050834049245156, "grad_norm": 1.2594605684280396, "learning_rate": 0.0001050485573270165, "loss": 4.7095, "step": 54860 }, { "epoch": 0.10510255975589083, "grad_norm": 1.3218753337860107, "learning_rate": 0.00010506771209112558, "loss": 4.6573, "step": 54870 }, { "epoch": 0.10512171458726606, "grad_norm": 1.318089246749878, "learning_rate": 0.00010508686685523468, "loss": 4.6451, "step": 54880 }, { "epoch": 0.10514086941864129, "grad_norm": 1.3919912576675415, "learning_rate": 0.00010510602161934375, "loss": 4.636, "step": 54890 }, { "epoch": 0.10516002425001653, "grad_norm": 1.320151448249817, "learning_rate": 0.00010512517638345283, "loss": 4.6383, "step": 54900 }, { "epoch": 0.10517917908139175, "grad_norm": 1.3211233615875244, "learning_rate": 0.00010514433114756192, "loss": 4.6393, 
"step": 54910 }, { "epoch": 0.10519833391276698, "grad_norm": 1.3039259910583496, "learning_rate": 0.00010516348591167099, "loss": 4.4867, "step": 54920 }, { "epoch": 0.10521748874414222, "grad_norm": 1.294238567352295, "learning_rate": 0.00010518264067578007, "loss": 4.6758, "step": 54930 }, { "epoch": 0.10523664357551744, "grad_norm": 1.2397464513778687, "learning_rate": 0.00010520179543988914, "loss": 4.8074, "step": 54940 }, { "epoch": 0.10525579840689267, "grad_norm": 1.3709365129470825, "learning_rate": 0.00010522095020399823, "loss": 4.766, "step": 54950 }, { "epoch": 0.10527495323826791, "grad_norm": 1.3566393852233887, "learning_rate": 0.00010524010496810731, "loss": 4.6658, "step": 54960 }, { "epoch": 0.10529410806964314, "grad_norm": 1.3164445161819458, "learning_rate": 0.00010525925973221638, "loss": 4.8202, "step": 54970 }, { "epoch": 0.10531326290101836, "grad_norm": 1.2984566688537598, "learning_rate": 0.00010527841449632546, "loss": 4.6834, "step": 54980 }, { "epoch": 0.1053324177323936, "grad_norm": 1.3485567569732666, "learning_rate": 0.00010529756926043456, "loss": 4.7069, "step": 54990 }, { "epoch": 0.10535157256376883, "grad_norm": 1.2552849054336548, "learning_rate": 0.00010531672402454363, "loss": 4.6343, "step": 55000 }, { "epoch": 0.10537072739514405, "grad_norm": 1.306863784790039, "learning_rate": 0.00010533587878865271, "loss": 4.5957, "step": 55010 }, { "epoch": 0.1053898822265193, "grad_norm": 1.2756744623184204, "learning_rate": 0.00010535503355276178, "loss": 4.683, "step": 55020 }, { "epoch": 0.10540903705789452, "grad_norm": 1.342628002166748, "learning_rate": 0.00010537418831687087, "loss": 4.7159, "step": 55030 }, { "epoch": 0.10542819188926975, "grad_norm": 1.291404128074646, "learning_rate": 0.00010539334308097995, "loss": 4.732, "step": 55040 }, { "epoch": 0.10544734672064499, "grad_norm": 1.3003909587860107, "learning_rate": 0.00010541249784508902, "loss": 4.6953, "step": 55050 }, { "epoch": 0.10546650155202021, "grad_norm": 
1.290932297706604, "learning_rate": 0.00010543165260919811, "loss": 4.5767, "step": 55060 }, { "epoch": 0.10548565638339544, "grad_norm": 1.3440756797790527, "learning_rate": 0.0001054508073733072, "loss": 4.657, "step": 55070 }, { "epoch": 0.10550481121477068, "grad_norm": 1.2923974990844727, "learning_rate": 0.00010546996213741626, "loss": 4.8194, "step": 55080 }, { "epoch": 0.1055239660461459, "grad_norm": 1.3428584337234497, "learning_rate": 0.00010548911690152534, "loss": 4.6962, "step": 55090 }, { "epoch": 0.10554312087752113, "grad_norm": 1.2671868801116943, "learning_rate": 0.00010550827166563444, "loss": 4.804, "step": 55100 }, { "epoch": 0.10556227570889637, "grad_norm": 1.3376487493515015, "learning_rate": 0.00010552742642974351, "loss": 4.6225, "step": 55110 }, { "epoch": 0.1055814305402716, "grad_norm": 1.2719677686691284, "learning_rate": 0.00010554658119385259, "loss": 4.6406, "step": 55120 }, { "epoch": 0.10560058537164682, "grad_norm": 1.3167755603790283, "learning_rate": 0.00010556573595796166, "loss": 4.6839, "step": 55130 }, { "epoch": 0.10561974020302206, "grad_norm": 1.2720035314559937, "learning_rate": 0.00010558489072207075, "loss": 4.6917, "step": 55140 }, { "epoch": 0.10563889503439729, "grad_norm": 1.3016562461853027, "learning_rate": 0.00010560404548617983, "loss": 4.6504, "step": 55150 }, { "epoch": 0.10565804986577251, "grad_norm": 1.4148268699645996, "learning_rate": 0.0001056232002502889, "loss": 4.6006, "step": 55160 }, { "epoch": 0.10567720469714775, "grad_norm": 1.2658801078796387, "learning_rate": 0.000105642355014398, "loss": 4.7121, "step": 55170 }, { "epoch": 0.10569635952852298, "grad_norm": 1.270416021347046, "learning_rate": 0.00010566150977850708, "loss": 4.6982, "step": 55180 }, { "epoch": 0.1057155143598982, "grad_norm": 1.3495490550994873, "learning_rate": 0.00010568066454261614, "loss": 4.7062, "step": 55190 }, { "epoch": 0.10573466919127344, "grad_norm": 1.3069424629211426, "learning_rate": 0.00010569981930672523, 
"loss": 4.6972, "step": 55200 }, { "epoch": 0.10575382402264867, "grad_norm": 1.5093704462051392, "learning_rate": 0.00010571897407083431, "loss": 4.6694, "step": 55210 }, { "epoch": 0.1057729788540239, "grad_norm": 1.2625950574874878, "learning_rate": 0.00010573812883494339, "loss": 4.653, "step": 55220 }, { "epoch": 0.10579213368539914, "grad_norm": 1.2984272241592407, "learning_rate": 0.00010575728359905247, "loss": 4.666, "step": 55230 }, { "epoch": 0.10581128851677436, "grad_norm": 1.3095232248306274, "learning_rate": 0.00010577643836316154, "loss": 4.71, "step": 55240 }, { "epoch": 0.10583044334814959, "grad_norm": 1.2812516689300537, "learning_rate": 0.00010579559312727063, "loss": 4.7375, "step": 55250 }, { "epoch": 0.10584959817952483, "grad_norm": 1.2795143127441406, "learning_rate": 0.00010581474789137971, "loss": 4.7341, "step": 55260 }, { "epoch": 0.10586875301090005, "grad_norm": 1.3060322999954224, "learning_rate": 0.00010583390265548878, "loss": 4.7763, "step": 55270 }, { "epoch": 0.10588790784227528, "grad_norm": 1.404948115348816, "learning_rate": 0.00010585305741959788, "loss": 4.6162, "step": 55280 }, { "epoch": 0.10590706267365052, "grad_norm": 1.321714997291565, "learning_rate": 0.00010587221218370696, "loss": 4.625, "step": 55290 }, { "epoch": 0.10592621750502575, "grad_norm": 1.304076910018921, "learning_rate": 0.00010589136694781602, "loss": 4.7652, "step": 55300 }, { "epoch": 0.10594537233640099, "grad_norm": 1.3094849586486816, "learning_rate": 0.0001059105217119251, "loss": 4.6377, "step": 55310 }, { "epoch": 0.10596452716777621, "grad_norm": 1.2939571142196655, "learning_rate": 0.00010592967647603419, "loss": 4.7584, "step": 55320 }, { "epoch": 0.10598368199915144, "grad_norm": 1.272701382637024, "learning_rate": 0.00010594883124014327, "loss": 4.6715, "step": 55330 }, { "epoch": 0.10600283683052668, "grad_norm": 1.3382997512817383, "learning_rate": 0.00010596798600425235, "loss": 4.5623, "step": 55340 }, { "epoch": 0.1060219916619019, 
"grad_norm": 1.3627740144729614, "learning_rate": 0.00010598714076836142, "loss": 4.6641, "step": 55350 }, { "epoch": 0.10604114649327713, "grad_norm": 1.3595890998840332, "learning_rate": 0.00010600629553247051, "loss": 4.7845, "step": 55360 }, { "epoch": 0.10606030132465237, "grad_norm": 1.3165202140808105, "learning_rate": 0.0001060254502965796, "loss": 4.6527, "step": 55370 }, { "epoch": 0.1060794561560276, "grad_norm": 1.3686927556991577, "learning_rate": 0.00010604460506068866, "loss": 4.6268, "step": 55380 }, { "epoch": 0.10609861098740282, "grad_norm": 1.3575811386108398, "learning_rate": 0.00010606375982479776, "loss": 4.6674, "step": 55390 }, { "epoch": 0.10611776581877806, "grad_norm": 1.3222830295562744, "learning_rate": 0.00010608291458890684, "loss": 4.6433, "step": 55400 }, { "epoch": 0.10613692065015329, "grad_norm": 1.4130315780639648, "learning_rate": 0.0001061020693530159, "loss": 4.7513, "step": 55410 }, { "epoch": 0.10615607548152851, "grad_norm": 1.2741048336029053, "learning_rate": 0.00010612122411712499, "loss": 4.6989, "step": 55420 }, { "epoch": 0.10617523031290375, "grad_norm": 1.3106554746627808, "learning_rate": 0.00010614037888123405, "loss": 4.6471, "step": 55430 }, { "epoch": 0.10619438514427898, "grad_norm": 1.2673845291137695, "learning_rate": 0.00010615953364534315, "loss": 4.7048, "step": 55440 }, { "epoch": 0.1062135399756542, "grad_norm": 1.332181453704834, "learning_rate": 0.00010617868840945223, "loss": 4.6531, "step": 55450 }, { "epoch": 0.10623269480702945, "grad_norm": 1.3422952890396118, "learning_rate": 0.0001061978431735613, "loss": 4.5056, "step": 55460 }, { "epoch": 0.10625184963840467, "grad_norm": 1.307405710220337, "learning_rate": 0.0001062169979376704, "loss": 4.5835, "step": 55470 }, { "epoch": 0.1062710044697799, "grad_norm": 1.254188060760498, "learning_rate": 0.00010623615270177947, "loss": 4.753, "step": 55480 }, { "epoch": 0.10629015930115514, "grad_norm": 1.2934223413467407, "learning_rate": 
0.00010625530746588854, "loss": 4.6986, "step": 55490 }, { "epoch": 0.10630931413253036, "grad_norm": 1.309238076210022, "learning_rate": 0.00010627446222999764, "loss": 4.7378, "step": 55500 }, { "epoch": 0.10632846896390559, "grad_norm": 1.3190234899520874, "learning_rate": 0.0001062936169941067, "loss": 4.587, "step": 55510 }, { "epoch": 0.10634762379528083, "grad_norm": 1.4077417850494385, "learning_rate": 0.00010631277175821579, "loss": 4.7533, "step": 55520 }, { "epoch": 0.10636677862665606, "grad_norm": 1.3302415609359741, "learning_rate": 0.00010633192652232487, "loss": 4.5789, "step": 55530 }, { "epoch": 0.10638593345803128, "grad_norm": 1.2463065385818481, "learning_rate": 0.00010635108128643394, "loss": 4.8054, "step": 55540 }, { "epoch": 0.10640508828940652, "grad_norm": 1.3326503038406372, "learning_rate": 0.00010637023605054303, "loss": 4.7054, "step": 55550 }, { "epoch": 0.10642424312078175, "grad_norm": 1.3008449077606201, "learning_rate": 0.00010638939081465211, "loss": 4.6647, "step": 55560 }, { "epoch": 0.10644339795215697, "grad_norm": 1.3550341129302979, "learning_rate": 0.00010640854557876118, "loss": 4.5189, "step": 55570 }, { "epoch": 0.10646255278353221, "grad_norm": 1.2766557931900024, "learning_rate": 0.00010642770034287027, "loss": 4.8545, "step": 55580 }, { "epoch": 0.10648170761490744, "grad_norm": 1.3136765956878662, "learning_rate": 0.00010644685510697936, "loss": 4.7913, "step": 55590 }, { "epoch": 0.10650086244628267, "grad_norm": 1.3327367305755615, "learning_rate": 0.00010646600987108842, "loss": 4.6667, "step": 55600 }, { "epoch": 0.1065200172776579, "grad_norm": 1.354877233505249, "learning_rate": 0.00010648516463519752, "loss": 4.6065, "step": 55610 }, { "epoch": 0.10653917210903313, "grad_norm": 1.317044734954834, "learning_rate": 0.00010650431939930659, "loss": 4.6619, "step": 55620 }, { "epoch": 0.10655832694040836, "grad_norm": 1.29511559009552, "learning_rate": 0.00010652347416341567, "loss": 4.7343, "step": 55630 }, { 
"epoch": 0.1065774817717836, "grad_norm": 1.3196040391921997, "learning_rate": 0.00010654262892752475, "loss": 4.7892, "step": 55640 }, { "epoch": 0.10659663660315882, "grad_norm": 1.3107765913009644, "learning_rate": 0.00010656178369163382, "loss": 4.6219, "step": 55650 }, { "epoch": 0.10661579143453405, "grad_norm": 1.3353654146194458, "learning_rate": 0.00010658093845574291, "loss": 4.7118, "step": 55660 }, { "epoch": 0.10663494626590929, "grad_norm": 1.3424969911575317, "learning_rate": 0.00010660009321985199, "loss": 4.6255, "step": 55670 }, { "epoch": 0.10665410109728451, "grad_norm": 1.4022268056869507, "learning_rate": 0.00010661924798396106, "loss": 4.7237, "step": 55680 }, { "epoch": 0.10667325592865974, "grad_norm": 1.3701685667037964, "learning_rate": 0.00010663840274807016, "loss": 4.6409, "step": 55690 }, { "epoch": 0.10669241076003498, "grad_norm": 1.337689995765686, "learning_rate": 0.00010665755751217922, "loss": 4.8374, "step": 55700 }, { "epoch": 0.1067115655914102, "grad_norm": 1.285791277885437, "learning_rate": 0.0001066767122762883, "loss": 4.7145, "step": 55710 }, { "epoch": 0.10673072042278543, "grad_norm": 1.2876365184783936, "learning_rate": 0.0001066958670403974, "loss": 4.6396, "step": 55720 }, { "epoch": 0.10674987525416067, "grad_norm": 1.3025991916656494, "learning_rate": 0.00010671502180450647, "loss": 4.6832, "step": 55730 }, { "epoch": 0.1067690300855359, "grad_norm": 1.2123029232025146, "learning_rate": 0.00010673417656861555, "loss": 4.7662, "step": 55740 }, { "epoch": 0.10678818491691112, "grad_norm": 1.3353192806243896, "learning_rate": 0.00010675333133272463, "loss": 4.6745, "step": 55750 }, { "epoch": 0.10680733974828636, "grad_norm": 1.2402050495147705, "learning_rate": 0.0001067724860968337, "loss": 4.6207, "step": 55760 }, { "epoch": 0.10682649457966159, "grad_norm": 1.2889220714569092, "learning_rate": 0.00010679164086094279, "loss": 4.8527, "step": 55770 }, { "epoch": 0.10684564941103682, "grad_norm": 
1.4187915325164795, "learning_rate": 0.00010681079562505187, "loss": 4.7002, "step": 55780 }, { "epoch": 0.10686480424241206, "grad_norm": 1.4246357679367065, "learning_rate": 0.00010682995038916094, "loss": 4.6225, "step": 55790 }, { "epoch": 0.10688395907378728, "grad_norm": 1.3271353244781494, "learning_rate": 0.00010684910515327004, "loss": 4.6536, "step": 55800 }, { "epoch": 0.10690311390516251, "grad_norm": 1.2894361019134521, "learning_rate": 0.0001068682599173791, "loss": 4.6639, "step": 55810 }, { "epoch": 0.10692226873653775, "grad_norm": 1.2550753355026245, "learning_rate": 0.00010688741468148819, "loss": 4.7507, "step": 55820 }, { "epoch": 0.10694142356791297, "grad_norm": 1.279250144958496, "learning_rate": 0.00010690656944559727, "loss": 4.7682, "step": 55830 }, { "epoch": 0.1069605783992882, "grad_norm": 1.27120041847229, "learning_rate": 0.00010692572420970635, "loss": 4.7149, "step": 55840 }, { "epoch": 0.10697973323066344, "grad_norm": 1.297024130821228, "learning_rate": 0.00010694487897381543, "loss": 4.6829, "step": 55850 }, { "epoch": 0.10699888806203867, "grad_norm": 1.2578376531600952, "learning_rate": 0.00010696403373792451, "loss": 4.7081, "step": 55860 }, { "epoch": 0.10701804289341389, "grad_norm": 1.4041792154312134, "learning_rate": 0.00010698318850203358, "loss": 4.6328, "step": 55870 }, { "epoch": 0.10703719772478913, "grad_norm": 1.3004064559936523, "learning_rate": 0.00010700234326614267, "loss": 4.7351, "step": 55880 }, { "epoch": 0.10705635255616436, "grad_norm": 1.6812318563461304, "learning_rate": 0.00010702149803025174, "loss": 4.6944, "step": 55890 }, { "epoch": 0.10707550738753958, "grad_norm": 1.2782710790634155, "learning_rate": 0.00010704065279436082, "loss": 4.5534, "step": 55900 }, { "epoch": 0.10709466221891482, "grad_norm": 1.2807835340499878, "learning_rate": 0.00010705980755846992, "loss": 4.7621, "step": 55910 }, { "epoch": 0.10711381705029005, "grad_norm": 1.273976445198059, "learning_rate": 0.00010707896232257898, 
"loss": 4.668, "step": 55920 }, { "epoch": 0.10713297188166528, "grad_norm": 1.360121250152588, "learning_rate": 0.00010709811708668807, "loss": 4.6361, "step": 55930 }, { "epoch": 0.10715212671304052, "grad_norm": 1.291548728942871, "learning_rate": 0.00010711727185079715, "loss": 4.569, "step": 55940 }, { "epoch": 0.10717128154441574, "grad_norm": 1.3284555673599243, "learning_rate": 0.00010713642661490623, "loss": 4.6763, "step": 55950 }, { "epoch": 0.10719043637579098, "grad_norm": 1.3946192264556885, "learning_rate": 0.00010715558137901531, "loss": 4.6105, "step": 55960 }, { "epoch": 0.10720959120716621, "grad_norm": 1.259298324584961, "learning_rate": 0.00010717473614312439, "loss": 4.7683, "step": 55970 }, { "epoch": 0.10722874603854143, "grad_norm": 1.3536489009857178, "learning_rate": 0.00010719389090723346, "loss": 4.6907, "step": 55980 }, { "epoch": 0.10724790086991667, "grad_norm": 1.3143869638442993, "learning_rate": 0.00010721304567134255, "loss": 4.7618, "step": 55990 }, { "epoch": 0.1072670557012919, "grad_norm": 1.4105901718139648, "learning_rate": 0.00010723220043545162, "loss": 4.6226, "step": 56000 }, { "epoch": 0.10728621053266713, "grad_norm": 1.288631558418274, "learning_rate": 0.0001072513551995607, "loss": 4.6534, "step": 56010 }, { "epoch": 0.10730536536404237, "grad_norm": 1.2901252508163452, "learning_rate": 0.0001072705099636698, "loss": 4.7656, "step": 56020 }, { "epoch": 0.10732452019541759, "grad_norm": 1.346657395362854, "learning_rate": 0.00010728966472777887, "loss": 4.7398, "step": 56030 }, { "epoch": 0.10734367502679282, "grad_norm": 1.2843610048294067, "learning_rate": 0.00010730881949188795, "loss": 4.6614, "step": 56040 }, { "epoch": 0.10736282985816806, "grad_norm": 1.294022798538208, "learning_rate": 0.00010732797425599703, "loss": 4.677, "step": 56050 }, { "epoch": 0.10738198468954328, "grad_norm": 1.2714999914169312, "learning_rate": 0.00010734712902010611, "loss": 4.5977, "step": 56060 }, { "epoch": 0.10740113952091851, 
"grad_norm": 1.3130502700805664, "learning_rate": 0.00010736628378421519, "loss": 4.7744, "step": 56070 }, { "epoch": 0.10742029435229375, "grad_norm": 1.2438846826553345, "learning_rate": 0.00010738543854832427, "loss": 4.7559, "step": 56080 }, { "epoch": 0.10743944918366898, "grad_norm": 1.2778955698013306, "learning_rate": 0.00010740459331243334, "loss": 4.663, "step": 56090 }, { "epoch": 0.1074586040150442, "grad_norm": 1.2644200325012207, "learning_rate": 0.00010742374807654243, "loss": 4.6505, "step": 56100 }, { "epoch": 0.10747775884641944, "grad_norm": 1.2810754776000977, "learning_rate": 0.0001074429028406515, "loss": 4.6922, "step": 56110 }, { "epoch": 0.10749691367779467, "grad_norm": 1.470516324043274, "learning_rate": 0.00010746205760476058, "loss": 4.6863, "step": 56120 }, { "epoch": 0.1075160685091699, "grad_norm": 1.3157858848571777, "learning_rate": 0.00010748121236886968, "loss": 4.7739, "step": 56130 }, { "epoch": 0.10753522334054513, "grad_norm": 1.3068275451660156, "learning_rate": 0.00010750036713297875, "loss": 4.5873, "step": 56140 }, { "epoch": 0.10755437817192036, "grad_norm": 1.4268466234207153, "learning_rate": 0.00010751952189708783, "loss": 4.752, "step": 56150 }, { "epoch": 0.10757353300329558, "grad_norm": 1.2848725318908691, "learning_rate": 0.00010753867666119691, "loss": 4.6518, "step": 56160 }, { "epoch": 0.10759268783467082, "grad_norm": 1.274601697921753, "learning_rate": 0.00010755783142530599, "loss": 4.761, "step": 56170 }, { "epoch": 0.10761184266604605, "grad_norm": 1.285367488861084, "learning_rate": 0.00010757698618941507, "loss": 4.6527, "step": 56180 }, { "epoch": 0.10763099749742128, "grad_norm": 1.2617655992507935, "learning_rate": 0.00010759614095352414, "loss": 4.572, "step": 56190 }, { "epoch": 0.10765015232879652, "grad_norm": 1.3299546241760254, "learning_rate": 0.00010761529571763322, "loss": 4.6058, "step": 56200 }, { "epoch": 0.10766930716017174, "grad_norm": 1.2869142293930054, "learning_rate": 
0.00010763445048174232, "loss": 4.6641, "step": 56210 }, { "epoch": 0.10768846199154697, "grad_norm": 1.3281058073043823, "learning_rate": 0.00010765360524585138, "loss": 4.5272, "step": 56220 }, { "epoch": 0.10770761682292221, "grad_norm": 1.2933237552642822, "learning_rate": 0.00010767276000996046, "loss": 4.6633, "step": 56230 }, { "epoch": 0.10772677165429743, "grad_norm": 1.3436219692230225, "learning_rate": 0.00010769191477406956, "loss": 4.5792, "step": 56240 }, { "epoch": 0.10774592648567266, "grad_norm": 1.2802194356918335, "learning_rate": 0.00010771106953817863, "loss": 4.6906, "step": 56250 }, { "epoch": 0.1077650813170479, "grad_norm": 1.2828435897827148, "learning_rate": 0.00010773022430228771, "loss": 4.58, "step": 56260 }, { "epoch": 0.10778423614842313, "grad_norm": 1.3265689611434937, "learning_rate": 0.00010774937906639679, "loss": 4.5824, "step": 56270 }, { "epoch": 0.10780339097979835, "grad_norm": 1.2977757453918457, "learning_rate": 0.00010776853383050587, "loss": 4.6575, "step": 56280 }, { "epoch": 0.10782254581117359, "grad_norm": 1.2872986793518066, "learning_rate": 0.00010778768859461495, "loss": 4.4672, "step": 56290 }, { "epoch": 0.10784170064254882, "grad_norm": 1.3088606595993042, "learning_rate": 0.00010780684335872402, "loss": 4.6854, "step": 56300 }, { "epoch": 0.10786085547392404, "grad_norm": 1.282242774963379, "learning_rate": 0.0001078259981228331, "loss": 4.6199, "step": 56310 }, { "epoch": 0.10788001030529928, "grad_norm": 1.3000237941741943, "learning_rate": 0.0001078451528869422, "loss": 4.7731, "step": 56320 }, { "epoch": 0.10789916513667451, "grad_norm": 1.309700846672058, "learning_rate": 0.00010786430765105126, "loss": 4.7027, "step": 56330 }, { "epoch": 0.10791831996804974, "grad_norm": 1.2624341249465942, "learning_rate": 0.00010788346241516035, "loss": 4.6407, "step": 56340 }, { "epoch": 0.10793747479942498, "grad_norm": 1.270470380783081, "learning_rate": 0.00010790261717926944, "loss": 4.6391, "step": 56350 }, { 
"epoch": 0.1079566296308002, "grad_norm": 1.3454731702804565, "learning_rate": 0.00010792177194337851, "loss": 4.453, "step": 56360 }, { "epoch": 0.10797578446217543, "grad_norm": 1.2784432172775269, "learning_rate": 0.00010794092670748759, "loss": 4.6646, "step": 56370 }, { "epoch": 0.10799493929355067, "grad_norm": 1.3868210315704346, "learning_rate": 0.00010796008147159666, "loss": 4.5986, "step": 56380 }, { "epoch": 0.1080140941249259, "grad_norm": 1.2776743173599243, "learning_rate": 0.00010797923623570575, "loss": 4.7046, "step": 56390 }, { "epoch": 0.10803324895630112, "grad_norm": Infinity, "learning_rate": 0.00010799839099981483, "loss": 4.6013, "step": 56400 }, { "epoch": 0.10805240378767636, "grad_norm": 1.316718578338623, "learning_rate": 0.000108015630287513, "loss": 4.5779, "step": 56410 }, { "epoch": 0.10807155861905159, "grad_norm": 1.2845114469528198, "learning_rate": 0.00010803478505162209, "loss": 4.7113, "step": 56420 }, { "epoch": 0.10809071345042681, "grad_norm": 1.2957106828689575, "learning_rate": 0.00010805393981573116, "loss": 4.5327, "step": 56430 }, { "epoch": 0.10810986828180205, "grad_norm": 1.3663650751113892, "learning_rate": 0.00010807309457984024, "loss": 4.5932, "step": 56440 }, { "epoch": 0.10812902311317728, "grad_norm": 1.2701104879379272, "learning_rate": 0.00010809224934394933, "loss": 4.7283, "step": 56450 }, { "epoch": 0.1081481779445525, "grad_norm": 1.2766646146774292, "learning_rate": 0.0001081114041080584, "loss": 4.5437, "step": 56460 }, { "epoch": 0.10816733277592774, "grad_norm": 1.256941556930542, "learning_rate": 0.00010813055887216748, "loss": 4.6462, "step": 56470 }, { "epoch": 0.10818648760730297, "grad_norm": 1.312282919883728, "learning_rate": 0.00010814971363627655, "loss": 4.731, "step": 56480 }, { "epoch": 0.1082056424386782, "grad_norm": 1.2830783128738403, "learning_rate": 0.00010816886840038564, "loss": 4.6303, "step": 56490 }, { "epoch": 0.10822479727005344, "grad_norm": 1.2706482410430908, 
"learning_rate": 0.00010818802316449472, "loss": 4.6755, "step": 56500 }, { "epoch": 0.10824395210142866, "grad_norm": 1.3161709308624268, "learning_rate": 0.00010820717792860379, "loss": 4.7737, "step": 56510 }, { "epoch": 0.10826310693280389, "grad_norm": 1.2637666463851929, "learning_rate": 0.00010822633269271289, "loss": 4.6167, "step": 56520 }, { "epoch": 0.10828226176417913, "grad_norm": 1.3161810636520386, "learning_rate": 0.00010824548745682197, "loss": 4.6977, "step": 56530 }, { "epoch": 0.10830141659555435, "grad_norm": 1.277546763420105, "learning_rate": 0.00010826464222093104, "loss": 4.7171, "step": 56540 }, { "epoch": 0.10832057142692958, "grad_norm": 1.5477217435836792, "learning_rate": 0.00010828379698504012, "loss": 4.7171, "step": 56550 }, { "epoch": 0.10833972625830482, "grad_norm": 1.273675799369812, "learning_rate": 0.00010830295174914919, "loss": 4.5197, "step": 56560 }, { "epoch": 0.10835888108968005, "grad_norm": 1.2841167449951172, "learning_rate": 0.00010832210651325828, "loss": 4.6591, "step": 56570 }, { "epoch": 0.10837803592105527, "grad_norm": 1.3107819557189941, "learning_rate": 0.00010834126127736736, "loss": 4.7407, "step": 56580 }, { "epoch": 0.10839719075243051, "grad_norm": 1.3668776750564575, "learning_rate": 0.00010836041604147643, "loss": 4.5414, "step": 56590 }, { "epoch": 0.10841634558380574, "grad_norm": 1.29189932346344, "learning_rate": 0.00010837957080558552, "loss": 4.7284, "step": 56600 }, { "epoch": 0.10843550041518096, "grad_norm": 1.2877870798110962, "learning_rate": 0.0001083987255696946, "loss": 4.6187, "step": 56610 }, { "epoch": 0.1084546552465562, "grad_norm": 1.29145085811615, "learning_rate": 0.00010841788033380367, "loss": 4.8395, "step": 56620 }, { "epoch": 0.10847381007793143, "grad_norm": 1.2921488285064697, "learning_rate": 0.00010843703509791275, "loss": 4.6386, "step": 56630 }, { "epoch": 0.10849296490930667, "grad_norm": 1.3231414556503296, "learning_rate": 0.00010845618986202185, "loss": 4.6217, 
"step": 56640 }, { "epoch": 0.1085121197406819, "grad_norm": 1.244593858718872, "learning_rate": 0.00010847534462613092, "loss": 4.6719, "step": 56650 }, { "epoch": 0.10853127457205712, "grad_norm": 1.275685429573059, "learning_rate": 0.00010849449939024, "loss": 4.5014, "step": 56660 }, { "epoch": 0.10855042940343236, "grad_norm": 1.235026240348816, "learning_rate": 0.00010851365415434907, "loss": 4.8074, "step": 56670 }, { "epoch": 0.10856958423480759, "grad_norm": 1.3440313339233398, "learning_rate": 0.00010853280891845816, "loss": 4.8602, "step": 56680 }, { "epoch": 0.10858873906618281, "grad_norm": 1.2673869132995605, "learning_rate": 0.00010855196368256724, "loss": 4.6419, "step": 56690 }, { "epoch": 0.10860789389755805, "grad_norm": 1.430790901184082, "learning_rate": 0.00010857111844667631, "loss": 4.5322, "step": 56700 }, { "epoch": 0.10862704872893328, "grad_norm": 1.317102313041687, "learning_rate": 0.0001085902732107854, "loss": 4.6831, "step": 56710 }, { "epoch": 0.1086462035603085, "grad_norm": 1.427930474281311, "learning_rate": 0.00010860942797489449, "loss": 4.521, "step": 56720 }, { "epoch": 0.10866535839168374, "grad_norm": 1.2652404308319092, "learning_rate": 0.00010862858273900355, "loss": 4.6833, "step": 56730 }, { "epoch": 0.10868451322305897, "grad_norm": 1.287894606590271, "learning_rate": 0.00010864773750311264, "loss": 4.6899, "step": 56740 }, { "epoch": 0.1087036680544342, "grad_norm": 1.4431753158569336, "learning_rate": 0.00010866689226722173, "loss": 4.7138, "step": 56750 }, { "epoch": 0.10872282288580944, "grad_norm": 1.2784587144851685, "learning_rate": 0.0001086860470313308, "loss": 4.6429, "step": 56760 }, { "epoch": 0.10874197771718466, "grad_norm": 1.291843295097351, "learning_rate": 0.00010870520179543988, "loss": 4.715, "step": 56770 }, { "epoch": 0.10876113254855989, "grad_norm": 1.2273738384246826, "learning_rate": 0.00010872435655954895, "loss": 4.7793, "step": 56780 }, { "epoch": 0.10878028737993513, "grad_norm": 
1.311368703842163, "learning_rate": 0.00010874351132365804, "loss": 4.6825, "step": 56790 }, { "epoch": 0.10879944221131035, "grad_norm": 1.269781470298767, "learning_rate": 0.00010876266608776712, "loss": 4.6225, "step": 56800 }, { "epoch": 0.10881859704268558, "grad_norm": 1.2997145652770996, "learning_rate": 0.00010878182085187619, "loss": 4.6735, "step": 56810 }, { "epoch": 0.10883775187406082, "grad_norm": 1.2779964208602905, "learning_rate": 0.00010880097561598529, "loss": 4.5732, "step": 56820 }, { "epoch": 0.10885690670543605, "grad_norm": 1.2969499826431274, "learning_rate": 0.00010882013038009437, "loss": 4.6662, "step": 56830 }, { "epoch": 0.10887606153681127, "grad_norm": 1.2536118030548096, "learning_rate": 0.00010883928514420343, "loss": 4.6556, "step": 56840 }, { "epoch": 0.10889521636818651, "grad_norm": 1.3873884677886963, "learning_rate": 0.00010885843990831252, "loss": 4.6188, "step": 56850 }, { "epoch": 0.10891437119956174, "grad_norm": 1.2414600849151611, "learning_rate": 0.0001088775946724216, "loss": 4.6343, "step": 56860 }, { "epoch": 0.10893352603093696, "grad_norm": 1.2712544202804565, "learning_rate": 0.00010889674943653068, "loss": 4.6412, "step": 56870 }, { "epoch": 0.1089526808623122, "grad_norm": 1.399962067604065, "learning_rate": 0.00010891590420063976, "loss": 4.7786, "step": 56880 }, { "epoch": 0.10897183569368743, "grad_norm": 1.3811841011047363, "learning_rate": 0.00010893505896474883, "loss": 4.5442, "step": 56890 }, { "epoch": 0.10899099052506266, "grad_norm": 1.2963340282440186, "learning_rate": 0.00010895421372885792, "loss": 4.5487, "step": 56900 }, { "epoch": 0.1090101453564379, "grad_norm": 1.300392508506775, "learning_rate": 0.000108973368492967, "loss": 4.6561, "step": 56910 }, { "epoch": 0.10902930018781312, "grad_norm": 1.2969962358474731, "learning_rate": 0.00010899252325707607, "loss": 4.7153, "step": 56920 }, { "epoch": 0.10904845501918835, "grad_norm": 1.371863842010498, "learning_rate": 0.00010901167802118517, 
"loss": 4.5511, "step": 56930 }, { "epoch": 0.10906760985056359, "grad_norm": 1.3271697759628296, "learning_rate": 0.00010903083278529425, "loss": 4.7169, "step": 56940 }, { "epoch": 0.10908676468193881, "grad_norm": 1.3714762926101685, "learning_rate": 0.00010904998754940332, "loss": 4.6949, "step": 56950 }, { "epoch": 0.10910591951331404, "grad_norm": 1.3175890445709229, "learning_rate": 0.0001090691423135124, "loss": 4.6666, "step": 56960 }, { "epoch": 0.10912507434468928, "grad_norm": 1.2349752187728882, "learning_rate": 0.00010908829707762148, "loss": 4.6395, "step": 56970 }, { "epoch": 0.1091442291760645, "grad_norm": 1.319723129272461, "learning_rate": 0.00010910745184173056, "loss": 4.7601, "step": 56980 }, { "epoch": 0.10916338400743973, "grad_norm": 1.2860784530639648, "learning_rate": 0.00010912660660583964, "loss": 4.6959, "step": 56990 }, { "epoch": 0.10918253883881497, "grad_norm": 1.3395406007766724, "learning_rate": 0.00010914576136994871, "loss": 4.5978, "step": 57000 }, { "epoch": 0.1092016936701902, "grad_norm": 1.2683336734771729, "learning_rate": 0.0001091649161340578, "loss": 4.7778, "step": 57010 }, { "epoch": 0.10922084850156542, "grad_norm": 1.2530481815338135, "learning_rate": 0.00010918407089816688, "loss": 4.7153, "step": 57020 }, { "epoch": 0.10924000333294066, "grad_norm": 1.3592371940612793, "learning_rate": 0.00010920322566227595, "loss": 4.6487, "step": 57030 }, { "epoch": 0.10925915816431589, "grad_norm": 1.3160210847854614, "learning_rate": 0.00010922238042638505, "loss": 4.5617, "step": 57040 }, { "epoch": 0.10927831299569112, "grad_norm": 1.3091371059417725, "learning_rate": 0.00010924153519049412, "loss": 4.6494, "step": 57050 }, { "epoch": 0.10929746782706636, "grad_norm": 1.3152092695236206, "learning_rate": 0.0001092606899546032, "loss": 4.625, "step": 57060 }, { "epoch": 0.10931662265844158, "grad_norm": 1.2985491752624512, "learning_rate": 0.00010927984471871228, "loss": 4.7396, "step": 57070 }, { "epoch": 
0.10933577748981681, "grad_norm": 1.2749016284942627, "learning_rate": 0.00010929899948282136, "loss": 4.6611, "step": 57080 }, { "epoch": 0.10935493232119205, "grad_norm": 1.4898717403411865, "learning_rate": 0.00010931815424693044, "loss": 4.6797, "step": 57090 }, { "epoch": 0.10937408715256727, "grad_norm": 1.3227488994598389, "learning_rate": 0.00010933730901103952, "loss": 4.5438, "step": 57100 }, { "epoch": 0.1093932419839425, "grad_norm": 1.2619457244873047, "learning_rate": 0.00010935646377514859, "loss": 4.6829, "step": 57110 }, { "epoch": 0.10941239681531774, "grad_norm": 1.2586113214492798, "learning_rate": 0.00010937561853925768, "loss": 4.6354, "step": 57120 }, { "epoch": 0.10943155164669296, "grad_norm": 1.2859667539596558, "learning_rate": 0.00010939477330336677, "loss": 4.6623, "step": 57130 }, { "epoch": 0.10945070647806819, "grad_norm": 1.7196707725524902, "learning_rate": 0.00010941392806747583, "loss": 4.5217, "step": 57140 }, { "epoch": 0.10946986130944343, "grad_norm": 1.2629021406173706, "learning_rate": 0.00010943308283158493, "loss": 4.6401, "step": 57150 }, { "epoch": 0.10948901614081866, "grad_norm": 1.3606728315353394, "learning_rate": 0.000109452237595694, "loss": 4.6364, "step": 57160 }, { "epoch": 0.10950817097219388, "grad_norm": 1.2565606832504272, "learning_rate": 0.00010947139235980308, "loss": 4.6926, "step": 57170 }, { "epoch": 0.10952732580356912, "grad_norm": 1.3029396533966064, "learning_rate": 0.00010949054712391216, "loss": 4.6788, "step": 57180 }, { "epoch": 0.10954648063494435, "grad_norm": 1.2951809167861938, "learning_rate": 0.00010950970188802124, "loss": 4.6507, "step": 57190 }, { "epoch": 0.10956563546631957, "grad_norm": 1.2837809324264526, "learning_rate": 0.00010952885665213032, "loss": 4.7224, "step": 57200 }, { "epoch": 0.10958479029769481, "grad_norm": 1.2580115795135498, "learning_rate": 0.0001095480114162394, "loss": 4.565, "step": 57210 }, { "epoch": 0.10960394512907004, "grad_norm": 1.247239589691162, 
"learning_rate": 0.00010956716618034847, "loss": 4.6949, "step": 57220 }, { "epoch": 0.10962309996044527, "grad_norm": 1.2998372316360474, "learning_rate": 0.00010958632094445756, "loss": 4.6746, "step": 57230 }, { "epoch": 0.1096422547918205, "grad_norm": 1.3602865934371948, "learning_rate": 0.00010960547570856663, "loss": 4.5708, "step": 57240 }, { "epoch": 0.10966140962319573, "grad_norm": 1.3194422721862793, "learning_rate": 0.00010962463047267571, "loss": 4.6289, "step": 57250 }, { "epoch": 0.10968056445457096, "grad_norm": 1.3144034147262573, "learning_rate": 0.00010964378523678481, "loss": 4.6549, "step": 57260 }, { "epoch": 0.1096997192859462, "grad_norm": 1.2304260730743408, "learning_rate": 0.00010966294000089388, "loss": 4.6778, "step": 57270 }, { "epoch": 0.10971887411732142, "grad_norm": 1.2882156372070312, "learning_rate": 0.00010968209476500296, "loss": 4.5847, "step": 57280 }, { "epoch": 0.10973802894869666, "grad_norm": 1.3294757604599, "learning_rate": 0.00010970124952911204, "loss": 4.7347, "step": 57290 }, { "epoch": 0.10975718378007189, "grad_norm": 1.2897213697433472, "learning_rate": 0.00010972040429322112, "loss": 4.7246, "step": 57300 }, { "epoch": 0.10977633861144712, "grad_norm": 1.2331297397613525, "learning_rate": 0.0001097395590573302, "loss": 4.6853, "step": 57310 }, { "epoch": 0.10979549344282236, "grad_norm": 1.2565919160842896, "learning_rate": 0.00010975871382143928, "loss": 4.6204, "step": 57320 }, { "epoch": 0.10981464827419758, "grad_norm": 1.2510926723480225, "learning_rate": 0.00010977786858554835, "loss": 4.6797, "step": 57330 }, { "epoch": 0.10983380310557281, "grad_norm": 1.3760703802108765, "learning_rate": 0.00010979702334965745, "loss": 4.6106, "step": 57340 }, { "epoch": 0.10985295793694805, "grad_norm": 1.2703033685684204, "learning_rate": 0.00010981617811376651, "loss": 4.673, "step": 57350 }, { "epoch": 0.10987211276832327, "grad_norm": 1.3285499811172485, "learning_rate": 0.0001098353328778756, "loss": 4.7979, 
"step": 57360 }, { "epoch": 0.1098912675996985, "grad_norm": 1.342847466468811, "learning_rate": 0.00010985448764198469, "loss": 4.4986, "step": 57370 }, { "epoch": 0.10991042243107374, "grad_norm": 1.3453222513198853, "learning_rate": 0.00010987364240609376, "loss": 4.7755, "step": 57380 }, { "epoch": 0.10992957726244897, "grad_norm": 1.3011391162872314, "learning_rate": 0.00010989279717020284, "loss": 4.5817, "step": 57390 }, { "epoch": 0.10994873209382419, "grad_norm": 1.2817845344543457, "learning_rate": 0.00010991195193431192, "loss": 4.5862, "step": 57400 }, { "epoch": 0.10996788692519943, "grad_norm": 1.2422841787338257, "learning_rate": 0.000109931106698421, "loss": 4.616, "step": 57410 }, { "epoch": 0.10998704175657466, "grad_norm": 1.2431533336639404, "learning_rate": 0.00010995026146253008, "loss": 4.6181, "step": 57420 }, { "epoch": 0.11000619658794988, "grad_norm": 1.2786864042282104, "learning_rate": 0.00010996941622663916, "loss": 4.7138, "step": 57430 }, { "epoch": 0.11002535141932512, "grad_norm": 1.249833345413208, "learning_rate": 0.00010998857099074823, "loss": 4.719, "step": 57440 }, { "epoch": 0.11004450625070035, "grad_norm": 1.325195074081421, "learning_rate": 0.00011000772575485733, "loss": 4.7095, "step": 57450 }, { "epoch": 0.11006366108207558, "grad_norm": 1.2900148630142212, "learning_rate": 0.0001100268805189664, "loss": 4.6379, "step": 57460 }, { "epoch": 0.11008281591345082, "grad_norm": 1.415517807006836, "learning_rate": 0.00011004603528307548, "loss": 4.6801, "step": 57470 }, { "epoch": 0.11010197074482604, "grad_norm": 1.2787494659423828, "learning_rate": 0.00011006519004718457, "loss": 4.6548, "step": 57480 }, { "epoch": 0.11012112557620127, "grad_norm": 1.278685212135315, "learning_rate": 0.00011008434481129364, "loss": 4.7503, "step": 57490 }, { "epoch": 0.11014028040757651, "grad_norm": 1.245365023612976, "learning_rate": 0.00011010349957540272, "loss": 4.6449, "step": 57500 }, { "epoch": 0.11015943523895173, "grad_norm": 
1.3662350177764893, "learning_rate": 0.0001101226543395118, "loss": 4.6596, "step": 57510 }, { "epoch": 0.11017859007032696, "grad_norm": 1.3234432935714722, "learning_rate": 0.00011014180910362088, "loss": 4.711, "step": 57520 }, { "epoch": 0.1101977449017022, "grad_norm": 1.431208610534668, "learning_rate": 0.00011016096386772996, "loss": 4.6144, "step": 57530 }, { "epoch": 0.11021689973307743, "grad_norm": 1.2708879709243774, "learning_rate": 0.00011018011863183903, "loss": 4.6944, "step": 57540 }, { "epoch": 0.11023605456445265, "grad_norm": 1.2796714305877686, "learning_rate": 0.00011019927339594811, "loss": 4.6472, "step": 57550 }, { "epoch": 0.11025520939582789, "grad_norm": 1.3254692554473877, "learning_rate": 0.00011021842816005721, "loss": 4.6475, "step": 57560 }, { "epoch": 0.11027436422720312, "grad_norm": 1.3902982473373413, "learning_rate": 0.00011023758292416628, "loss": 4.5766, "step": 57570 }, { "epoch": 0.11029351905857834, "grad_norm": 1.279494047164917, "learning_rate": 0.00011025673768827536, "loss": 4.692, "step": 57580 }, { "epoch": 0.11031267388995358, "grad_norm": 1.2629939317703247, "learning_rate": 0.00011027589245238445, "loss": 4.6426, "step": 57590 }, { "epoch": 0.11033182872132881, "grad_norm": 1.2653560638427734, "learning_rate": 0.00011029504721649352, "loss": 4.6771, "step": 57600 }, { "epoch": 0.11035098355270404, "grad_norm": 1.2567768096923828, "learning_rate": 0.0001103142019806026, "loss": 4.7367, "step": 57610 }, { "epoch": 0.11037013838407927, "grad_norm": 1.2779457569122314, "learning_rate": 0.00011033335674471168, "loss": 4.5241, "step": 57620 }, { "epoch": 0.1103892932154545, "grad_norm": 1.3521536588668823, "learning_rate": 0.00011035251150882076, "loss": 4.6442, "step": 57630 }, { "epoch": 0.11040844804682973, "grad_norm": 1.2377053499221802, "learning_rate": 0.00011037166627292984, "loss": 4.6272, "step": 57640 }, { "epoch": 0.11042760287820497, "grad_norm": 1.2917473316192627, "learning_rate": 0.00011039082103703891, 
"loss": 4.5242, "step": 57650 }, { "epoch": 0.11044675770958019, "grad_norm": 1.3732837438583374, "learning_rate": 0.000110409975801148, "loss": 4.5659, "step": 57660 }, { "epoch": 0.11046591254095542, "grad_norm": 1.2405519485473633, "learning_rate": 0.00011042913056525709, "loss": 4.708, "step": 57670 }, { "epoch": 0.11048506737233066, "grad_norm": 1.2675232887268066, "learning_rate": 0.00011044828532936616, "loss": 4.6531, "step": 57680 }, { "epoch": 0.11050422220370588, "grad_norm": 1.3304157257080078, "learning_rate": 0.00011046744009347524, "loss": 4.6683, "step": 57690 }, { "epoch": 0.11052337703508111, "grad_norm": 1.283262848854065, "learning_rate": 0.00011048659485758433, "loss": 4.636, "step": 57700 }, { "epoch": 0.11054253186645635, "grad_norm": 1.3054496049880981, "learning_rate": 0.0001105057496216934, "loss": 4.5682, "step": 57710 }, { "epoch": 0.11056168669783158, "grad_norm": 1.3202539682388306, "learning_rate": 0.00011052490438580248, "loss": 4.602, "step": 57720 }, { "epoch": 0.1105808415292068, "grad_norm": 1.2779765129089355, "learning_rate": 0.00011054405914991155, "loss": 4.7831, "step": 57730 }, { "epoch": 0.11059999636058204, "grad_norm": 1.2677043676376343, "learning_rate": 0.00011056321391402064, "loss": 4.6811, "step": 57740 }, { "epoch": 0.11061915119195727, "grad_norm": 1.2719955444335938, "learning_rate": 0.00011058236867812973, "loss": 4.5448, "step": 57750 }, { "epoch": 0.1106383060233325, "grad_norm": 1.2872437238693237, "learning_rate": 0.00011060152344223879, "loss": 4.684, "step": 57760 }, { "epoch": 0.11065746085470773, "grad_norm": 1.2672302722930908, "learning_rate": 0.00011062067820634787, "loss": 4.6863, "step": 57770 }, { "epoch": 0.11067661568608296, "grad_norm": 1.2530516386032104, "learning_rate": 0.00011063983297045697, "loss": 4.7387, "step": 57780 }, { "epoch": 0.11069577051745819, "grad_norm": 1.2704046964645386, "learning_rate": 0.00011065898773456604, "loss": 4.6373, "step": 57790 }, { "epoch": 
0.11071492534883343, "grad_norm": 1.3111122846603394, "learning_rate": 0.00011067814249867512, "loss": 4.582, "step": 57800 }, { "epoch": 0.11073408018020865, "grad_norm": 1.266080617904663, "learning_rate": 0.00011069729726278421, "loss": 4.6697, "step": 57810 }, { "epoch": 0.11075323501158388, "grad_norm": 1.2376677989959717, "learning_rate": 0.00011071645202689328, "loss": 4.7826, "step": 57820 }, { "epoch": 0.11077238984295912, "grad_norm": 1.2651493549346924, "learning_rate": 0.00011073560679100236, "loss": 4.6126, "step": 57830 }, { "epoch": 0.11079154467433434, "grad_norm": 1.3586394786834717, "learning_rate": 0.00011075476155511143, "loss": 4.5971, "step": 57840 }, { "epoch": 0.11081069950570957, "grad_norm": 1.2617522478103638, "learning_rate": 0.00011077391631922052, "loss": 4.5672, "step": 57850 }, { "epoch": 0.11082985433708481, "grad_norm": 1.2998485565185547, "learning_rate": 0.0001107930710833296, "loss": 4.6777, "step": 57860 }, { "epoch": 0.11084900916846004, "grad_norm": 1.3952456712722778, "learning_rate": 0.00011081222584743867, "loss": 4.5859, "step": 57870 }, { "epoch": 0.11086816399983526, "grad_norm": 1.3627660274505615, "learning_rate": 0.00011083138061154775, "loss": 4.5462, "step": 57880 }, { "epoch": 0.1108873188312105, "grad_norm": 1.3981211185455322, "learning_rate": 0.00011085053537565685, "loss": 4.5857, "step": 57890 }, { "epoch": 0.11090647366258573, "grad_norm": 1.2908343076705933, "learning_rate": 0.00011086969013976592, "loss": 4.5738, "step": 57900 }, { "epoch": 0.11092562849396095, "grad_norm": 1.3227967023849487, "learning_rate": 0.000110888844903875, "loss": 4.703, "step": 57910 }, { "epoch": 0.1109447833253362, "grad_norm": 1.3089426755905151, "learning_rate": 0.00011090799966798407, "loss": 4.7339, "step": 57920 }, { "epoch": 0.11096393815671142, "grad_norm": 1.2756781578063965, "learning_rate": 0.00011092715443209316, "loss": 4.6895, "step": 57930 }, { "epoch": 0.11098309298808665, "grad_norm": 1.3205124139785767, 
"learning_rate": 0.00011094630919620224, "loss": 4.4544, "step": 57940 }, { "epoch": 0.11100224781946189, "grad_norm": 1.3021759986877441, "learning_rate": 0.00011096546396031131, "loss": 4.5094, "step": 57950 }, { "epoch": 0.11102140265083711, "grad_norm": 1.311957597732544, "learning_rate": 0.0001109846187244204, "loss": 4.6851, "step": 57960 }, { "epoch": 0.11104055748221235, "grad_norm": 1.2858760356903076, "learning_rate": 0.00011100377348852949, "loss": 4.6226, "step": 57970 }, { "epoch": 0.11105971231358758, "grad_norm": 1.322681188583374, "learning_rate": 0.00011102292825263855, "loss": 4.6281, "step": 57980 }, { "epoch": 0.1110788671449628, "grad_norm": 1.2491902112960815, "learning_rate": 0.00011104208301674764, "loss": 4.7747, "step": 57990 }, { "epoch": 0.11109802197633804, "grad_norm": 1.2496768236160278, "learning_rate": 0.00011106123778085673, "loss": 4.6719, "step": 58000 }, { "epoch": 0.11111717680771327, "grad_norm": 1.2746440172195435, "learning_rate": 0.0001110803925449658, "loss": 4.6109, "step": 58010 }, { "epoch": 0.1111363316390885, "grad_norm": 1.2802389860153198, "learning_rate": 0.00011109954730907488, "loss": 4.6265, "step": 58020 }, { "epoch": 0.11115548647046374, "grad_norm": 1.27724027633667, "learning_rate": 0.00011111870207318395, "loss": 4.6166, "step": 58030 }, { "epoch": 0.11117464130183896, "grad_norm": 1.299224615097046, "learning_rate": 0.00011113785683729304, "loss": 4.6909, "step": 58040 }, { "epoch": 0.11119379613321419, "grad_norm": 1.294726014137268, "learning_rate": 0.00011115701160140212, "loss": 4.6025, "step": 58050 }, { "epoch": 0.11121295096458943, "grad_norm": 1.3183951377868652, "learning_rate": 0.00011117616636551119, "loss": 4.5613, "step": 58060 }, { "epoch": 0.11123210579596465, "grad_norm": 1.2759126424789429, "learning_rate": 0.00011119532112962029, "loss": 4.768, "step": 58070 }, { "epoch": 0.11125126062733988, "grad_norm": 1.2313960790634155, "learning_rate": 0.00011121447589372937, "loss": 4.7618, "step": 
58080 }, { "epoch": 0.11127041545871512, "grad_norm": 1.3358979225158691, "learning_rate": 0.00011123363065783844, "loss": 4.632, "step": 58090 }, { "epoch": 0.11128957029009034, "grad_norm": 1.2630497217178345, "learning_rate": 0.00011125278542194752, "loss": 4.5714, "step": 58100 }, { "epoch": 0.11130872512146557, "grad_norm": 1.2585457563400269, "learning_rate": 0.00011127194018605658, "loss": 4.524, "step": 58110 }, { "epoch": 0.11132787995284081, "grad_norm": 1.2984079122543335, "learning_rate": 0.00011129109495016568, "loss": 4.787, "step": 58120 }, { "epoch": 0.11134703478421604, "grad_norm": 1.2471327781677246, "learning_rate": 0.00011131024971427476, "loss": 4.6104, "step": 58130 }, { "epoch": 0.11136618961559126, "grad_norm": 1.2805874347686768, "learning_rate": 0.00011132940447838383, "loss": 4.5982, "step": 58140 }, { "epoch": 0.1113853444469665, "grad_norm": 1.2619445323944092, "learning_rate": 0.00011134855924249292, "loss": 4.5972, "step": 58150 }, { "epoch": 0.11140449927834173, "grad_norm": 1.2518811225891113, "learning_rate": 0.000111367714006602, "loss": 4.8414, "step": 58160 }, { "epoch": 0.11142365410971695, "grad_norm": 1.2401769161224365, "learning_rate": 0.00011138686877071107, "loss": 4.532, "step": 58170 }, { "epoch": 0.1114428089410922, "grad_norm": 1.2510182857513428, "learning_rate": 0.00011140602353482015, "loss": 4.4225, "step": 58180 }, { "epoch": 0.11146196377246742, "grad_norm": 1.2544001340866089, "learning_rate": 0.00011142517829892925, "loss": 4.6492, "step": 58190 }, { "epoch": 0.11148111860384265, "grad_norm": 1.2955800294876099, "learning_rate": 0.00011144433306303832, "loss": 4.6258, "step": 58200 }, { "epoch": 0.11150027343521789, "grad_norm": 1.3551563024520874, "learning_rate": 0.0001114634878271474, "loss": 4.4933, "step": 58210 }, { "epoch": 0.11151942826659311, "grad_norm": 1.5274455547332764, "learning_rate": 0.00011148264259125647, "loss": 4.6496, "step": 58220 }, { "epoch": 0.11153858309796834, "grad_norm": 
1.4416853189468384, "learning_rate": 0.00011150179735536556, "loss": 4.6938, "step": 58230 }, { "epoch": 0.11155773792934358, "grad_norm": 1.278913974761963, "learning_rate": 0.00011152095211947464, "loss": 4.6997, "step": 58240 }, { "epoch": 0.1115768927607188, "grad_norm": 1.2264156341552734, "learning_rate": 0.00011154010688358371, "loss": 4.627, "step": 58250 }, { "epoch": 0.11159604759209403, "grad_norm": 1.3066985607147217, "learning_rate": 0.0001115592616476928, "loss": 4.5581, "step": 58260 }, { "epoch": 0.11161520242346927, "grad_norm": 1.27218759059906, "learning_rate": 0.00011157841641180189, "loss": 4.6942, "step": 58270 }, { "epoch": 0.1116343572548445, "grad_norm": 1.2422667741775513, "learning_rate": 0.00011159757117591095, "loss": 4.5883, "step": 58280 }, { "epoch": 0.11165351208621972, "grad_norm": 1.2457809448242188, "learning_rate": 0.00011161672594002003, "loss": 4.6794, "step": 58290 }, { "epoch": 0.11167266691759496, "grad_norm": 1.2916817665100098, "learning_rate": 0.00011163588070412913, "loss": 4.5747, "step": 58300 }, { "epoch": 0.11169182174897019, "grad_norm": 1.3035577535629272, "learning_rate": 0.0001116550354682382, "loss": 4.6272, "step": 58310 }, { "epoch": 0.11171097658034541, "grad_norm": 1.2700828313827515, "learning_rate": 0.00011167419023234728, "loss": 4.6276, "step": 58320 }, { "epoch": 0.11173013141172065, "grad_norm": 1.2733954191207886, "learning_rate": 0.00011169334499645635, "loss": 4.7227, "step": 58330 }, { "epoch": 0.11174928624309588, "grad_norm": 1.2722883224487305, "learning_rate": 0.00011171249976056544, "loss": 4.774, "step": 58340 }, { "epoch": 0.1117684410744711, "grad_norm": 1.331499695777893, "learning_rate": 0.00011173165452467452, "loss": 4.6106, "step": 58350 }, { "epoch": 0.11178759590584635, "grad_norm": 1.3972885608673096, "learning_rate": 0.00011175080928878359, "loss": 4.5038, "step": 58360 }, { "epoch": 0.11180675073722157, "grad_norm": 1.3038369417190552, "learning_rate": 0.00011176996405289268, 
"loss": 4.6568, "step": 58370 }, { "epoch": 0.1118259055685968, "grad_norm": 1.2560960054397583, "learning_rate": 0.00011178911881700177, "loss": 4.6364, "step": 58380 }, { "epoch": 0.11184506039997204, "grad_norm": 1.279327392578125, "learning_rate": 0.00011180827358111083, "loss": 4.5212, "step": 58390 }, { "epoch": 0.11186421523134726, "grad_norm": 1.2932138442993164, "learning_rate": 0.00011182742834521992, "loss": 4.5587, "step": 58400 }, { "epoch": 0.11188337006272249, "grad_norm": 1.3146915435791016, "learning_rate": 0.000111846583109329, "loss": 4.7021, "step": 58410 }, { "epoch": 0.11190252489409773, "grad_norm": 1.3657524585723877, "learning_rate": 0.00011186573787343808, "loss": 4.7074, "step": 58420 }, { "epoch": 0.11192167972547296, "grad_norm": 1.2904294729232788, "learning_rate": 0.00011188489263754716, "loss": 4.7159, "step": 58430 }, { "epoch": 0.11194083455684818, "grad_norm": 1.2776858806610107, "learning_rate": 0.00011190404740165623, "loss": 4.6812, "step": 58440 }, { "epoch": 0.11195998938822342, "grad_norm": 1.3127164840698242, "learning_rate": 0.00011192320216576532, "loss": 4.6484, "step": 58450 }, { "epoch": 0.11197914421959865, "grad_norm": 1.2924152612686157, "learning_rate": 0.0001119423569298744, "loss": 4.6707, "step": 58460 }, { "epoch": 0.11199829905097387, "grad_norm": 1.2379173040390015, "learning_rate": 0.00011196151169398347, "loss": 4.8108, "step": 58470 }, { "epoch": 0.11201745388234911, "grad_norm": 1.2983427047729492, "learning_rate": 0.00011198066645809257, "loss": 4.6258, "step": 58480 }, { "epoch": 0.11203660871372434, "grad_norm": 1.2935272455215454, "learning_rate": 0.00011199982122220165, "loss": 4.59, "step": 58490 }, { "epoch": 0.11205576354509957, "grad_norm": 1.2447494268417358, "learning_rate": 0.00011201897598631071, "loss": 4.6581, "step": 58500 }, { "epoch": 0.1120749183764748, "grad_norm": 1.2649050951004028, "learning_rate": 0.0001120381307504198, "loss": 4.6855, "step": 58510 }, { "epoch": 
0.11209407320785003, "grad_norm": 1.2866019010543823, "learning_rate": 0.00011205728551452888, "loss": 4.6362, "step": 58520 }, { "epoch": 0.11211322803922526, "grad_norm": 1.2340079545974731, "learning_rate": 0.00011207644027863796, "loss": 4.6233, "step": 58530 }, { "epoch": 0.1121323828706005, "grad_norm": 1.312457799911499, "learning_rate": 0.00011209559504274704, "loss": 4.7192, "step": 58540 }, { "epoch": 0.11215153770197572, "grad_norm": 1.2733405828475952, "learning_rate": 0.00011211474980685611, "loss": 4.6662, "step": 58550 }, { "epoch": 0.11217069253335095, "grad_norm": 1.3140275478363037, "learning_rate": 0.0001121339045709652, "loss": 4.6606, "step": 58560 }, { "epoch": 0.11218984736472619, "grad_norm": 1.2035706043243408, "learning_rate": 0.00011215305933507428, "loss": 4.7427, "step": 58570 }, { "epoch": 0.11220900219610142, "grad_norm": 1.3143267631530762, "learning_rate": 0.00011217221409918335, "loss": 4.677, "step": 58580 }, { "epoch": 0.11222815702747664, "grad_norm": 1.3305941820144653, "learning_rate": 0.00011219136886329245, "loss": 4.6054, "step": 58590 }, { "epoch": 0.11224731185885188, "grad_norm": 1.3068110942840576, "learning_rate": 0.00011221052362740151, "loss": 4.5813, "step": 58600 }, { "epoch": 0.1122664666902271, "grad_norm": 1.2684309482574463, "learning_rate": 0.0001122296783915106, "loss": 4.6845, "step": 58610 }, { "epoch": 0.11228562152160235, "grad_norm": 1.2804392576217651, "learning_rate": 0.00011224883315561968, "loss": 4.6856, "step": 58620 }, { "epoch": 0.11230477635297757, "grad_norm": 1.2330013513565063, "learning_rate": 0.00011226798791972876, "loss": 4.6622, "step": 58630 }, { "epoch": 0.1123239311843528, "grad_norm": 1.3174426555633545, "learning_rate": 0.00011228714268383784, "loss": 4.6233, "step": 58640 }, { "epoch": 0.11234308601572804, "grad_norm": 1.2590465545654297, "learning_rate": 0.00011230629744794692, "loss": 4.7211, "step": 58650 }, { "epoch": 0.11236224084710326, "grad_norm": 1.2605444192886353, 
"learning_rate": 0.00011232545221205599, "loss": 4.7115, "step": 58660 }, { "epoch": 0.11238139567847849, "grad_norm": 1.2548575401306152, "learning_rate": 0.00011234460697616508, "loss": 4.7732, "step": 58670 }, { "epoch": 0.11240055050985373, "grad_norm": 1.3349872827529907, "learning_rate": 0.00011236376174027416, "loss": 4.5942, "step": 58680 }, { "epoch": 0.11241970534122896, "grad_norm": 1.3358644247055054, "learning_rate": 0.00011238291650438323, "loss": 4.6936, "step": 58690 }, { "epoch": 0.11243886017260418, "grad_norm": 1.3634358644485474, "learning_rate": 0.00011240207126849233, "loss": 4.5758, "step": 58700 }, { "epoch": 0.11245801500397942, "grad_norm": 1.2703566551208496, "learning_rate": 0.0001124212260326014, "loss": 4.6288, "step": 58710 }, { "epoch": 0.11247716983535465, "grad_norm": 1.2508271932601929, "learning_rate": 0.00011244038079671048, "loss": 4.603, "step": 58720 }, { "epoch": 0.11249632466672987, "grad_norm": 1.3117481470108032, "learning_rate": 0.00011245953556081956, "loss": 4.7308, "step": 58730 }, { "epoch": 0.11251547949810511, "grad_norm": 1.2685409784317017, "learning_rate": 0.00011247869032492864, "loss": 4.6235, "step": 58740 }, { "epoch": 0.11253463432948034, "grad_norm": 1.26566743850708, "learning_rate": 0.00011249784508903772, "loss": 4.6983, "step": 58750 }, { "epoch": 0.11255378916085557, "grad_norm": 1.2507481575012207, "learning_rate": 0.0001125169998531468, "loss": 4.7036, "step": 58760 }, { "epoch": 0.1125729439922308, "grad_norm": 1.3112380504608154, "learning_rate": 0.00011253615461725587, "loss": 4.5508, "step": 58770 }, { "epoch": 0.11259209882360603, "grad_norm": 1.2601094245910645, "learning_rate": 0.00011255530938136496, "loss": 4.5318, "step": 58780 }, { "epoch": 0.11261125365498126, "grad_norm": 1.250343680381775, "learning_rate": 0.00011257446414547403, "loss": 4.5736, "step": 58790 }, { "epoch": 0.1126304084863565, "grad_norm": 1.2769408226013184, "learning_rate": 0.00011259361890958311, "loss": 4.5551, 
"step": 58800 }, { "epoch": 0.11264956331773172, "grad_norm": 1.2865904569625854, "learning_rate": 0.00011261277367369221, "loss": 4.6779, "step": 58810 }, { "epoch": 0.11266871814910695, "grad_norm": 1.2449073791503906, "learning_rate": 0.00011263192843780128, "loss": 4.5676, "step": 58820 }, { "epoch": 0.11268787298048219, "grad_norm": 1.2704682350158691, "learning_rate": 0.00011265108320191036, "loss": 4.7155, "step": 58830 }, { "epoch": 0.11270702781185742, "grad_norm": 1.3420906066894531, "learning_rate": 0.00011267023796601944, "loss": 4.6705, "step": 58840 }, { "epoch": 0.11272618264323264, "grad_norm": 1.3062055110931396, "learning_rate": 0.00011268939273012852, "loss": 4.498, "step": 58850 }, { "epoch": 0.11274533747460788, "grad_norm": 1.3440839052200317, "learning_rate": 0.0001127085474942376, "loss": 4.583, "step": 58860 }, { "epoch": 0.11276449230598311, "grad_norm": 1.2900124788284302, "learning_rate": 0.00011272770225834668, "loss": 4.6163, "step": 58870 }, { "epoch": 0.11278364713735833, "grad_norm": 1.2837715148925781, "learning_rate": 0.00011274685702245575, "loss": 4.6889, "step": 58880 }, { "epoch": 0.11280280196873357, "grad_norm": 1.2822178602218628, "learning_rate": 0.00011276601178656484, "loss": 4.5156, "step": 58890 }, { "epoch": 0.1128219568001088, "grad_norm": 1.281772494316101, "learning_rate": 0.00011278516655067391, "loss": 4.7237, "step": 58900 }, { "epoch": 0.11284111163148403, "grad_norm": 1.264805793762207, "learning_rate": 0.000112804321314783, "loss": 4.5603, "step": 58910 }, { "epoch": 0.11286026646285927, "grad_norm": 1.3037022352218628, "learning_rate": 0.00011282347607889209, "loss": 4.7696, "step": 58920 }, { "epoch": 0.11287942129423449, "grad_norm": 1.3161104917526245, "learning_rate": 0.00011284263084300116, "loss": 4.6771, "step": 58930 }, { "epoch": 0.11289857612560972, "grad_norm": 1.2603073120117188, "learning_rate": 0.00011286178560711024, "loss": 4.6482, "step": 58940 }, { "epoch": 0.11291773095698496, "grad_norm": 
1.268676996231079, "learning_rate": 0.00011288094037121932, "loss": 4.7102, "step": 58950 }, { "epoch": 0.11293688578836018, "grad_norm": 1.3045475482940674, "learning_rate": 0.0001129000951353284, "loss": 4.7729, "step": 58960 }, { "epoch": 0.11295604061973541, "grad_norm": 1.3148759603500366, "learning_rate": 0.00011291924989943748, "loss": 4.6469, "step": 58970 }, { "epoch": 0.11297519545111065, "grad_norm": 1.28268301486969, "learning_rate": 0.00011293840466354656, "loss": 4.6814, "step": 58980 }, { "epoch": 0.11299435028248588, "grad_norm": 1.2625963687896729, "learning_rate": 0.00011295755942765563, "loss": 4.6096, "step": 58990 }, { "epoch": 0.1130135051138611, "grad_norm": 1.2638907432556152, "learning_rate": 0.00011297671419176473, "loss": 4.6529, "step": 59000 }, { "epoch": 0.11303265994523634, "grad_norm": 1.2761338949203491, "learning_rate": 0.0001129958689558738, "loss": 4.524, "step": 59010 }, { "epoch": 0.11305181477661157, "grad_norm": 1.2831857204437256, "learning_rate": 0.00011301502371998287, "loss": 4.666, "step": 59020 }, { "epoch": 0.1130709696079868, "grad_norm": 1.2377251386642456, "learning_rate": 0.00011303417848409197, "loss": 4.5914, "step": 59030 }, { "epoch": 0.11309012443936203, "grad_norm": 1.2583332061767578, "learning_rate": 0.00011305333324820104, "loss": 4.5964, "step": 59040 }, { "epoch": 0.11310927927073726, "grad_norm": 1.350313663482666, "learning_rate": 0.00011307248801231012, "loss": 4.752, "step": 59050 }, { "epoch": 0.11312843410211249, "grad_norm": 1.2447800636291504, "learning_rate": 0.0001130916427764192, "loss": 4.7472, "step": 59060 }, { "epoch": 0.11314758893348772, "grad_norm": 1.2995548248291016, "learning_rate": 0.00011311079754052828, "loss": 4.7151, "step": 59070 }, { "epoch": 0.11316674376486295, "grad_norm": 1.2365938425064087, "learning_rate": 0.00011312995230463736, "loss": 4.6466, "step": 59080 }, { "epoch": 0.11318589859623818, "grad_norm": 1.284917950630188, "learning_rate": 0.00011314910706874643, 
"loss": 4.7126, "step": 59090 }, { "epoch": 0.11320505342761342, "grad_norm": 1.251927137374878, "learning_rate": 0.00011316826183285551, "loss": 4.7575, "step": 59100 }, { "epoch": 0.11322420825898864, "grad_norm": 1.2918928861618042, "learning_rate": 0.0001131874165969646, "loss": 4.6402, "step": 59110 }, { "epoch": 0.11324336309036387, "grad_norm": 1.2405716180801392, "learning_rate": 0.00011320465588466277, "loss": 4.7593, "step": 59120 }, { "epoch": 0.11326251792173911, "grad_norm": 1.256744146347046, "learning_rate": 0.00011322381064877186, "loss": 4.7331, "step": 59130 }, { "epoch": 0.11328167275311433, "grad_norm": 1.2193214893341064, "learning_rate": 0.00011324296541288093, "loss": 4.6559, "step": 59140 }, { "epoch": 0.11330082758448956, "grad_norm": 1.3421385288238525, "learning_rate": 0.00011326212017699001, "loss": 4.7374, "step": 59150 }, { "epoch": 0.1133199824158648, "grad_norm": 1.269494652748108, "learning_rate": 0.0001132812749410991, "loss": 4.653, "step": 59160 }, { "epoch": 0.11333913724724003, "grad_norm": 1.2117568254470825, "learning_rate": 0.00011330042970520817, "loss": 4.7913, "step": 59170 }, { "epoch": 0.11335829207861525, "grad_norm": 1.3042206764221191, "learning_rate": 0.00011331958446931725, "loss": 4.5938, "step": 59180 }, { "epoch": 0.11337744690999049, "grad_norm": 1.2961243391036987, "learning_rate": 0.00011333873923342632, "loss": 4.8164, "step": 59190 }, { "epoch": 0.11339660174136572, "grad_norm": 1.2129616737365723, "learning_rate": 0.0001133578939975354, "loss": 4.831, "step": 59200 }, { "epoch": 0.11341575657274094, "grad_norm": 1.273960828781128, "learning_rate": 0.0001133770487616445, "loss": 4.5799, "step": 59210 }, { "epoch": 0.11343491140411618, "grad_norm": 1.2968807220458984, "learning_rate": 0.00011339620352575357, "loss": 4.6564, "step": 59220 }, { "epoch": 0.11345406623549141, "grad_norm": 1.2621816396713257, "learning_rate": 0.00011341535828986265, "loss": 4.4689, "step": 59230 }, { "epoch": 0.11347322106686664, 
"grad_norm": 1.2784199714660645, "learning_rate": 0.00011343451305397174, "loss": 4.7201, "step": 59240 }, { "epoch": 0.11349237589824188, "grad_norm": 1.305983543395996, "learning_rate": 0.00011345366781808081, "loss": 4.6431, "step": 59250 }, { "epoch": 0.1135115307296171, "grad_norm": 1.2842681407928467, "learning_rate": 0.00011347282258218989, "loss": 4.6181, "step": 59260 }, { "epoch": 0.11353068556099234, "grad_norm": 1.3195509910583496, "learning_rate": 0.00011349197734629896, "loss": 4.6254, "step": 59270 }, { "epoch": 0.11354984039236757, "grad_norm": 1.3271605968475342, "learning_rate": 0.00011351113211040805, "loss": 4.6283, "step": 59280 }, { "epoch": 0.1135689952237428, "grad_norm": 1.272781491279602, "learning_rate": 0.00011353028687451713, "loss": 4.6283, "step": 59290 }, { "epoch": 0.11358815005511803, "grad_norm": 1.2627217769622803, "learning_rate": 0.0001135494416386262, "loss": 4.6056, "step": 59300 }, { "epoch": 0.11360730488649326, "grad_norm": 1.3385343551635742, "learning_rate": 0.00011356859640273528, "loss": 4.5941, "step": 59310 }, { "epoch": 0.11362645971786849, "grad_norm": 1.2460094690322876, "learning_rate": 0.00011358775116684438, "loss": 4.6916, "step": 59320 }, { "epoch": 0.11364561454924373, "grad_norm": 1.3125678300857544, "learning_rate": 0.00011360690593095345, "loss": 4.5985, "step": 59330 }, { "epoch": 0.11366476938061895, "grad_norm": 1.2672948837280273, "learning_rate": 0.00011362606069506253, "loss": 4.6438, "step": 59340 }, { "epoch": 0.11368392421199418, "grad_norm": 1.3281176090240479, "learning_rate": 0.00011364521545917162, "loss": 4.6875, "step": 59350 }, { "epoch": 0.11370307904336942, "grad_norm": 1.216286540031433, "learning_rate": 0.00011366437022328069, "loss": 4.5674, "step": 59360 }, { "epoch": 0.11372223387474464, "grad_norm": 1.301007628440857, "learning_rate": 0.00011368352498738977, "loss": 4.5726, "step": 59370 }, { "epoch": 0.11374138870611987, "grad_norm": 1.2694164514541626, "learning_rate": 
0.00011370267975149884, "loss": 4.5845, "step": 59380 }, { "epoch": 0.11376054353749511, "grad_norm": 1.2087085247039795, "learning_rate": 0.00011372183451560793, "loss": 4.6963, "step": 59390 }, { "epoch": 0.11377969836887034, "grad_norm": 1.2282555103302002, "learning_rate": 0.00011374098927971702, "loss": 4.6818, "step": 59400 }, { "epoch": 0.11379885320024556, "grad_norm": 1.2679543495178223, "learning_rate": 0.00011376014404382608, "loss": 4.6762, "step": 59410 }, { "epoch": 0.1138180080316208, "grad_norm": 1.3193246126174927, "learning_rate": 0.00011377929880793516, "loss": 4.5807, "step": 59420 }, { "epoch": 0.11383716286299603, "grad_norm": 1.248953104019165, "learning_rate": 0.00011379845357204426, "loss": 4.7235, "step": 59430 }, { "epoch": 0.11385631769437125, "grad_norm": 1.2191245555877686, "learning_rate": 0.00011381760833615333, "loss": 4.7335, "step": 59440 }, { "epoch": 0.1138754725257465, "grad_norm": 1.2349236011505127, "learning_rate": 0.00011383676310026241, "loss": 4.689, "step": 59450 }, { "epoch": 0.11389462735712172, "grad_norm": 1.3108303546905518, "learning_rate": 0.00011385591786437148, "loss": 4.5019, "step": 59460 }, { "epoch": 0.11391378218849695, "grad_norm": 1.2735650539398193, "learning_rate": 0.00011387507262848057, "loss": 4.5629, "step": 59470 }, { "epoch": 0.11393293701987219, "grad_norm": 1.317572832107544, "learning_rate": 0.00011389422739258965, "loss": 4.7327, "step": 59480 }, { "epoch": 0.11395209185124741, "grad_norm": 1.3340818881988525, "learning_rate": 0.00011391338215669872, "loss": 4.6076, "step": 59490 }, { "epoch": 0.11397124668262264, "grad_norm": 1.2937597036361694, "learning_rate": 0.00011393253692080782, "loss": 4.6381, "step": 59500 }, { "epoch": 0.11399040151399788, "grad_norm": 1.2675820589065552, "learning_rate": 0.0001139516916849169, "loss": 4.6269, "step": 59510 }, { "epoch": 0.1140095563453731, "grad_norm": 1.3470969200134277, "learning_rate": 0.00011397084644902596, "loss": 4.6384, "step": 59520 }, { 
"epoch": 0.11402871117674833, "grad_norm": 1.2821663618087769, "learning_rate": 0.00011399000121313505, "loss": 4.6526, "step": 59530 }, { "epoch": 0.11404786600812357, "grad_norm": 1.2267327308654785, "learning_rate": 0.00011400915597724414, "loss": 4.6346, "step": 59540 }, { "epoch": 0.1140670208394988, "grad_norm": 1.3023688793182373, "learning_rate": 0.00011402831074135321, "loss": 4.669, "step": 59550 }, { "epoch": 0.11408617567087402, "grad_norm": 1.2278845310211182, "learning_rate": 0.00011404746550546229, "loss": 4.6696, "step": 59560 }, { "epoch": 0.11410533050224926, "grad_norm": 1.2482706308364868, "learning_rate": 0.00011406662026957136, "loss": 4.5616, "step": 59570 }, { "epoch": 0.11412448533362449, "grad_norm": 1.2666000127792358, "learning_rate": 0.00011408577503368045, "loss": 4.471, "step": 59580 }, { "epoch": 0.11414364016499971, "grad_norm": 1.2491154670715332, "learning_rate": 0.00011410492979778953, "loss": 4.6608, "step": 59590 }, { "epoch": 0.11416279499637495, "grad_norm": 1.3162082433700562, "learning_rate": 0.0001141240845618986, "loss": 4.6943, "step": 59600 }, { "epoch": 0.11418194982775018, "grad_norm": 1.2977252006530762, "learning_rate": 0.0001141432393260077, "loss": 4.6304, "step": 59610 }, { "epoch": 0.1142011046591254, "grad_norm": 1.225848913192749, "learning_rate": 0.00011416239409011678, "loss": 4.6301, "step": 59620 }, { "epoch": 0.11422025949050064, "grad_norm": 1.2525051832199097, "learning_rate": 0.00011418154885422585, "loss": 4.7162, "step": 59630 }, { "epoch": 0.11423941432187587, "grad_norm": 1.266435146331787, "learning_rate": 0.00011420070361833493, "loss": 4.6445, "step": 59640 }, { "epoch": 0.1142585691532511, "grad_norm": 1.2742713689804077, "learning_rate": 0.00011421985838244402, "loss": 4.6872, "step": 59650 }, { "epoch": 0.11427772398462634, "grad_norm": 1.2361469268798828, "learning_rate": 0.00011423901314655309, "loss": 4.6534, "step": 59660 }, { "epoch": 0.11429687881600156, "grad_norm": 1.322104811668396, 
"learning_rate": 0.00011425816791066217, "loss": 4.4943, "step": 59670 }, { "epoch": 0.11431603364737679, "grad_norm": 1.262701153755188, "learning_rate": 0.00011427732267477124, "loss": 4.7822, "step": 59680 }, { "epoch": 0.11433518847875203, "grad_norm": 1.3279943466186523, "learning_rate": 0.00011429647743888033, "loss": 4.7622, "step": 59690 }, { "epoch": 0.11435434331012725, "grad_norm": 1.2828192710876465, "learning_rate": 0.00011431563220298941, "loss": 4.561, "step": 59700 }, { "epoch": 0.11437349814150248, "grad_norm": 1.4530823230743408, "learning_rate": 0.00011433478696709848, "loss": 4.555, "step": 59710 }, { "epoch": 0.11439265297287772, "grad_norm": 1.2471728324890137, "learning_rate": 0.00011435394173120758, "loss": 4.5054, "step": 59720 }, { "epoch": 0.11441180780425295, "grad_norm": 1.2808756828308105, "learning_rate": 0.00011437309649531666, "loss": 4.5082, "step": 59730 }, { "epoch": 0.11443096263562817, "grad_norm": 1.241782784461975, "learning_rate": 0.00011439225125942573, "loss": 4.6576, "step": 59740 }, { "epoch": 0.11445011746700341, "grad_norm": 1.2235769033432007, "learning_rate": 0.00011441140602353481, "loss": 4.7226, "step": 59750 }, { "epoch": 0.11446927229837864, "grad_norm": 1.3089332580566406, "learning_rate": 0.00011443056078764389, "loss": 4.5365, "step": 59760 }, { "epoch": 0.11448842712975386, "grad_norm": 1.2360751628875732, "learning_rate": 0.00011444971555175297, "loss": 4.6104, "step": 59770 }, { "epoch": 0.1145075819611291, "grad_norm": 1.3036413192749023, "learning_rate": 0.00011446887031586205, "loss": 4.5913, "step": 59780 }, { "epoch": 0.11452673679250433, "grad_norm": 1.2320866584777832, "learning_rate": 0.00011448802507997112, "loss": 4.6661, "step": 59790 }, { "epoch": 0.11454589162387956, "grad_norm": 1.2868163585662842, "learning_rate": 0.00011450717984408021, "loss": 4.719, "step": 59800 }, { "epoch": 0.1145650464552548, "grad_norm": 1.2482372522354126, "learning_rate": 0.0001145263346081893, "loss": 4.698, 
"step": 59810 }, { "epoch": 0.11458420128663002, "grad_norm": 1.2586499452590942, "learning_rate": 0.00011454548937229836, "loss": 4.5931, "step": 59820 }, { "epoch": 0.11460335611800525, "grad_norm": 1.2632453441619873, "learning_rate": 0.00011456464413640746, "loss": 4.4448, "step": 59830 }, { "epoch": 0.11462251094938049, "grad_norm": 1.2994413375854492, "learning_rate": 0.00011458379890051654, "loss": 4.6053, "step": 59840 }, { "epoch": 0.11464166578075571, "grad_norm": 1.3015059232711792, "learning_rate": 0.0001146029536646256, "loss": 4.6899, "step": 59850 }, { "epoch": 0.11466082061213094, "grad_norm": 1.2890251874923706, "learning_rate": 0.00011462210842873469, "loss": 4.6046, "step": 59860 }, { "epoch": 0.11467997544350618, "grad_norm": 1.3087443113327026, "learning_rate": 0.00011464126319284377, "loss": 4.611, "step": 59870 }, { "epoch": 0.1146991302748814, "grad_norm": 1.2301150560379028, "learning_rate": 0.00011466041795695285, "loss": 4.6536, "step": 59880 }, { "epoch": 0.11471828510625663, "grad_norm": 1.2341828346252441, "learning_rate": 0.00011467957272106193, "loss": 4.6305, "step": 59890 }, { "epoch": 0.11473743993763187, "grad_norm": 1.349501132965088, "learning_rate": 0.000114698727485171, "loss": 4.6754, "step": 59900 }, { "epoch": 0.1147565947690071, "grad_norm": 1.331185221672058, "learning_rate": 0.0001147178822492801, "loss": 4.5619, "step": 59910 }, { "epoch": 0.11477574960038232, "grad_norm": 1.3213013410568237, "learning_rate": 0.00011473703701338918, "loss": 4.6009, "step": 59920 }, { "epoch": 0.11479490443175756, "grad_norm": 1.3177388906478882, "learning_rate": 0.00011475619177749824, "loss": 4.5959, "step": 59930 }, { "epoch": 0.11481405926313279, "grad_norm": 1.3106316328048706, "learning_rate": 0.00011477534654160734, "loss": 4.6382, "step": 59940 }, { "epoch": 0.11483321409450803, "grad_norm": 1.3192014694213867, "learning_rate": 0.0001147945013057164, "loss": 4.7646, "step": 59950 }, { "epoch": 0.11485236892588326, "grad_norm": 
1.4071872234344482, "learning_rate": 0.00011481365606982549, "loss": 4.6775, "step": 59960 }, { "epoch": 0.11487152375725848, "grad_norm": 1.29054594039917, "learning_rate": 0.00011483281083393457, "loss": 4.6281, "step": 59970 }, { "epoch": 0.11489067858863372, "grad_norm": 1.3103477954864502, "learning_rate": 0.00011485196559804365, "loss": 4.666, "step": 59980 }, { "epoch": 0.11490983342000895, "grad_norm": 1.3057032823562622, "learning_rate": 0.00011487112036215273, "loss": 4.6502, "step": 59990 }, { "epoch": 0.11492898825138417, "grad_norm": 1.3006786108016968, "learning_rate": 0.00011489027512626181, "loss": 4.6633, "step": 60000 }, { "epoch": 0.11494814308275941, "grad_norm": 1.259671926498413, "learning_rate": 0.00011490942989037088, "loss": 4.7627, "step": 60010 }, { "epoch": 0.11496729791413464, "grad_norm": 1.2022697925567627, "learning_rate": 0.00011492858465447998, "loss": 4.6231, "step": 60020 }, { "epoch": 0.11498645274550987, "grad_norm": 1.264588713645935, "learning_rate": 0.00011494773941858906, "loss": 4.6269, "step": 60030 }, { "epoch": 0.1150056075768851, "grad_norm": 1.4211598634719849, "learning_rate": 0.00011496689418269812, "loss": 4.5826, "step": 60040 }, { "epoch": 0.11502476240826033, "grad_norm": 1.271798849105835, "learning_rate": 0.00011498604894680722, "loss": 4.5128, "step": 60050 }, { "epoch": 0.11504391723963556, "grad_norm": 1.3162474632263184, "learning_rate": 0.00011500520371091629, "loss": 4.5688, "step": 60060 }, { "epoch": 0.1150630720710108, "grad_norm": 1.2630555629730225, "learning_rate": 0.00011502435847502537, "loss": 4.5334, "step": 60070 }, { "epoch": 0.11508222690238602, "grad_norm": 1.2625861167907715, "learning_rate": 0.00011504351323913445, "loss": 4.6904, "step": 60080 }, { "epoch": 0.11510138173376125, "grad_norm": 1.2857773303985596, "learning_rate": 0.00011506266800324353, "loss": 4.7422, "step": 60090 }, { "epoch": 0.11512053656513649, "grad_norm": 1.288794755935669, "learning_rate": 0.00011508182276735261, 
"loss": 4.5894, "step": 60100 }, { "epoch": 0.11513969139651171, "grad_norm": 1.2390151023864746, "learning_rate": 0.0001151009775314617, "loss": 4.6026, "step": 60110 }, { "epoch": 0.11515884622788694, "grad_norm": 1.290802240371704, "learning_rate": 0.00011512013229557076, "loss": 4.6894, "step": 60120 }, { "epoch": 0.11517800105926218, "grad_norm": 1.245935320854187, "learning_rate": 0.00011513928705967986, "loss": 4.5573, "step": 60130 }, { "epoch": 0.1151971558906374, "grad_norm": 1.3088366985321045, "learning_rate": 0.00011515844182378892, "loss": 4.6688, "step": 60140 }, { "epoch": 0.11521631072201263, "grad_norm": 1.233139157295227, "learning_rate": 0.000115177596587898, "loss": 4.7225, "step": 60150 }, { "epoch": 0.11523546555338787, "grad_norm": 1.2477755546569824, "learning_rate": 0.0001151967513520071, "loss": 4.7245, "step": 60160 }, { "epoch": 0.1152546203847631, "grad_norm": 1.379575252532959, "learning_rate": 0.00011521590611611617, "loss": 4.4821, "step": 60170 }, { "epoch": 0.11527377521613832, "grad_norm": 1.2576338052749634, "learning_rate": 0.00011523506088022525, "loss": 4.6308, "step": 60180 }, { "epoch": 0.11529293004751356, "grad_norm": 1.2556841373443604, "learning_rate": 0.00011525421564433433, "loss": 4.7102, "step": 60190 }, { "epoch": 0.11531208487888879, "grad_norm": 1.252996563911438, "learning_rate": 0.00011527337040844341, "loss": 4.7188, "step": 60200 }, { "epoch": 0.11533123971026402, "grad_norm": 1.2484010457992554, "learning_rate": 0.00011529252517255249, "loss": 4.7449, "step": 60210 }, { "epoch": 0.11535039454163926, "grad_norm": 1.2622677087783813, "learning_rate": 0.00011531167993666157, "loss": 4.6481, "step": 60220 }, { "epoch": 0.11536954937301448, "grad_norm": 1.2540581226348877, "learning_rate": 0.00011533083470077064, "loss": 4.6344, "step": 60230 }, { "epoch": 0.11538870420438971, "grad_norm": 1.2285857200622559, "learning_rate": 0.00011534998946487974, "loss": 4.513, "step": 60240 }, { "epoch": 0.11540785903576495, 
"grad_norm": 1.2237573862075806, "learning_rate": 0.0001153691442289888, "loss": 4.6699, "step": 60250 }, { "epoch": 0.11542701386714017, "grad_norm": 1.2868075370788574, "learning_rate": 0.00011538829899309789, "loss": 4.7241, "step": 60260 }, { "epoch": 0.1154461686985154, "grad_norm": 1.2808834314346313, "learning_rate": 0.00011540745375720698, "loss": 4.6492, "step": 60270 }, { "epoch": 0.11546532352989064, "grad_norm": 1.2478036880493164, "learning_rate": 0.00011542660852131605, "loss": 4.6262, "step": 60280 }, { "epoch": 0.11548447836126587, "grad_norm": 1.2450931072235107, "learning_rate": 0.00011544576328542513, "loss": 4.6563, "step": 60290 }, { "epoch": 0.11550363319264109, "grad_norm": 1.321126937866211, "learning_rate": 0.00011546491804953421, "loss": 4.6981, "step": 60300 }, { "epoch": 0.11552278802401633, "grad_norm": 1.2273955345153809, "learning_rate": 0.00011548407281364329, "loss": 4.8022, "step": 60310 }, { "epoch": 0.11554194285539156, "grad_norm": 1.2755167484283447, "learning_rate": 0.00011550322757775237, "loss": 4.717, "step": 60320 }, { "epoch": 0.11556109768676678, "grad_norm": 1.2949427366256714, "learning_rate": 0.00011552238234186146, "loss": 4.5619, "step": 60330 }, { "epoch": 0.11558025251814202, "grad_norm": 1.231820821762085, "learning_rate": 0.00011554153710597052, "loss": 4.5386, "step": 60340 }, { "epoch": 0.11559940734951725, "grad_norm": 1.3193511962890625, "learning_rate": 0.00011556069187007962, "loss": 4.7104, "step": 60350 }, { "epoch": 0.11561856218089248, "grad_norm": 1.23191237449646, "learning_rate": 0.00011557984663418869, "loss": 4.6277, "step": 60360 }, { "epoch": 0.11563771701226772, "grad_norm": 1.2991869449615479, "learning_rate": 0.00011559900139829777, "loss": 4.6931, "step": 60370 }, { "epoch": 0.11565687184364294, "grad_norm": 1.2473468780517578, "learning_rate": 0.00011561815616240686, "loss": 4.4474, "step": 60380 }, { "epoch": 0.11567602667501817, "grad_norm": 1.274716854095459, "learning_rate": 
0.00011563731092651593, "loss": 4.5902, "step": 60390 }, { "epoch": 0.11569518150639341, "grad_norm": 1.283530831336975, "learning_rate": 0.00011565646569062501, "loss": 4.4982, "step": 60400 }, { "epoch": 0.11571433633776863, "grad_norm": 1.2885562181472778, "learning_rate": 0.00011567562045473409, "loss": 4.6782, "step": 60410 }, { "epoch": 0.11573349116914386, "grad_norm": 1.2121118307113647, "learning_rate": 0.00011569477521884316, "loss": 4.7394, "step": 60420 }, { "epoch": 0.1157526460005191, "grad_norm": 1.2478798627853394, "learning_rate": 0.00011571392998295225, "loss": 4.7541, "step": 60430 }, { "epoch": 0.11577180083189433, "grad_norm": 1.2475522756576538, "learning_rate": 0.00011573308474706132, "loss": 4.7558, "step": 60440 }, { "epoch": 0.11579095566326955, "grad_norm": 1.238539457321167, "learning_rate": 0.0001157522395111704, "loss": 4.5899, "step": 60450 }, { "epoch": 0.11581011049464479, "grad_norm": 1.2348989248275757, "learning_rate": 0.0001157713942752795, "loss": 4.6354, "step": 60460 }, { "epoch": 0.11582926532602002, "grad_norm": 1.2528992891311646, "learning_rate": 0.00011579054903938857, "loss": 4.6054, "step": 60470 }, { "epoch": 0.11584842015739524, "grad_norm": 1.3089542388916016, "learning_rate": 0.00011580970380349765, "loss": 4.6269, "step": 60480 }, { "epoch": 0.11586757498877048, "grad_norm": 1.2616294622421265, "learning_rate": 0.00011582885856760674, "loss": 4.7392, "step": 60490 }, { "epoch": 0.11588672982014571, "grad_norm": 1.2267049551010132, "learning_rate": 0.00011584801333171581, "loss": 4.4859, "step": 60500 }, { "epoch": 0.11590588465152094, "grad_norm": 1.2647100687026978, "learning_rate": 0.00011586716809582489, "loss": 4.5287, "step": 60510 }, { "epoch": 0.11592503948289617, "grad_norm": 1.24247407913208, "learning_rate": 0.00011588632285993397, "loss": 4.6511, "step": 60520 }, { "epoch": 0.1159441943142714, "grad_norm": 1.2851179838180542, "learning_rate": 0.00011590547762404304, "loss": 4.684, "step": 60530 }, { 
"epoch": 0.11596334914564663, "grad_norm": 1.210500955581665, "learning_rate": 0.00011592463238815214, "loss": 4.8429, "step": 60540 }, { "epoch": 0.11598250397702187, "grad_norm": 1.2507764101028442, "learning_rate": 0.0001159437871522612, "loss": 4.5676, "step": 60550 }, { "epoch": 0.11600165880839709, "grad_norm": 1.2196621894836426, "learning_rate": 0.00011596294191637028, "loss": 4.6762, "step": 60560 }, { "epoch": 0.11602081363977232, "grad_norm": 1.2441333532333374, "learning_rate": 0.00011598209668047938, "loss": 4.4618, "step": 60570 }, { "epoch": 0.11603996847114756, "grad_norm": 1.2178590297698975, "learning_rate": 0.00011600125144458845, "loss": 4.6331, "step": 60580 }, { "epoch": 0.11605912330252278, "grad_norm": 1.2459863424301147, "learning_rate": 0.00011602040620869753, "loss": 4.6157, "step": 60590 }, { "epoch": 0.11607827813389802, "grad_norm": 1.1993855237960815, "learning_rate": 0.00011603956097280662, "loss": 4.6664, "step": 60600 }, { "epoch": 0.11609743296527325, "grad_norm": 1.1755492687225342, "learning_rate": 0.00011605871573691569, "loss": 4.7419, "step": 60610 }, { "epoch": 0.11611658779664848, "grad_norm": 1.2966225147247314, "learning_rate": 0.00011607787050102477, "loss": 4.5415, "step": 60620 }, { "epoch": 0.11613574262802372, "grad_norm": 1.213980793952942, "learning_rate": 0.00011609702526513384, "loss": 4.6453, "step": 60630 }, { "epoch": 0.11615489745939894, "grad_norm": 1.3319363594055176, "learning_rate": 0.00011611618002924292, "loss": 4.6692, "step": 60640 }, { "epoch": 0.11617405229077417, "grad_norm": 1.2676392793655396, "learning_rate": 0.00011613533479335202, "loss": 4.7085, "step": 60650 }, { "epoch": 0.11619320712214941, "grad_norm": 1.2965199947357178, "learning_rate": 0.00011615448955746108, "loss": 4.5763, "step": 60660 }, { "epoch": 0.11621236195352463, "grad_norm": 1.268775224685669, "learning_rate": 0.00011617364432157017, "loss": 4.9079, "step": 60670 }, { "epoch": 0.11623151678489986, "grad_norm": 
1.2749289274215698, "learning_rate": 0.00011619279908567926, "loss": 4.6353, "step": 60680 }, { "epoch": 0.1162506716162751, "grad_norm": 1.3394489288330078, "learning_rate": 0.00011621195384978833, "loss": 4.5725, "step": 60690 }, { "epoch": 0.11626982644765033, "grad_norm": 1.3005175590515137, "learning_rate": 0.00011623110861389741, "loss": 4.7268, "step": 60700 }, { "epoch": 0.11628898127902555, "grad_norm": 1.2467314004898071, "learning_rate": 0.0001162502633780065, "loss": 4.6125, "step": 60710 }, { "epoch": 0.11630813611040079, "grad_norm": 1.211430311203003, "learning_rate": 0.00011626941814211557, "loss": 4.6494, "step": 60720 }, { "epoch": 0.11632729094177602, "grad_norm": 1.249473214149475, "learning_rate": 0.00011628857290622465, "loss": 4.6469, "step": 60730 }, { "epoch": 0.11634644577315124, "grad_norm": 1.2239785194396973, "learning_rate": 0.00011630772767033372, "loss": 4.7621, "step": 60740 }, { "epoch": 0.11636560060452648, "grad_norm": 1.2615727186203003, "learning_rate": 0.0001163268824344428, "loss": 4.629, "step": 60750 }, { "epoch": 0.11638475543590171, "grad_norm": 1.3266375064849854, "learning_rate": 0.0001163460371985519, "loss": 4.6835, "step": 60760 }, { "epoch": 0.11640391026727694, "grad_norm": 1.2567534446716309, "learning_rate": 0.00011636519196266096, "loss": 4.7419, "step": 60770 }, { "epoch": 0.11642306509865218, "grad_norm": 1.1986520290374756, "learning_rate": 0.00011638434672677005, "loss": 4.7531, "step": 60780 }, { "epoch": 0.1164422199300274, "grad_norm": 1.1968740224838257, "learning_rate": 0.00011640350149087914, "loss": 4.7475, "step": 60790 }, { "epoch": 0.11646137476140263, "grad_norm": 1.275757908821106, "learning_rate": 0.00011642265625498821, "loss": 4.5681, "step": 60800 }, { "epoch": 0.11648052959277787, "grad_norm": 1.2534432411193848, "learning_rate": 0.00011644181101909729, "loss": 4.6512, "step": 60810 }, { "epoch": 0.1164996844241531, "grad_norm": 1.2820260524749756, "learning_rate": 0.00011646096578320636, 
"loss": 4.583, "step": 60820 }, { "epoch": 0.11651883925552832, "grad_norm": 1.2322254180908203, "learning_rate": 0.00011648012054731545, "loss": 4.6338, "step": 60830 }, { "epoch": 0.11653799408690356, "grad_norm": 1.2801748514175415, "learning_rate": 0.00011649927531142453, "loss": 4.5297, "step": 60840 }, { "epoch": 0.11655714891827879, "grad_norm": 1.22176992893219, "learning_rate": 0.0001165184300755336, "loss": 4.6885, "step": 60850 }, { "epoch": 0.11657630374965401, "grad_norm": 1.23463773727417, "learning_rate": 0.00011653758483964268, "loss": 4.6451, "step": 60860 }, { "epoch": 0.11659545858102925, "grad_norm": 1.2072806358337402, "learning_rate": 0.00011655673960375178, "loss": 4.5841, "step": 60870 }, { "epoch": 0.11661461341240448, "grad_norm": 1.3247766494750977, "learning_rate": 0.00011657589436786085, "loss": 4.5404, "step": 60880 }, { "epoch": 0.1166337682437797, "grad_norm": 1.2603003978729248, "learning_rate": 0.00011659504913196993, "loss": 4.5742, "step": 60890 }, { "epoch": 0.11665292307515494, "grad_norm": 1.2781193256378174, "learning_rate": 0.00011661420389607902, "loss": 4.5463, "step": 60900 }, { "epoch": 0.11667207790653017, "grad_norm": 1.2395042181015015, "learning_rate": 0.00011663335866018809, "loss": 4.4737, "step": 60910 }, { "epoch": 0.1166912327379054, "grad_norm": 1.2443387508392334, "learning_rate": 0.00011665251342429717, "loss": 4.6188, "step": 60920 }, { "epoch": 0.11671038756928064, "grad_norm": 1.2496691942214966, "learning_rate": 0.00011667166818840624, "loss": 4.6874, "step": 60930 }, { "epoch": 0.11672954240065586, "grad_norm": 1.308167576789856, "learning_rate": 0.00011669082295251533, "loss": 4.6322, "step": 60940 }, { "epoch": 0.11674869723203109, "grad_norm": 1.2597277164459229, "learning_rate": 0.00011670997771662441, "loss": 4.5132, "step": 60950 }, { "epoch": 0.11676785206340633, "grad_norm": 1.242637276649475, "learning_rate": 0.00011672913248073348, "loss": 4.6536, "step": 60960 }, { "epoch": 
0.11678700689478155, "grad_norm": 1.265488624572754, "learning_rate": 0.00011674828724484256, "loss": 4.5154, "step": 60970 }, { "epoch": 0.11680616172615678, "grad_norm": 1.3308734893798828, "learning_rate": 0.00011676744200895166, "loss": 4.5676, "step": 60980 }, { "epoch": 0.11682531655753202, "grad_norm": 1.3561629056930542, "learning_rate": 0.00011678659677306073, "loss": 4.7087, "step": 60990 }, { "epoch": 0.11684447138890725, "grad_norm": 1.2111090421676636, "learning_rate": 0.00011680575153716981, "loss": 4.6921, "step": 61000 }, { "epoch": 0.11686362622028247, "grad_norm": 1.3260985612869263, "learning_rate": 0.0001168249063012789, "loss": 4.7349, "step": 61010 }, { "epoch": 0.11688278105165771, "grad_norm": 1.2640146017074585, "learning_rate": 0.00011684406106538797, "loss": 4.5809, "step": 61020 }, { "epoch": 0.11690193588303294, "grad_norm": 1.2236708402633667, "learning_rate": 0.00011686321582949705, "loss": 4.471, "step": 61030 }, { "epoch": 0.11692109071440816, "grad_norm": 1.2021324634552002, "learning_rate": 0.00011688237059360612, "loss": 4.6731, "step": 61040 }, { "epoch": 0.1169402455457834, "grad_norm": 1.2537351846694946, "learning_rate": 0.00011690152535771521, "loss": 4.5748, "step": 61050 }, { "epoch": 0.11695940037715863, "grad_norm": 1.4238734245300293, "learning_rate": 0.0001169206801218243, "loss": 4.4283, "step": 61060 }, { "epoch": 0.11697855520853385, "grad_norm": 1.28218412399292, "learning_rate": 0.00011693983488593336, "loss": 4.715, "step": 61070 }, { "epoch": 0.1169977100399091, "grad_norm": 1.255177617073059, "learning_rate": 0.00011695898965004244, "loss": 4.6289, "step": 61080 }, { "epoch": 0.11701686487128432, "grad_norm": 1.2417069673538208, "learning_rate": 0.00011697814441415154, "loss": 4.6254, "step": 61090 }, { "epoch": 0.11703601970265955, "grad_norm": 1.245322346687317, "learning_rate": 0.00011699729917826061, "loss": 4.711, "step": 61100 }, { "epoch": 0.11705517453403479, "grad_norm": 1.2514094114303589, 
"learning_rate": 0.00011701645394236969, "loss": 4.8101, "step": 61110 }, { "epoch": 0.11707432936541001, "grad_norm": 1.273497462272644, "learning_rate": 0.00011703560870647876, "loss": 4.6279, "step": 61120 }, { "epoch": 0.11709348419678524, "grad_norm": 1.5760111808776855, "learning_rate": 0.00011705476347058785, "loss": 4.6085, "step": 61130 }, { "epoch": 0.11711263902816048, "grad_norm": 1.3355485200881958, "learning_rate": 0.00011707391823469693, "loss": 4.5474, "step": 61140 }, { "epoch": 0.1171317938595357, "grad_norm": 1.2647161483764648, "learning_rate": 0.000117093072998806, "loss": 4.7078, "step": 61150 }, { "epoch": 0.11715094869091093, "grad_norm": 1.3143786191940308, "learning_rate": 0.0001171122277629151, "loss": 4.536, "step": 61160 }, { "epoch": 0.11717010352228617, "grad_norm": 1.3175435066223145, "learning_rate": 0.00011713138252702418, "loss": 4.7083, "step": 61170 }, { "epoch": 0.1171892583536614, "grad_norm": 1.2550673484802246, "learning_rate": 0.00011715053729113324, "loss": 4.5439, "step": 61180 }, { "epoch": 0.11720841318503662, "grad_norm": 1.257554531097412, "learning_rate": 0.00011716969205524233, "loss": 4.6508, "step": 61190 }, { "epoch": 0.11722756801641186, "grad_norm": 1.2820758819580078, "learning_rate": 0.00011718884681935142, "loss": 4.6955, "step": 61200 }, { "epoch": 0.11724672284778709, "grad_norm": 1.2414438724517822, "learning_rate": 0.00011720800158346049, "loss": 4.5439, "step": 61210 }, { "epoch": 0.11726587767916231, "grad_norm": 1.2093513011932373, "learning_rate": 0.00011722715634756957, "loss": 4.6143, "step": 61220 }, { "epoch": 0.11728503251053755, "grad_norm": 1.2283143997192383, "learning_rate": 0.00011724631111167864, "loss": 4.5814, "step": 61230 }, { "epoch": 0.11730418734191278, "grad_norm": 1.22707200050354, "learning_rate": 0.00011726546587578773, "loss": 4.6825, "step": 61240 }, { "epoch": 0.117323342173288, "grad_norm": 1.2855600118637085, "learning_rate": 0.00011728270516348589, "loss": 4.4837, "step": 
61250 }, { "epoch": 0.11734249700466325, "grad_norm": 1.2490830421447754, "learning_rate": 0.00011730185992759499, "loss": 4.5937, "step": 61260 }, { "epoch": 0.11736165183603847, "grad_norm": 1.3411396741867065, "learning_rate": 0.00011732101469170407, "loss": 4.71, "step": 61270 }, { "epoch": 0.11738080666741371, "grad_norm": 1.2505507469177246, "learning_rate": 0.00011734016945581314, "loss": 4.5602, "step": 61280 }, { "epoch": 0.11739996149878894, "grad_norm": 1.3052517175674438, "learning_rate": 0.00011735932421992223, "loss": 4.5287, "step": 61290 }, { "epoch": 0.11741911633016416, "grad_norm": 1.2674610614776611, "learning_rate": 0.0001173784789840313, "loss": 4.59, "step": 61300 }, { "epoch": 0.1174382711615394, "grad_norm": 1.2699552774429321, "learning_rate": 0.00011739763374814038, "loss": 4.7126, "step": 61310 }, { "epoch": 0.11745742599291463, "grad_norm": 1.285522699356079, "learning_rate": 0.00011741678851224946, "loss": 4.6376, "step": 61320 }, { "epoch": 0.11747658082428986, "grad_norm": 1.2302998304367065, "learning_rate": 0.00011743594327635853, "loss": 4.4989, "step": 61330 }, { "epoch": 0.1174957356556651, "grad_norm": 1.2844294309616089, "learning_rate": 0.00011745509804046762, "loss": 4.6544, "step": 61340 }, { "epoch": 0.11751489048704032, "grad_norm": 1.2486823797225952, "learning_rate": 0.0001174742528045767, "loss": 4.5653, "step": 61350 }, { "epoch": 0.11753404531841555, "grad_norm": 1.2294009923934937, "learning_rate": 0.00011749340756868577, "loss": 4.6199, "step": 61360 }, { "epoch": 0.11755320014979079, "grad_norm": 1.2091989517211914, "learning_rate": 0.00011751256233279487, "loss": 4.4917, "step": 61370 }, { "epoch": 0.11757235498116601, "grad_norm": 1.2342585325241089, "learning_rate": 0.00011753171709690395, "loss": 4.681, "step": 61380 }, { "epoch": 0.11759150981254124, "grad_norm": 1.2454051971435547, "learning_rate": 0.00011755087186101302, "loss": 4.583, "step": 61390 }, { "epoch": 0.11761066464391648, "grad_norm": 
1.2280672788619995, "learning_rate": 0.00011757002662512211, "loss": 4.6845, "step": 61400 }, { "epoch": 0.1176298194752917, "grad_norm": 1.2486459016799927, "learning_rate": 0.00011758918138923118, "loss": 4.5941, "step": 61410 }, { "epoch": 0.11764897430666693, "grad_norm": 1.3462209701538086, "learning_rate": 0.00011760833615334026, "loss": 4.6504, "step": 61420 }, { "epoch": 0.11766812913804217, "grad_norm": 1.253758430480957, "learning_rate": 0.00011762749091744934, "loss": 4.6591, "step": 61430 }, { "epoch": 0.1176872839694174, "grad_norm": 1.2378898859024048, "learning_rate": 0.00011764664568155841, "loss": 4.5588, "step": 61440 }, { "epoch": 0.11770643880079262, "grad_norm": 1.325212836265564, "learning_rate": 0.0001176658004456675, "loss": 4.6171, "step": 61450 }, { "epoch": 0.11772559363216786, "grad_norm": 1.2002601623535156, "learning_rate": 0.00011768495520977659, "loss": 4.713, "step": 61460 }, { "epoch": 0.11774474846354309, "grad_norm": 1.2365546226501465, "learning_rate": 0.00011770410997388565, "loss": 4.682, "step": 61470 }, { "epoch": 0.11776390329491832, "grad_norm": 1.3118383884429932, "learning_rate": 0.00011772326473799475, "loss": 4.6079, "step": 61480 }, { "epoch": 0.11778305812629355, "grad_norm": 1.2680940628051758, "learning_rate": 0.00011774241950210382, "loss": 4.6251, "step": 61490 }, { "epoch": 0.11780221295766878, "grad_norm": 1.2795931100845337, "learning_rate": 0.0001177615742662129, "loss": 4.7032, "step": 61500 }, { "epoch": 0.11782136778904401, "grad_norm": 1.2585111856460571, "learning_rate": 0.00011778072903032199, "loss": 4.5791, "step": 61510 }, { "epoch": 0.11784052262041925, "grad_norm": 1.2446956634521484, "learning_rate": 0.00011779988379443106, "loss": 4.6687, "step": 61520 }, { "epoch": 0.11785967745179447, "grad_norm": 1.2901074886322021, "learning_rate": 0.00011781903855854014, "loss": 4.6672, "step": 61530 }, { "epoch": 0.1178788322831697, "grad_norm": 1.2584002017974854, "learning_rate": 0.00011783819332264922, 
"loss": 4.6033, "step": 61540 }, { "epoch": 0.11789798711454494, "grad_norm": 1.2887145280838013, "learning_rate": 0.00011785734808675829, "loss": 4.6367, "step": 61550 }, { "epoch": 0.11791714194592016, "grad_norm": 1.297065019607544, "learning_rate": 0.00011787650285086738, "loss": 4.6079, "step": 61560 }, { "epoch": 0.11793629677729539, "grad_norm": 1.2153400182724, "learning_rate": 0.00011789565761497647, "loss": 4.7635, "step": 61570 }, { "epoch": 0.11795545160867063, "grad_norm": 1.205741286277771, "learning_rate": 0.00011791481237908553, "loss": 4.6541, "step": 61580 }, { "epoch": 0.11797460644004586, "grad_norm": 1.2380316257476807, "learning_rate": 0.00011793396714319463, "loss": 4.6922, "step": 61590 }, { "epoch": 0.11799376127142108, "grad_norm": 1.2846049070358276, "learning_rate": 0.0001179531219073037, "loss": 4.6571, "step": 61600 }, { "epoch": 0.11801291610279632, "grad_norm": 1.2378122806549072, "learning_rate": 0.00011797227667141278, "loss": 4.7134, "step": 61610 }, { "epoch": 0.11803207093417155, "grad_norm": 1.2305406332015991, "learning_rate": 0.00011799143143552186, "loss": 4.5772, "step": 61620 }, { "epoch": 0.11805122576554677, "grad_norm": 1.242658019065857, "learning_rate": 0.00011801058619963094, "loss": 4.671, "step": 61630 }, { "epoch": 0.11807038059692201, "grad_norm": 1.2027177810668945, "learning_rate": 0.00011802974096374002, "loss": 4.7254, "step": 61640 }, { "epoch": 0.11808953542829724, "grad_norm": 1.3254930973052979, "learning_rate": 0.0001180488957278491, "loss": 4.4514, "step": 61650 }, { "epoch": 0.11810869025967247, "grad_norm": 1.210127830505371, "learning_rate": 0.00011806805049195817, "loss": 4.5676, "step": 61660 }, { "epoch": 0.1181278450910477, "grad_norm": 1.2175160646438599, "learning_rate": 0.00011808720525606727, "loss": 4.7408, "step": 61670 }, { "epoch": 0.11814699992242293, "grad_norm": 1.3211477994918823, "learning_rate": 0.00011810636002017635, "loss": 4.6902, "step": 61680 }, { "epoch": 0.11816615475379816, 
"grad_norm": 1.3226326704025269, "learning_rate": 0.00011812551478428541, "loss": 4.6853, "step": 61690 }, { "epoch": 0.1181853095851734, "grad_norm": 1.3072879314422607, "learning_rate": 0.00011814466954839451, "loss": 4.8314, "step": 61700 }, { "epoch": 0.11820446441654862, "grad_norm": 1.2318711280822754, "learning_rate": 0.00011816382431250358, "loss": 4.5998, "step": 61710 }, { "epoch": 0.11822361924792385, "grad_norm": 1.2162309885025024, "learning_rate": 0.00011818297907661266, "loss": 4.6928, "step": 61720 }, { "epoch": 0.11824277407929909, "grad_norm": 1.289018154144287, "learning_rate": 0.00011820213384072174, "loss": 4.6107, "step": 61730 }, { "epoch": 0.11826192891067432, "grad_norm": 1.3680044412612915, "learning_rate": 0.00011822128860483082, "loss": 4.5549, "step": 61740 }, { "epoch": 0.11828108374204954, "grad_norm": 1.2445082664489746, "learning_rate": 0.0001182404433689399, "loss": 4.6382, "step": 61750 }, { "epoch": 0.11830023857342478, "grad_norm": 1.294301986694336, "learning_rate": 0.00011825959813304898, "loss": 4.6558, "step": 61760 }, { "epoch": 0.11831939340480001, "grad_norm": 1.231257677078247, "learning_rate": 0.00011827875289715805, "loss": 4.6395, "step": 61770 }, { "epoch": 0.11833854823617523, "grad_norm": 1.2170493602752686, "learning_rate": 0.00011829790766126715, "loss": 4.6686, "step": 61780 }, { "epoch": 0.11835770306755047, "grad_norm": 1.3260143995285034, "learning_rate": 0.00011831706242537621, "loss": 4.5678, "step": 61790 }, { "epoch": 0.1183768578989257, "grad_norm": 1.2121121883392334, "learning_rate": 0.0001183362171894853, "loss": 4.6713, "step": 61800 }, { "epoch": 0.11839601273030093, "grad_norm": 1.2724055051803589, "learning_rate": 0.00011835537195359439, "loss": 4.6712, "step": 61810 }, { "epoch": 0.11841516756167617, "grad_norm": 1.2896127700805664, "learning_rate": 0.00011837452671770346, "loss": 4.6261, "step": 61820 }, { "epoch": 0.11843432239305139, "grad_norm": 1.2492623329162598, "learning_rate": 
0.00011839368148181254, "loss": 4.6797, "step": 61830 }, { "epoch": 0.11845347722442662, "grad_norm": 1.2426754236221313, "learning_rate": 0.00011841283624592162, "loss": 4.6115, "step": 61840 }, { "epoch": 0.11847263205580186, "grad_norm": 1.2982735633850098, "learning_rate": 0.0001184319910100307, "loss": 4.6148, "step": 61850 }, { "epoch": 0.11849178688717708, "grad_norm": 1.2606098651885986, "learning_rate": 0.00011845114577413978, "loss": 4.4762, "step": 61860 }, { "epoch": 0.11851094171855231, "grad_norm": 1.2492750883102417, "learning_rate": 0.00011847030053824886, "loss": 4.5829, "step": 61870 }, { "epoch": 0.11853009654992755, "grad_norm": 1.3319700956344604, "learning_rate": 0.00011848945530235793, "loss": 4.5767, "step": 61880 }, { "epoch": 0.11854925138130278, "grad_norm": 1.309891939163208, "learning_rate": 0.00011850861006646703, "loss": 4.6658, "step": 61890 }, { "epoch": 0.118568406212678, "grad_norm": 1.366032361984253, "learning_rate": 0.0001185277648305761, "loss": 4.6613, "step": 61900 }, { "epoch": 0.11858756104405324, "grad_norm": 1.229213833808899, "learning_rate": 0.00011854691959468518, "loss": 4.6377, "step": 61910 }, { "epoch": 0.11860671587542847, "grad_norm": 1.3265280723571777, "learning_rate": 0.00011856607435879427, "loss": 4.7193, "step": 61920 }, { "epoch": 0.11862587070680371, "grad_norm": 1.2645694017410278, "learning_rate": 0.00011858522912290334, "loss": 4.5995, "step": 61930 }, { "epoch": 0.11864502553817893, "grad_norm": 1.2518501281738281, "learning_rate": 0.00011860438388701242, "loss": 4.598, "step": 61940 }, { "epoch": 0.11866418036955416, "grad_norm": 1.2690520286560059, "learning_rate": 0.0001186235386511215, "loss": 4.6475, "step": 61950 }, { "epoch": 0.1186833352009294, "grad_norm": 1.2306245565414429, "learning_rate": 0.00011864269341523058, "loss": 4.6379, "step": 61960 }, { "epoch": 0.11870249003230463, "grad_norm": 1.2584121227264404, "learning_rate": 0.00011866184817933966, "loss": 4.5955, "step": 61970 }, { 
"epoch": 0.11872164486367985, "grad_norm": 1.2440906763076782, "learning_rate": 0.00011868100294344873, "loss": 4.6576, "step": 61980 }, { "epoch": 0.11874079969505509, "grad_norm": 1.2081551551818848, "learning_rate": 0.00011870015770755781, "loss": 4.5535, "step": 61990 }, { "epoch": 0.11875995452643032, "grad_norm": 1.2856217622756958, "learning_rate": 0.00011871931247166691, "loss": 4.5656, "step": 62000 }, { "epoch": 0.11877910935780554, "grad_norm": 1.2429406642913818, "learning_rate": 0.00011873846723577598, "loss": 4.3665, "step": 62010 }, { "epoch": 0.11879826418918078, "grad_norm": 1.2269117832183838, "learning_rate": 0.00011875762199988506, "loss": 4.6713, "step": 62020 }, { "epoch": 0.11881741902055601, "grad_norm": 1.201438069343567, "learning_rate": 0.00011877677676399415, "loss": 4.6701, "step": 62030 }, { "epoch": 0.11883657385193123, "grad_norm": 1.2321698665618896, "learning_rate": 0.00011879593152810322, "loss": 4.6499, "step": 62040 }, { "epoch": 0.11885572868330647, "grad_norm": 1.3810815811157227, "learning_rate": 0.0001188150862922123, "loss": 4.667, "step": 62050 }, { "epoch": 0.1188748835146817, "grad_norm": 1.2552622556686401, "learning_rate": 0.00011883424105632138, "loss": 4.4778, "step": 62060 }, { "epoch": 0.11889403834605693, "grad_norm": 1.2854115962982178, "learning_rate": 0.00011885339582043046, "loss": 4.5699, "step": 62070 }, { "epoch": 0.11891319317743217, "grad_norm": 1.2727882862091064, "learning_rate": 0.00011887255058453955, "loss": 4.6419, "step": 62080 }, { "epoch": 0.11893234800880739, "grad_norm": 1.2633510828018188, "learning_rate": 0.00011889170534864861, "loss": 4.773, "step": 62090 }, { "epoch": 0.11895150284018262, "grad_norm": 1.2208480834960938, "learning_rate": 0.0001189108601127577, "loss": 4.667, "step": 62100 }, { "epoch": 0.11897065767155786, "grad_norm": 1.2831190824508667, "learning_rate": 0.00011893001487686679, "loss": 4.6633, "step": 62110 }, { "epoch": 0.11898981250293308, "grad_norm": 
1.2611351013183594, "learning_rate": 0.00011894916964097586, "loss": 4.5745, "step": 62120 }, { "epoch": 0.11900896733430831, "grad_norm": 1.3001304864883423, "learning_rate": 0.00011896832440508494, "loss": 4.6141, "step": 62130 }, { "epoch": 0.11902812216568355, "grad_norm": 1.317517876625061, "learning_rate": 0.00011898747916919403, "loss": 4.7095, "step": 62140 }, { "epoch": 0.11904727699705878, "grad_norm": 1.259527564048767, "learning_rate": 0.0001190066339333031, "loss": 4.8137, "step": 62150 }, { "epoch": 0.119066431828434, "grad_norm": 1.228148102760315, "learning_rate": 0.00011902578869741218, "loss": 4.6706, "step": 62160 }, { "epoch": 0.11908558665980924, "grad_norm": 1.225210428237915, "learning_rate": 0.00011904494346152125, "loss": 4.7124, "step": 62170 }, { "epoch": 0.11910474149118447, "grad_norm": 1.2580424547195435, "learning_rate": 0.00011906409822563034, "loss": 4.5799, "step": 62180 }, { "epoch": 0.1191238963225597, "grad_norm": 1.2337031364440918, "learning_rate": 0.00011908325298973943, "loss": 4.5978, "step": 62190 }, { "epoch": 0.11914305115393493, "grad_norm": 1.206788420677185, "learning_rate": 0.0001191024077538485, "loss": 4.7146, "step": 62200 }, { "epoch": 0.11916220598531016, "grad_norm": 1.2161915302276611, "learning_rate": 0.00011912156251795758, "loss": 4.6818, "step": 62210 }, { "epoch": 0.11918136081668539, "grad_norm": 1.263684630393982, "learning_rate": 0.00011914071728206667, "loss": 4.7925, "step": 62220 }, { "epoch": 0.11920051564806063, "grad_norm": 1.2547823190689087, "learning_rate": 0.00011915987204617574, "loss": 4.7023, "step": 62230 }, { "epoch": 0.11921967047943585, "grad_norm": 1.2119543552398682, "learning_rate": 0.00011917902681028482, "loss": 4.5319, "step": 62240 }, { "epoch": 0.11923882531081108, "grad_norm": 1.2215666770935059, "learning_rate": 0.00011919818157439391, "loss": 4.658, "step": 62250 }, { "epoch": 0.11925798014218632, "grad_norm": 1.3176989555358887, "learning_rate": 0.00011921733633850298, 
"loss": 4.6115, "step": 62260 }, { "epoch": 0.11927713497356154, "grad_norm": 1.2672433853149414, "learning_rate": 0.00011923649110261206, "loss": 4.6423, "step": 62270 }, { "epoch": 0.11929628980493677, "grad_norm": 1.2119146585464478, "learning_rate": 0.00011925564586672113, "loss": 4.6338, "step": 62280 }, { "epoch": 0.11931544463631201, "grad_norm": 1.2742838859558105, "learning_rate": 0.00011927480063083023, "loss": 4.6841, "step": 62290 }, { "epoch": 0.11933459946768724, "grad_norm": 1.2672021389007568, "learning_rate": 0.0001192939553949393, "loss": 4.5707, "step": 62300 }, { "epoch": 0.11935375429906246, "grad_norm": 1.395836353302002, "learning_rate": 0.00011931311015904837, "loss": 4.5417, "step": 62310 }, { "epoch": 0.1193729091304377, "grad_norm": 1.2475285530090332, "learning_rate": 0.00011933226492315746, "loss": 4.6035, "step": 62320 }, { "epoch": 0.11939206396181293, "grad_norm": 1.2200369834899902, "learning_rate": 0.00011935141968726655, "loss": 4.5684, "step": 62330 }, { "epoch": 0.11941121879318815, "grad_norm": 1.2156203985214233, "learning_rate": 0.00011937057445137562, "loss": 4.6257, "step": 62340 }, { "epoch": 0.1194303736245634, "grad_norm": 1.2363742589950562, "learning_rate": 0.0001193897292154847, "loss": 4.5741, "step": 62350 }, { "epoch": 0.11944952845593862, "grad_norm": 1.241987943649292, "learning_rate": 0.0001194088839795938, "loss": 4.8035, "step": 62360 }, { "epoch": 0.11946868328731385, "grad_norm": 1.1905652284622192, "learning_rate": 0.00011942803874370286, "loss": 4.5131, "step": 62370 }, { "epoch": 0.11948783811868909, "grad_norm": 1.2176403999328613, "learning_rate": 0.00011944719350781194, "loss": 4.7132, "step": 62380 }, { "epoch": 0.11950699295006431, "grad_norm": 1.2531496286392212, "learning_rate": 0.00011946634827192101, "loss": 4.5818, "step": 62390 }, { "epoch": 0.11952614778143954, "grad_norm": 1.2592545747756958, "learning_rate": 0.0001194855030360301, "loss": 4.6068, "step": 62400 }, { "epoch": 
0.11954530261281478, "grad_norm": 1.2480974197387695, "learning_rate": 0.00011950465780013919, "loss": 4.5994, "step": 62410 }, { "epoch": 0.11956445744419, "grad_norm": 1.239676833152771, "learning_rate": 0.00011952381256424826, "loss": 4.5934, "step": 62420 }, { "epoch": 0.11958361227556523, "grad_norm": 1.2441946268081665, "learning_rate": 0.00011954296732835734, "loss": 4.6213, "step": 62430 }, { "epoch": 0.11960276710694047, "grad_norm": 1.2347437143325806, "learning_rate": 0.00011956212209246643, "loss": 4.765, "step": 62440 }, { "epoch": 0.1196219219383157, "grad_norm": 1.2895784378051758, "learning_rate": 0.0001195812768565755, "loss": 4.5888, "step": 62450 }, { "epoch": 0.11964107676969092, "grad_norm": 1.203643798828125, "learning_rate": 0.00011960043162068458, "loss": 4.6363, "step": 62460 }, { "epoch": 0.11966023160106616, "grad_norm": 1.2259693145751953, "learning_rate": 0.00011961958638479365, "loss": 4.6394, "step": 62470 }, { "epoch": 0.11967938643244139, "grad_norm": 1.2770553827285767, "learning_rate": 0.00011963874114890274, "loss": 4.5937, "step": 62480 }, { "epoch": 0.11969854126381661, "grad_norm": 1.226736307144165, "learning_rate": 0.00011965789591301182, "loss": 4.6389, "step": 62490 }, { "epoch": 0.11971769609519185, "grad_norm": 1.2366557121276855, "learning_rate": 0.00011967705067712089, "loss": 4.5735, "step": 62500 }, { "epoch": 0.11973685092656708, "grad_norm": 1.2223622798919678, "learning_rate": 0.00011969620544122999, "loss": 4.5712, "step": 62510 }, { "epoch": 0.1197560057579423, "grad_norm": 1.1880608797073364, "learning_rate": 0.00011971536020533907, "loss": 4.7198, "step": 62520 }, { "epoch": 0.11977516058931754, "grad_norm": 1.2668731212615967, "learning_rate": 0.00011973451496944814, "loss": 4.4465, "step": 62530 }, { "epoch": 0.11979431542069277, "grad_norm": 1.3143337965011597, "learning_rate": 0.00011975366973355722, "loss": 4.4983, "step": 62540 }, { "epoch": 0.119813470252068, "grad_norm": 1.1780509948730469, 
"learning_rate": 0.00011977282449766631, "loss": 4.6265, "step": 62550 }, { "epoch": 0.11983262508344324, "grad_norm": 1.2482565641403198, "learning_rate": 0.00011979197926177538, "loss": 4.606, "step": 62560 }, { "epoch": 0.11985177991481846, "grad_norm": 1.2293416261672974, "learning_rate": 0.00011981113402588446, "loss": 4.8146, "step": 62570 }, { "epoch": 0.1198709347461937, "grad_norm": 1.253226637840271, "learning_rate": 0.00011983028878999353, "loss": 4.6053, "step": 62580 }, { "epoch": 0.11989008957756893, "grad_norm": 1.2139719724655151, "learning_rate": 0.00011984944355410262, "loss": 4.6846, "step": 62590 }, { "epoch": 0.11990924440894415, "grad_norm": 1.3129652738571167, "learning_rate": 0.0001198685983182117, "loss": 4.5576, "step": 62600 }, { "epoch": 0.1199283992403194, "grad_norm": 1.1922234296798706, "learning_rate": 0.00011988775308232077, "loss": 4.698, "step": 62610 }, { "epoch": 0.11994755407169462, "grad_norm": 1.204681634902954, "learning_rate": 0.00011990690784642987, "loss": 4.7032, "step": 62620 }, { "epoch": 0.11996670890306985, "grad_norm": 1.3475569486618042, "learning_rate": 0.00011992606261053895, "loss": 4.633, "step": 62630 }, { "epoch": 0.11998586373444509, "grad_norm": 1.295149803161621, "learning_rate": 0.00011994521737464802, "loss": 4.4509, "step": 62640 }, { "epoch": 0.12000501856582031, "grad_norm": 1.1867042779922485, "learning_rate": 0.0001199643721387571, "loss": 4.7639, "step": 62650 }, { "epoch": 0.12002417339719554, "grad_norm": 1.2297372817993164, "learning_rate": 0.00011998352690286617, "loss": 4.7187, "step": 62660 }, { "epoch": 0.12004332822857078, "grad_norm": 1.2279125452041626, "learning_rate": 0.00012000268166697526, "loss": 4.543, "step": 62670 }, { "epoch": 0.120062483059946, "grad_norm": 1.3104151487350464, "learning_rate": 0.00012002183643108434, "loss": 4.6281, "step": 62680 }, { "epoch": 0.12008163789132123, "grad_norm": 1.2786355018615723, "learning_rate": 0.00012004099119519341, "loss": 4.5443, "step": 
62690 }, { "epoch": 0.12010079272269647, "grad_norm": 1.2496180534362793, "learning_rate": 0.0001200601459593025, "loss": 4.6581, "step": 62700 }, { "epoch": 0.1201199475540717, "grad_norm": 1.2325468063354492, "learning_rate": 0.00012007930072341159, "loss": 4.5856, "step": 62710 }, { "epoch": 0.12013910238544692, "grad_norm": 1.211103916168213, "learning_rate": 0.00012009845548752065, "loss": 4.5705, "step": 62720 }, { "epoch": 0.12015825721682216, "grad_norm": 1.2623052597045898, "learning_rate": 0.00012011761025162975, "loss": 4.5961, "step": 62730 }, { "epoch": 0.12017741204819739, "grad_norm": 1.299439787864685, "learning_rate": 0.00012013676501573883, "loss": 4.6557, "step": 62740 }, { "epoch": 0.12019656687957261, "grad_norm": 1.2402067184448242, "learning_rate": 0.0001201559197798479, "loss": 4.6573, "step": 62750 }, { "epoch": 0.12021572171094785, "grad_norm": 1.2130589485168457, "learning_rate": 0.00012017507454395698, "loss": 4.6667, "step": 62760 }, { "epoch": 0.12023487654232308, "grad_norm": 1.226491093635559, "learning_rate": 0.00012019422930806605, "loss": 4.6187, "step": 62770 }, { "epoch": 0.1202540313736983, "grad_norm": 1.2243012189865112, "learning_rate": 0.00012021338407217514, "loss": 4.6164, "step": 62780 }, { "epoch": 0.12027318620507355, "grad_norm": 1.244097352027893, "learning_rate": 0.00012023253883628422, "loss": 4.6082, "step": 62790 }, { "epoch": 0.12029234103644877, "grad_norm": 1.3659114837646484, "learning_rate": 0.00012025169360039329, "loss": 4.6315, "step": 62800 }, { "epoch": 0.120311495867824, "grad_norm": 1.3578402996063232, "learning_rate": 0.00012027084836450239, "loss": 4.6192, "step": 62810 }, { "epoch": 0.12033065069919924, "grad_norm": 1.3792425394058228, "learning_rate": 0.00012029000312861147, "loss": 4.5835, "step": 62820 }, { "epoch": 0.12034980553057446, "grad_norm": 1.1618058681488037, "learning_rate": 0.00012030915789272053, "loss": 4.5155, "step": 62830 }, { "epoch": 0.12036896036194969, "grad_norm": 
1.259734869003296, "learning_rate": 0.00012032831265682963, "loss": 4.7596, "step": 62840 }, { "epoch": 0.12038811519332493, "grad_norm": 1.259177565574646, "learning_rate": 0.0001203474674209387, "loss": 4.52, "step": 62850 }, { "epoch": 0.12040727002470016, "grad_norm": 1.2763867378234863, "learning_rate": 0.00012036662218504778, "loss": 4.5996, "step": 62860 }, { "epoch": 0.12042642485607538, "grad_norm": 1.3038825988769531, "learning_rate": 0.00012038577694915686, "loss": 4.5447, "step": 62870 }, { "epoch": 0.12044557968745062, "grad_norm": 1.2416579723358154, "learning_rate": 0.00012040493171326593, "loss": 4.6141, "step": 62880 }, { "epoch": 0.12046473451882585, "grad_norm": 1.2901225090026855, "learning_rate": 0.00012042408647737502, "loss": 4.7226, "step": 62890 }, { "epoch": 0.12048388935020107, "grad_norm": 1.242424488067627, "learning_rate": 0.0001204432412414841, "loss": 4.5652, "step": 62900 }, { "epoch": 0.12050304418157631, "grad_norm": 1.2408242225646973, "learning_rate": 0.00012046239600559317, "loss": 4.5433, "step": 62910 }, { "epoch": 0.12052219901295154, "grad_norm": 1.2062257528305054, "learning_rate": 0.00012048155076970227, "loss": 4.6838, "step": 62920 }, { "epoch": 0.12054135384432677, "grad_norm": 1.2806200981140137, "learning_rate": 0.00012050070553381135, "loss": 4.6719, "step": 62930 }, { "epoch": 0.120560508675702, "grad_norm": 1.2491189241409302, "learning_rate": 0.00012051986029792042, "loss": 4.7167, "step": 62940 }, { "epoch": 0.12057966350707723, "grad_norm": 1.231842279434204, "learning_rate": 0.00012053901506202951, "loss": 4.65, "step": 62950 }, { "epoch": 0.12059881833845246, "grad_norm": 1.3412559032440186, "learning_rate": 0.00012055816982613858, "loss": 4.7254, "step": 62960 }, { "epoch": 0.1206179731698277, "grad_norm": 1.1947609186172485, "learning_rate": 0.00012057732459024766, "loss": 4.7538, "step": 62970 }, { "epoch": 0.12063712800120292, "grad_norm": 1.1763746738433838, "learning_rate": 0.00012059647935435674, 
"loss": 4.8202, "step": 62980 }, { "epoch": 0.12065628283257815, "grad_norm": 1.1845329999923706, "learning_rate": 0.00012061563411846581, "loss": 4.6066, "step": 62990 }, { "epoch": 0.12067543766395339, "grad_norm": 1.201544165611267, "learning_rate": 0.0001206347888825749, "loss": 4.6443, "step": 63000 }, { "epoch": 0.12069459249532861, "grad_norm": 1.2479567527770996, "learning_rate": 0.00012065394364668398, "loss": 4.7791, "step": 63010 }, { "epoch": 0.12071374732670384, "grad_norm": 1.2331832647323608, "learning_rate": 0.00012067309841079305, "loss": 4.6141, "step": 63020 }, { "epoch": 0.12073290215807908, "grad_norm": 1.322999119758606, "learning_rate": 0.00012069225317490215, "loss": 4.5958, "step": 63030 }, { "epoch": 0.1207520569894543, "grad_norm": 1.2286182641983032, "learning_rate": 0.00012071140793901123, "loss": 4.5364, "step": 63040 }, { "epoch": 0.12077121182082953, "grad_norm": 1.5263046026229858, "learning_rate": 0.0001207305627031203, "loss": 4.5313, "step": 63050 }, { "epoch": 0.12079036665220477, "grad_norm": 1.3285365104675293, "learning_rate": 0.00012074971746722938, "loss": 4.7013, "step": 63060 }, { "epoch": 0.12080952148358, "grad_norm": 1.3083869218826294, "learning_rate": 0.00012076887223133846, "loss": 4.6023, "step": 63070 }, { "epoch": 0.12082867631495522, "grad_norm": 1.2521165609359741, "learning_rate": 0.00012078802699544754, "loss": 4.6021, "step": 63080 }, { "epoch": 0.12084783114633046, "grad_norm": 1.1706904172897339, "learning_rate": 0.00012080718175955662, "loss": 4.5949, "step": 63090 }, { "epoch": 0.12086698597770569, "grad_norm": 1.261159896850586, "learning_rate": 0.00012082633652366569, "loss": 4.6167, "step": 63100 }, { "epoch": 0.12088614080908092, "grad_norm": 1.2475101947784424, "learning_rate": 0.00012084549128777478, "loss": 4.5867, "step": 63110 }, { "epoch": 0.12090529564045616, "grad_norm": 1.2547305822372437, "learning_rate": 0.00012086464605188387, "loss": 4.6062, "step": 63120 }, { "epoch": 
0.12092445047183138, "grad_norm": 1.2633557319641113, "learning_rate": 0.00012088380081599293, "loss": 4.6563, "step": 63130 }, { "epoch": 0.12094360530320661, "grad_norm": 1.2069875001907349, "learning_rate": 0.00012090295558010203, "loss": 4.6448, "step": 63140 }, { "epoch": 0.12096276013458185, "grad_norm": 1.210600733757019, "learning_rate": 0.0001209221103442111, "loss": 4.6929, "step": 63150 }, { "epoch": 0.12098191496595707, "grad_norm": 1.2840725183486938, "learning_rate": 0.00012094126510832018, "loss": 4.5583, "step": 63160 }, { "epoch": 0.1210010697973323, "grad_norm": 1.2050409317016602, "learning_rate": 0.00012096041987242926, "loss": 4.6789, "step": 63170 }, { "epoch": 0.12102022462870754, "grad_norm": 1.2381385564804077, "learning_rate": 0.00012097957463653834, "loss": 4.5648, "step": 63180 }, { "epoch": 0.12103937946008277, "grad_norm": 1.2331064939498901, "learning_rate": 0.00012099872940064742, "loss": 4.6083, "step": 63190 }, { "epoch": 0.12105853429145799, "grad_norm": 1.2088799476623535, "learning_rate": 0.0001210178841647565, "loss": 4.5988, "step": 63200 }, { "epoch": 0.12107768912283323, "grad_norm": 1.2261449098587036, "learning_rate": 0.00012103703892886557, "loss": 4.6557, "step": 63210 }, { "epoch": 0.12109684395420846, "grad_norm": 1.1975979804992676, "learning_rate": 0.00012105619369297466, "loss": 4.5753, "step": 63220 }, { "epoch": 0.12111599878558368, "grad_norm": 1.2049280405044556, "learning_rate": 0.00012107534845708375, "loss": 4.5508, "step": 63230 }, { "epoch": 0.12113515361695892, "grad_norm": 1.237928032875061, "learning_rate": 0.00012109450322119281, "loss": 4.6737, "step": 63240 }, { "epoch": 0.12115430844833415, "grad_norm": 1.2233227491378784, "learning_rate": 0.00012111365798530191, "loss": 4.5199, "step": 63250 }, { "epoch": 0.12117346327970939, "grad_norm": 1.2602660655975342, "learning_rate": 0.00012113281274941098, "loss": 4.7064, "step": 63260 }, { "epoch": 0.12119261811108462, "grad_norm": 1.253617763519287, 
"learning_rate": 0.00012115196751352006, "loss": 4.4807, "step": 63270 }, { "epoch": 0.12121177294245984, "grad_norm": null, "learning_rate": 0.00012117112227762914, "loss": 4.5084, "step": 63280 }, { "epoch": 0.12123092777383508, "grad_norm": 1.169669508934021, "learning_rate": 0.00012118836156532731, "loss": 4.6993, "step": 63290 }, { "epoch": 0.12125008260521031, "grad_norm": 1.2817671298980713, "learning_rate": 0.0001212075163294364, "loss": 4.6127, "step": 63300 }, { "epoch": 0.12126923743658553, "grad_norm": 1.2055737972259521, "learning_rate": 0.00012122667109354548, "loss": 4.5652, "step": 63310 }, { "epoch": 0.12128839226796077, "grad_norm": 1.234437108039856, "learning_rate": 0.00012124582585765456, "loss": 4.5921, "step": 63320 }, { "epoch": 0.121307547099336, "grad_norm": 1.277962565422058, "learning_rate": 0.00012126498062176362, "loss": 4.7081, "step": 63330 }, { "epoch": 0.12132670193071123, "grad_norm": 1.2598247528076172, "learning_rate": 0.0001212841353858727, "loss": 4.4731, "step": 63340 }, { "epoch": 0.12134585676208647, "grad_norm": 1.272920846939087, "learning_rate": 0.0001213032901499818, "loss": 4.6409, "step": 63350 }, { "epoch": 0.12136501159346169, "grad_norm": 1.2363929748535156, "learning_rate": 0.00012132244491409087, "loss": 4.4965, "step": 63360 }, { "epoch": 0.12138416642483692, "grad_norm": 1.2199145555496216, "learning_rate": 0.00012134159967819995, "loss": 4.5568, "step": 63370 }, { "epoch": 0.12140332125621216, "grad_norm": 1.2403450012207031, "learning_rate": 0.00012136075444230904, "loss": 4.7255, "step": 63380 }, { "epoch": 0.12142247608758738, "grad_norm": 1.2164660692214966, "learning_rate": 0.00012137990920641811, "loss": 4.7256, "step": 63390 }, { "epoch": 0.12144163091896261, "grad_norm": 1.216729998588562, "learning_rate": 0.00012139906397052719, "loss": 4.6148, "step": 63400 }, { "epoch": 0.12146078575033785, "grad_norm": 1.2998456954956055, "learning_rate": 0.00012141821873463627, "loss": 4.5149, "step": 63410 }, 
{ "epoch": 0.12147994058171308, "grad_norm": 1.2430310249328613, "learning_rate": 0.00012143737349874536, "loss": 4.7227, "step": 63420 }, { "epoch": 0.1214990954130883, "grad_norm": 1.2424496412277222, "learning_rate": 0.00012145652826285444, "loss": 4.6718, "step": 63430 }, { "epoch": 0.12151825024446354, "grad_norm": 1.2086573839187622, "learning_rate": 0.0001214756830269635, "loss": 4.6537, "step": 63440 }, { "epoch": 0.12153740507583877, "grad_norm": 1.2356109619140625, "learning_rate": 0.00012149483779107259, "loss": 4.5918, "step": 63450 }, { "epoch": 0.12155655990721399, "grad_norm": 1.273051142692566, "learning_rate": 0.00012151399255518168, "loss": 4.59, "step": 63460 }, { "epoch": 0.12157571473858923, "grad_norm": 1.2040586471557617, "learning_rate": 0.00012153314731929075, "loss": 4.5666, "step": 63470 }, { "epoch": 0.12159486956996446, "grad_norm": 1.2276772260665894, "learning_rate": 0.00012155230208339983, "loss": 4.6894, "step": 63480 }, { "epoch": 0.12161402440133968, "grad_norm": 1.2186704874038696, "learning_rate": 0.00012157145684750892, "loss": 4.5628, "step": 63490 }, { "epoch": 0.12163317923271492, "grad_norm": 1.2090344429016113, "learning_rate": 0.00012159061161161799, "loss": 4.5598, "step": 63500 }, { "epoch": 0.12165233406409015, "grad_norm": 1.2386966943740845, "learning_rate": 0.00012160976637572707, "loss": 4.6256, "step": 63510 }, { "epoch": 0.12167148889546538, "grad_norm": 1.2595374584197998, "learning_rate": 0.00012162892113983614, "loss": 4.6594, "step": 63520 }, { "epoch": 0.12169064372684062, "grad_norm": 1.251529335975647, "learning_rate": 0.00012164807590394524, "loss": 4.6226, "step": 63530 }, { "epoch": 0.12170979855821584, "grad_norm": 1.1775376796722412, "learning_rate": 0.00012166723066805432, "loss": 4.7009, "step": 63540 }, { "epoch": 0.12172895338959107, "grad_norm": 1.230936884880066, "learning_rate": 0.00012168638543216339, "loss": 4.6916, "step": 63550 }, { "epoch": 0.12174810822096631, "grad_norm": 
1.2037802934646606, "learning_rate": 0.00012170554019627247, "loss": 4.7068, "step": 63560 }, { "epoch": 0.12176726305234153, "grad_norm": 1.1882500648498535, "learning_rate": 0.00012172469496038156, "loss": 4.5404, "step": 63570 }, { "epoch": 0.12178641788371676, "grad_norm": 1.2175809144973755, "learning_rate": 0.00012174384972449063, "loss": 4.6528, "step": 63580 }, { "epoch": 0.121805572715092, "grad_norm": 1.2411651611328125, "learning_rate": 0.00012176300448859971, "loss": 4.541, "step": 63590 }, { "epoch": 0.12182472754646723, "grad_norm": 1.2143089771270752, "learning_rate": 0.0001217821592527088, "loss": 4.7019, "step": 63600 }, { "epoch": 0.12184388237784245, "grad_norm": 1.1965030431747437, "learning_rate": 0.00012180131401681787, "loss": 4.6906, "step": 63610 }, { "epoch": 0.12186303720921769, "grad_norm": 1.2046949863433838, "learning_rate": 0.00012182046878092695, "loss": 4.587, "step": 63620 }, { "epoch": 0.12188219204059292, "grad_norm": 1.5449564456939697, "learning_rate": 0.00012183962354503602, "loss": 4.6357, "step": 63630 }, { "epoch": 0.12190134687196814, "grad_norm": 1.225864052772522, "learning_rate": 0.00012185877830914512, "loss": 4.6152, "step": 63640 }, { "epoch": 0.12192050170334338, "grad_norm": 1.2904621362686157, "learning_rate": 0.0001218779330732542, "loss": 4.5614, "step": 63650 }, { "epoch": 0.12193965653471861, "grad_norm": 1.229353904724121, "learning_rate": 0.00012189708783736327, "loss": 4.53, "step": 63660 }, { "epoch": 0.12195881136609384, "grad_norm": 1.212836742401123, "learning_rate": 0.00012191624260147235, "loss": 4.5661, "step": 63670 }, { "epoch": 0.12197796619746908, "grad_norm": 1.2994362115859985, "learning_rate": 0.00012193539736558144, "loss": 4.6399, "step": 63680 }, { "epoch": 0.1219971210288443, "grad_norm": 1.2177139520645142, "learning_rate": 0.00012195455212969051, "loss": 4.6095, "step": 63690 }, { "epoch": 0.12201627586021953, "grad_norm": 1.190482497215271, "learning_rate": 0.00012197370689379959, 
"loss": 4.6435, "step": 63700 }, { "epoch": 0.12203543069159477, "grad_norm": 1.1994837522506714, "learning_rate": 0.00012199286165790869, "loss": 4.5952, "step": 63710 }, { "epoch": 0.12205458552297, "grad_norm": 1.3030768632888794, "learning_rate": 0.00012201201642201775, "loss": 4.6592, "step": 63720 }, { "epoch": 0.12207374035434522, "grad_norm": 1.2440738677978516, "learning_rate": 0.00012203117118612684, "loss": 4.6523, "step": 63730 }, { "epoch": 0.12209289518572046, "grad_norm": 1.2673269510269165, "learning_rate": 0.0001220503259502359, "loss": 4.624, "step": 63740 }, { "epoch": 0.12211205001709569, "grad_norm": 1.4203952550888062, "learning_rate": 0.000122069480714345, "loss": 4.7158, "step": 63750 }, { "epoch": 0.12213120484847091, "grad_norm": 1.2470828294754028, "learning_rate": 0.00012208863547845408, "loss": 4.5825, "step": 63760 }, { "epoch": 0.12215035967984615, "grad_norm": 1.2407742738723755, "learning_rate": 0.00012210779024256313, "loss": 4.5788, "step": 63770 }, { "epoch": 0.12216951451122138, "grad_norm": 1.2917523384094238, "learning_rate": 0.00012212694500667224, "loss": 4.5193, "step": 63780 }, { "epoch": 0.1221886693425966, "grad_norm": 1.2133257389068604, "learning_rate": 0.00012214609977078132, "loss": 4.6101, "step": 63790 }, { "epoch": 0.12220782417397184, "grad_norm": 1.2766417264938354, "learning_rate": 0.00012216525453489038, "loss": 4.6819, "step": 63800 }, { "epoch": 0.12222697900534707, "grad_norm": 1.1892046928405762, "learning_rate": 0.00012218440929899949, "loss": 4.6586, "step": 63810 }, { "epoch": 0.1222461338367223, "grad_norm": 1.2338132858276367, "learning_rate": 0.00012220356406310854, "loss": 4.5953, "step": 63820 }, { "epoch": 0.12226528866809754, "grad_norm": 1.2396595478057861, "learning_rate": 0.00012222271882721762, "loss": 4.6474, "step": 63830 }, { "epoch": 0.12228444349947276, "grad_norm": 1.27825927734375, "learning_rate": 0.00012224187359132673, "loss": 4.6001, "step": 63840 }, { "epoch": 0.12230359833084799, 
"grad_norm": 1.219579815864563, "learning_rate": 0.00012226102835543578, "loss": 4.6302, "step": 63850 }, { "epoch": 0.12232275316222323, "grad_norm": 1.2731751203536987, "learning_rate": 0.00012228018311954487, "loss": 4.6489, "step": 63860 }, { "epoch": 0.12234190799359845, "grad_norm": 1.2483875751495361, "learning_rate": 0.00012229933788365395, "loss": 4.6095, "step": 63870 }, { "epoch": 0.12236106282497368, "grad_norm": 1.2641406059265137, "learning_rate": 0.00012231849264776303, "loss": 4.5499, "step": 63880 }, { "epoch": 0.12238021765634892, "grad_norm": 1.2608354091644287, "learning_rate": 0.0001223376474118721, "loss": 4.5918, "step": 63890 }, { "epoch": 0.12239937248772415, "grad_norm": 1.2580031156539917, "learning_rate": 0.0001223568021759812, "loss": 4.5318, "step": 63900 }, { "epoch": 0.12241852731909939, "grad_norm": 1.2204270362854004, "learning_rate": 0.00012237595694009027, "loss": 4.5789, "step": 63910 }, { "epoch": 0.12243768215047461, "grad_norm": 1.2146573066711426, "learning_rate": 0.00012239511170419935, "loss": 4.5165, "step": 63920 }, { "epoch": 0.12245683698184984, "grad_norm": 1.3036463260650635, "learning_rate": 0.00012241426646830843, "loss": 4.5463, "step": 63930 }, { "epoch": 0.12247599181322508, "grad_norm": 1.1955457925796509, "learning_rate": 0.00012243342123241752, "loss": 4.7287, "step": 63940 }, { "epoch": 0.1224951466446003, "grad_norm": 1.240077257156372, "learning_rate": 0.0001224525759965266, "loss": 4.553, "step": 63950 }, { "epoch": 0.12251430147597553, "grad_norm": 1.2112252712249756, "learning_rate": 0.00012247173076063568, "loss": 4.5885, "step": 63960 }, { "epoch": 0.12253345630735077, "grad_norm": 1.2374054193496704, "learning_rate": 0.00012249088552474476, "loss": 4.5786, "step": 63970 }, { "epoch": 0.122552611138726, "grad_norm": 1.23727548122406, "learning_rate": 0.00012251004028885384, "loss": 4.6145, "step": 63980 }, { "epoch": 0.12257176597010122, "grad_norm": 1.181066870689392, "learning_rate": 
0.0001225291950529629, "loss": 4.5428, "step": 63990 }, { "epoch": 0.12259092080147646, "grad_norm": 1.1988948583602905, "learning_rate": 0.000122548349817072, "loss": 4.8065, "step": 64000 }, { "epoch": 0.12261007563285169, "grad_norm": 1.2069059610366821, "learning_rate": 0.00012256750458118106, "loss": 4.5971, "step": 64010 }, { "epoch": 0.12262923046422691, "grad_norm": 1.2586110830307007, "learning_rate": 0.00012258665934529014, "loss": 4.5627, "step": 64020 }, { "epoch": 0.12264838529560215, "grad_norm": 1.2022451162338257, "learning_rate": 0.00012260581410939925, "loss": 4.7028, "step": 64030 }, { "epoch": 0.12266754012697738, "grad_norm": 1.2533376216888428, "learning_rate": 0.0001226249688735083, "loss": 4.3851, "step": 64040 }, { "epoch": 0.1226866949583526, "grad_norm": 1.1959607601165771, "learning_rate": 0.00012264412363761738, "loss": 4.717, "step": 64050 }, { "epoch": 0.12270584978972784, "grad_norm": 1.212480068206787, "learning_rate": 0.00012266327840172646, "loss": 4.6274, "step": 64060 }, { "epoch": 0.12272500462110307, "grad_norm": 1.311838984489441, "learning_rate": 0.00012268243316583555, "loss": 4.6531, "step": 64070 }, { "epoch": 0.1227441594524783, "grad_norm": 1.2653477191925049, "learning_rate": 0.00012270158792994463, "loss": 4.5788, "step": 64080 }, { "epoch": 0.12276331428385354, "grad_norm": 1.2734898328781128, "learning_rate": 0.0001227207426940537, "loss": 4.5827, "step": 64090 }, { "epoch": 0.12278246911522876, "grad_norm": 1.2124961614608765, "learning_rate": 0.0001227398974581628, "loss": 4.6833, "step": 64100 }, { "epoch": 0.12280162394660399, "grad_norm": 1.2200368642807007, "learning_rate": 0.00012275905222227187, "loss": 4.628, "step": 64110 }, { "epoch": 0.12282077877797923, "grad_norm": 1.2461833953857422, "learning_rate": 0.00012277820698638095, "loss": 4.5579, "step": 64120 }, { "epoch": 0.12283993360935445, "grad_norm": 1.197148084640503, "learning_rate": 0.00012279736175049003, "loss": 4.5271, "step": 64130 }, { 
"epoch": 0.12285908844072968, "grad_norm": 1.1889779567718506, "learning_rate": 0.00012281651651459911, "loss": 4.5588, "step": 64140 }, { "epoch": 0.12287824327210492, "grad_norm": 1.2276561260223389, "learning_rate": 0.0001228356712787082, "loss": 4.5184, "step": 64150 }, { "epoch": 0.12289739810348015, "grad_norm": 1.1934120655059814, "learning_rate": 0.00012285482604281728, "loss": 4.53, "step": 64160 }, { "epoch": 0.12291655293485537, "grad_norm": 1.221915602684021, "learning_rate": 0.00012287398080692636, "loss": 4.5988, "step": 64170 }, { "epoch": 0.12293570776623061, "grad_norm": 1.281754970550537, "learning_rate": 0.00012289313557103544, "loss": 4.5967, "step": 64180 }, { "epoch": 0.12295486259760584, "grad_norm": 1.2251898050308228, "learning_rate": 0.00012291229033514452, "loss": 4.752, "step": 64190 }, { "epoch": 0.12297401742898106, "grad_norm": 1.2112243175506592, "learning_rate": 0.00012293144509925358, "loss": 4.5562, "step": 64200 }, { "epoch": 0.1229931722603563, "grad_norm": 1.2330074310302734, "learning_rate": 0.00012295059986336266, "loss": 4.7059, "step": 64210 }, { "epoch": 0.12301232709173153, "grad_norm": 1.2153688669204712, "learning_rate": 0.00012296975462747177, "loss": 4.4643, "step": 64220 }, { "epoch": 0.12303148192310676, "grad_norm": 1.2530032396316528, "learning_rate": 0.00012298890939158082, "loss": 4.5323, "step": 64230 }, { "epoch": 0.123050636754482, "grad_norm": 1.2504632472991943, "learning_rate": 0.0001230080641556899, "loss": 4.5473, "step": 64240 }, { "epoch": 0.12306979158585722, "grad_norm": 1.2158198356628418, "learning_rate": 0.000123027218919799, "loss": 4.74, "step": 64250 }, { "epoch": 0.12308894641723245, "grad_norm": 1.2864477634429932, "learning_rate": 0.00012304637368390806, "loss": 4.5699, "step": 64260 }, { "epoch": 0.12310810124860769, "grad_norm": 1.2172188758850098, "learning_rate": 0.00012306552844801714, "loss": 4.5455, "step": 64270 }, { "epoch": 0.12312725607998291, "grad_norm": 1.2445470094680786, 
"learning_rate": 0.00012308468321212623, "loss": 4.6772, "step": 64280 }, { "epoch": 0.12314641091135814, "grad_norm": 1.2046329975128174, "learning_rate": 0.0001231038379762353, "loss": 4.5523, "step": 64290 }, { "epoch": 0.12316556574273338, "grad_norm": 1.214277744293213, "learning_rate": 0.0001231229927403444, "loss": 4.557, "step": 64300 }, { "epoch": 0.1231847205741086, "grad_norm": 1.2838279008865356, "learning_rate": 0.00012314214750445347, "loss": 4.7066, "step": 64310 }, { "epoch": 0.12320387540548383, "grad_norm": 1.2852121591567993, "learning_rate": 0.00012316130226856255, "loss": 4.5658, "step": 64320 }, { "epoch": 0.12322303023685907, "grad_norm": 1.2295197248458862, "learning_rate": 0.00012318045703267163, "loss": 4.7346, "step": 64330 }, { "epoch": 0.1232421850682343, "grad_norm": 1.2999030351638794, "learning_rate": 0.00012319961179678071, "loss": 4.5466, "step": 64340 }, { "epoch": 0.12326133989960952, "grad_norm": 1.261469841003418, "learning_rate": 0.0001232187665608898, "loss": 4.5547, "step": 64350 }, { "epoch": 0.12328049473098476, "grad_norm": 1.3170281648635864, "learning_rate": 0.00012323792132499888, "loss": 4.6053, "step": 64360 }, { "epoch": 0.12329964956235999, "grad_norm": 1.1730988025665283, "learning_rate": 0.00012325707608910796, "loss": 4.5828, "step": 64370 }, { "epoch": 0.12331880439373522, "grad_norm": 1.2097495794296265, "learning_rate": 0.00012327623085321704, "loss": 4.619, "step": 64380 }, { "epoch": 0.12333795922511046, "grad_norm": 1.1642135381698608, "learning_rate": 0.00012329538561732612, "loss": 4.6676, "step": 64390 }, { "epoch": 0.12335711405648568, "grad_norm": 1.3004367351531982, "learning_rate": 0.0001233145403814352, "loss": 4.7746, "step": 64400 }, { "epoch": 0.12337626888786091, "grad_norm": 1.220973253250122, "learning_rate": 0.00012333369514554428, "loss": 4.6332, "step": 64410 }, { "epoch": 0.12339542371923615, "grad_norm": 1.203568696975708, "learning_rate": 0.00012335284990965334, "loss": 4.6359, "step": 
64420 }, { "epoch": 0.12341457855061137, "grad_norm": 1.2144240140914917, "learning_rate": 0.00012337200467376242, "loss": 4.6846, "step": 64430 }, { "epoch": 0.1234337333819866, "grad_norm": 1.3160068988800049, "learning_rate": 0.00012339115943787153, "loss": 4.544, "step": 64440 }, { "epoch": 0.12345288821336184, "grad_norm": 1.2269694805145264, "learning_rate": 0.00012341031420198058, "loss": 4.703, "step": 64450 }, { "epoch": 0.12347204304473706, "grad_norm": 1.204218864440918, "learning_rate": 0.00012342946896608966, "loss": 4.6581, "step": 64460 }, { "epoch": 0.12349119787611229, "grad_norm": 1.1645315885543823, "learning_rate": 0.00012344862373019877, "loss": 4.63, "step": 64470 }, { "epoch": 0.12351035270748753, "grad_norm": 1.2834546566009521, "learning_rate": 0.00012346777849430783, "loss": 4.569, "step": 64480 }, { "epoch": 0.12352950753886276, "grad_norm": 1.202275276184082, "learning_rate": 0.0001234869332584169, "loss": 4.6705, "step": 64490 }, { "epoch": 0.12354866237023798, "grad_norm": 1.1715459823608398, "learning_rate": 0.000123506088022526, "loss": 4.6406, "step": 64500 }, { "epoch": 0.12356781720161322, "grad_norm": 1.201870083808899, "learning_rate": 0.00012352524278663507, "loss": 4.6076, "step": 64510 }, { "epoch": 0.12358697203298845, "grad_norm": 1.2498111724853516, "learning_rate": 0.00012354439755074415, "loss": 4.6257, "step": 64520 }, { "epoch": 0.12360612686436367, "grad_norm": 1.2258338928222656, "learning_rate": 0.00012356355231485323, "loss": 4.7002, "step": 64530 }, { "epoch": 0.12362528169573891, "grad_norm": 1.2401180267333984, "learning_rate": 0.0001235827070789623, "loss": 4.6887, "step": 64540 }, { "epoch": 0.12364443652711414, "grad_norm": 1.2223026752471924, "learning_rate": 0.0001236018618430714, "loss": 4.6379, "step": 64550 }, { "epoch": 0.12366359135848937, "grad_norm": 1.2006206512451172, "learning_rate": 0.00012362101660718048, "loss": 4.5548, "step": 64560 }, { "epoch": 0.1236827461898646, "grad_norm": 
1.18584406375885, "learning_rate": 0.00012364017137128956, "loss": 4.6877, "step": 64570 }, { "epoch": 0.12370190102123983, "grad_norm": 1.2203011512756348, "learning_rate": 0.00012365932613539864, "loss": 4.644, "step": 64580 }, { "epoch": 0.12372105585261507, "grad_norm": 1.2150366306304932, "learning_rate": 0.00012367848089950772, "loss": 4.6744, "step": 64590 }, { "epoch": 0.1237402106839903, "grad_norm": 1.208173155784607, "learning_rate": 0.0001236976356636168, "loss": 4.7679, "step": 64600 }, { "epoch": 0.12375936551536552, "grad_norm": 1.203813076019287, "learning_rate": 0.00012371679042772586, "loss": 4.5574, "step": 64610 }, { "epoch": 0.12377852034674076, "grad_norm": 1.2398535013198853, "learning_rate": 0.00012373594519183496, "loss": 4.8063, "step": 64620 }, { "epoch": 0.12379767517811599, "grad_norm": 1.2560220956802368, "learning_rate": 0.00012375509995594404, "loss": 4.5071, "step": 64630 }, { "epoch": 0.12381683000949122, "grad_norm": 1.196229100227356, "learning_rate": 0.0001237742547200531, "loss": 4.7555, "step": 64640 }, { "epoch": 0.12383598484086646, "grad_norm": 1.200850486755371, "learning_rate": 0.00012379340948416218, "loss": 4.639, "step": 64650 }, { "epoch": 0.12385513967224168, "grad_norm": 1.220449686050415, "learning_rate": 0.0001238125642482713, "loss": 4.6413, "step": 64660 }, { "epoch": 0.12387429450361691, "grad_norm": 1.225054383277893, "learning_rate": 0.00012383171901238034, "loss": 4.4211, "step": 64670 }, { "epoch": 0.12389344933499215, "grad_norm": 1.2525235414505005, "learning_rate": 0.00012385087377648942, "loss": 4.6558, "step": 64680 }, { "epoch": 0.12391260416636737, "grad_norm": 1.2476329803466797, "learning_rate": 0.0001238700285405985, "loss": 4.6775, "step": 64690 }, { "epoch": 0.1239317589977426, "grad_norm": 1.229752540588379, "learning_rate": 0.0001238891833047076, "loss": 4.6023, "step": 64700 }, { "epoch": 0.12395091382911784, "grad_norm": 1.1974427700042725, "learning_rate": 0.00012390833806881667, "loss": 
4.5665, "step": 64710 }, { "epoch": 0.12397006866049307, "grad_norm": 1.2072792053222656, "learning_rate": 0.00012392749283292575, "loss": 4.4337, "step": 64720 }, { "epoch": 0.12398922349186829, "grad_norm": 1.1833016872406006, "learning_rate": 0.00012394664759703483, "loss": 4.6108, "step": 64730 }, { "epoch": 0.12400837832324353, "grad_norm": 1.3084325790405273, "learning_rate": 0.0001239658023611439, "loss": 4.6442, "step": 64740 }, { "epoch": 0.12402753315461876, "grad_norm": 1.212447166442871, "learning_rate": 0.000123984957125253, "loss": 4.5763, "step": 64750 }, { "epoch": 0.12404668798599398, "grad_norm": 1.2624881267547607, "learning_rate": 0.00012400219641295118, "loss": 4.7702, "step": 64760 }, { "epoch": 0.12406584281736922, "grad_norm": 1.1865376234054565, "learning_rate": 0.00012402135117706023, "loss": 4.5884, "step": 64770 }, { "epoch": 0.12408499764874445, "grad_norm": 1.2209258079528809, "learning_rate": 0.00012404050594116932, "loss": 4.5907, "step": 64780 }, { "epoch": 0.12410415248011968, "grad_norm": 1.2370367050170898, "learning_rate": 0.0001240596607052784, "loss": 4.5293, "step": 64790 }, { "epoch": 0.12412330731149492, "grad_norm": 1.244170069694519, "learning_rate": 0.00012407881546938748, "loss": 4.6205, "step": 64800 }, { "epoch": 0.12414246214287014, "grad_norm": 1.2125942707061768, "learning_rate": 0.00012409797023349656, "loss": 4.6288, "step": 64810 }, { "epoch": 0.12416161697424537, "grad_norm": 1.2253172397613525, "learning_rate": 0.00012411712499760564, "loss": 4.6912, "step": 64820 }, { "epoch": 0.12418077180562061, "grad_norm": 1.2629977464675903, "learning_rate": 0.00012413627976171472, "loss": 4.5479, "step": 64830 }, { "epoch": 0.12419992663699583, "grad_norm": 1.1993520259857178, "learning_rate": 0.0001241554345258238, "loss": 4.6705, "step": 64840 }, { "epoch": 0.12421908146837106, "grad_norm": 1.180861234664917, "learning_rate": 0.00012417458928993288, "loss": 4.7159, "step": 64850 }, { "epoch": 0.1242382362997463, 
"grad_norm": 1.181432843208313, "learning_rate": 0.00012419374405404197, "loss": 4.587, "step": 64860 }, { "epoch": 0.12425739113112153, "grad_norm": 1.1859116554260254, "learning_rate": 0.00012421289881815105, "loss": 4.5071, "step": 64870 }, { "epoch": 0.12427654596249675, "grad_norm": 1.2397046089172363, "learning_rate": 0.00012423205358226013, "loss": 4.5632, "step": 64880 }, { "epoch": 0.12429570079387199, "grad_norm": 1.1808886528015137, "learning_rate": 0.0001242512083463692, "loss": 4.4791, "step": 64890 }, { "epoch": 0.12431485562524722, "grad_norm": 1.2325938940048218, "learning_rate": 0.00012427036311047826, "loss": 4.5545, "step": 64900 }, { "epoch": 0.12433401045662244, "grad_norm": 1.2117241621017456, "learning_rate": 0.00012428951787458737, "loss": 4.3671, "step": 64910 }, { "epoch": 0.12435316528799768, "grad_norm": 1.199492335319519, "learning_rate": 0.00012430867263869645, "loss": 4.5746, "step": 64920 }, { "epoch": 0.12437232011937291, "grad_norm": 1.1751244068145752, "learning_rate": 0.0001243278274028055, "loss": 4.6105, "step": 64930 }, { "epoch": 0.12439147495074813, "grad_norm": 1.214087724685669, "learning_rate": 0.00012434698216691462, "loss": 4.6294, "step": 64940 }, { "epoch": 0.12441062978212337, "grad_norm": 1.3442696332931519, "learning_rate": 0.0001243661369310237, "loss": 4.726, "step": 64950 }, { "epoch": 0.1244297846134986, "grad_norm": 1.2360585927963257, "learning_rate": 0.00012438529169513275, "loss": 4.6068, "step": 64960 }, { "epoch": 0.12444893944487383, "grad_norm": 1.195174217224121, "learning_rate": 0.00012440444645924183, "loss": 4.6998, "step": 64970 }, { "epoch": 0.12446809427624907, "grad_norm": 1.2287025451660156, "learning_rate": 0.00012442360122335091, "loss": 4.6235, "step": 64980 }, { "epoch": 0.12448724910762429, "grad_norm": 1.2326853275299072, "learning_rate": 0.00012444275598746, "loss": 4.6573, "step": 64990 }, { "epoch": 0.12450640393899952, "grad_norm": 1.3021461963653564, "learning_rate": 
0.00012446191075156908, "loss": 4.6747, "step": 65000 }, { "epoch": 0.12452555877037476, "grad_norm": 1.22629976272583, "learning_rate": 0.00012448106551567816, "loss": 4.7086, "step": 65010 }, { "epoch": 0.12454471360174998, "grad_norm": 1.166014313697815, "learning_rate": 0.00012450022027978724, "loss": 4.5443, "step": 65020 }, { "epoch": 0.12456386843312521, "grad_norm": 1.2114568948745728, "learning_rate": 0.00012451937504389632, "loss": 4.6113, "step": 65030 }, { "epoch": 0.12458302326450045, "grad_norm": 1.1709598302841187, "learning_rate": 0.0001245385298080054, "loss": 4.6175, "step": 65040 }, { "epoch": 0.12460217809587568, "grad_norm": 1.2135052680969238, "learning_rate": 0.00012455768457211448, "loss": 4.6703, "step": 65050 }, { "epoch": 0.1246213329272509, "grad_norm": 1.2109571695327759, "learning_rate": 0.00012457683933622357, "loss": 4.5381, "step": 65060 }, { "epoch": 0.12464048775862614, "grad_norm": 1.1808415651321411, "learning_rate": 0.00012459599410033265, "loss": 4.5866, "step": 65070 }, { "epoch": 0.12465964259000137, "grad_norm": 1.181168794631958, "learning_rate": 0.00012461514886444173, "loss": 4.6645, "step": 65080 }, { "epoch": 0.1246787974213766, "grad_norm": 1.212807059288025, "learning_rate": 0.0001246343036285508, "loss": 4.6313, "step": 65090 }, { "epoch": 0.12469795225275183, "grad_norm": 1.1760694980621338, "learning_rate": 0.0001246534583926599, "loss": 4.6831, "step": 65100 }, { "epoch": 0.12471710708412706, "grad_norm": 1.2551095485687256, "learning_rate": 0.00012467261315676897, "loss": 4.5484, "step": 65110 }, { "epoch": 0.12473626191550229, "grad_norm": 1.2291711568832397, "learning_rate": 0.00012469176792087803, "loss": 4.6581, "step": 65120 }, { "epoch": 0.12475541674687753, "grad_norm": 1.2108149528503418, "learning_rate": 0.00012471092268498713, "loss": 4.7187, "step": 65130 }, { "epoch": 0.12477457157825275, "grad_norm": 1.283535122871399, "learning_rate": 0.00012473007744909622, "loss": 4.5757, "step": 65140 }, { 
"epoch": 0.12479372640962798, "grad_norm": 1.2080074548721313, "learning_rate": 0.00012474923221320527, "loss": 4.6316, "step": 65150 }, { "epoch": 0.12481288124100322, "grad_norm": 1.1692993640899658, "learning_rate": 0.00012476838697731438, "loss": 4.5812, "step": 65160 }, { "epoch": 0.12483203607237844, "grad_norm": 1.2039135694503784, "learning_rate": 0.00012478754174142343, "loss": 4.5963, "step": 65170 }, { "epoch": 0.12485119090375367, "grad_norm": 1.225484013557434, "learning_rate": 0.00012480669650553251, "loss": 4.5637, "step": 65180 }, { "epoch": 0.12487034573512891, "grad_norm": 1.222091794013977, "learning_rate": 0.0001248258512696416, "loss": 4.337, "step": 65190 }, { "epoch": 0.12488950056650414, "grad_norm": 1.1706035137176514, "learning_rate": 0.00012484500603375068, "loss": 4.577, "step": 65200 }, { "epoch": 0.12490865539787936, "grad_norm": 1.277904748916626, "learning_rate": 0.00012486416079785976, "loss": 4.6246, "step": 65210 }, { "epoch": 0.1249278102292546, "grad_norm": 1.2399897575378418, "learning_rate": 0.00012488331556196884, "loss": 4.7434, "step": 65220 }, { "epoch": 0.12494696506062983, "grad_norm": 1.1919162273406982, "learning_rate": 0.00012490247032607792, "loss": 4.6638, "step": 65230 }, { "epoch": 0.12496611989200507, "grad_norm": 1.2471543550491333, "learning_rate": 0.000124921625090187, "loss": 4.7457, "step": 65240 }, { "epoch": 0.1249852747233803, "grad_norm": 1.2281938791275024, "learning_rate": 0.00012494077985429608, "loss": 4.5378, "step": 65250 }, { "epoch": 0.12500442955475552, "grad_norm": 1.2272440195083618, "learning_rate": 0.00012495993461840516, "loss": 4.6392, "step": 65260 }, { "epoch": 0.12502358438613076, "grad_norm": 1.237978219985962, "learning_rate": 0.00012497908938251425, "loss": 4.6332, "step": 65270 }, { "epoch": 0.12504273921750597, "grad_norm": 1.2138301134109497, "learning_rate": 0.00012499824414662333, "loss": 4.6102, "step": 65280 }, { "epoch": 0.1250618940488812, "grad_norm": 1.2271956205368042, 
"learning_rate": 0.0001250173989107324, "loss": 4.6248, "step": 65290 }, { "epoch": 0.12508104888025645, "grad_norm": 1.2356278896331787, "learning_rate": 0.0001250365536748415, "loss": 4.5136, "step": 65300 }, { "epoch": 0.12510020371163166, "grad_norm": 1.2529765367507935, "learning_rate": 0.00012505570843895057, "loss": 4.6283, "step": 65310 }, { "epoch": 0.1251193585430069, "grad_norm": 1.2181744575500488, "learning_rate": 0.00012507486320305965, "loss": 4.5967, "step": 65320 }, { "epoch": 0.12513851337438214, "grad_norm": 1.1935856342315674, "learning_rate": 0.00012509401796716873, "loss": 4.6957, "step": 65330 }, { "epoch": 0.12515766820575736, "grad_norm": 1.1813855171203613, "learning_rate": 0.0001251131727312778, "loss": 4.6251, "step": 65340 }, { "epoch": 0.1251768230371326, "grad_norm": 1.2286065816879272, "learning_rate": 0.0001251323274953869, "loss": 4.4952, "step": 65350 }, { "epoch": 0.12519597786850784, "grad_norm": 1.1733454465866089, "learning_rate": 0.00012515148225949595, "loss": 4.6562, "step": 65360 }, { "epoch": 0.12521513269988305, "grad_norm": 1.2567596435546875, "learning_rate": 0.00012517063702360503, "loss": 4.5073, "step": 65370 }, { "epoch": 0.1252342875312583, "grad_norm": 1.183743953704834, "learning_rate": 0.00012518979178771414, "loss": 4.611, "step": 65380 }, { "epoch": 0.12525344236263353, "grad_norm": 1.200069785118103, "learning_rate": 0.0001252089465518232, "loss": 4.5963, "step": 65390 }, { "epoch": 0.12527259719400874, "grad_norm": 1.2476354837417603, "learning_rate": 0.00012522810131593228, "loss": 4.7039, "step": 65400 }, { "epoch": 0.12529175202538398, "grad_norm": 1.184674859046936, "learning_rate": 0.00012524725608004136, "loss": 4.7243, "step": 65410 }, { "epoch": 0.12531090685675922, "grad_norm": 1.5703409910202026, "learning_rate": 0.00012526641084415044, "loss": 4.7113, "step": 65420 }, { "epoch": 0.12533006168813443, "grad_norm": 1.1803638935089111, "learning_rate": 0.00012528556560825952, "loss": 4.6203, "step": 
65430 }, { "epoch": 0.12534921651950967, "grad_norm": 1.2371456623077393, "learning_rate": 0.0001253047203723686, "loss": 4.6238, "step": 65440 }, { "epoch": 0.1253683713508849, "grad_norm": 1.2345372438430786, "learning_rate": 0.00012532387513647768, "loss": 4.4304, "step": 65450 }, { "epoch": 0.12538752618226012, "grad_norm": 1.1918171644210815, "learning_rate": 0.00012534302990058676, "loss": 4.5878, "step": 65460 }, { "epoch": 0.12540668101363536, "grad_norm": 1.298037052154541, "learning_rate": 0.00012536218466469584, "loss": 4.6783, "step": 65470 }, { "epoch": 0.1254258358450106, "grad_norm": 1.2859392166137695, "learning_rate": 0.00012538133942880493, "loss": 4.6047, "step": 65480 }, { "epoch": 0.12544499067638581, "grad_norm": 1.192900538444519, "learning_rate": 0.000125400494192914, "loss": 4.6591, "step": 65490 }, { "epoch": 0.12546414550776105, "grad_norm": 1.3198860883712769, "learning_rate": 0.0001254196489570231, "loss": 4.5483, "step": 65500 }, { "epoch": 0.1254833003391363, "grad_norm": 1.30918288230896, "learning_rate": 0.00012543880372113217, "loss": 4.6338, "step": 65510 }, { "epoch": 0.1255024551705115, "grad_norm": 1.2057498693466187, "learning_rate": 0.00012545795848524125, "loss": 4.5248, "step": 65520 }, { "epoch": 0.12552161000188675, "grad_norm": 1.186885952949524, "learning_rate": 0.00012547711324935033, "loss": 4.5939, "step": 65530 }, { "epoch": 0.125540764833262, "grad_norm": 1.193633794784546, "learning_rate": 0.0001254962680134594, "loss": 4.6121, "step": 65540 }, { "epoch": 0.1255599196646372, "grad_norm": 1.1731261014938354, "learning_rate": 0.00012551542277756847, "loss": 4.5443, "step": 65550 }, { "epoch": 0.12557907449601244, "grad_norm": 1.3021539449691772, "learning_rate": 0.00012553457754167755, "loss": 4.489, "step": 65560 }, { "epoch": 0.12559822932738768, "grad_norm": 1.2124013900756836, "learning_rate": 0.00012555373230578666, "loss": 4.6428, "step": 65570 }, { "epoch": 0.12561738415876292, "grad_norm": 
1.2020639181137085, "learning_rate": 0.0001255728870698957, "loss": 4.534, "step": 65580 }, { "epoch": 0.12563653899013813, "grad_norm": 1.1576389074325562, "learning_rate": 0.0001255920418340048, "loss": 4.6254, "step": 65590 }, { "epoch": 0.12565569382151337, "grad_norm": 1.252898097038269, "learning_rate": 0.0001256111965981139, "loss": 4.7001, "step": 65600 }, { "epoch": 0.1256748486528886, "grad_norm": 1.2081598043441772, "learning_rate": 0.00012563035136222296, "loss": 4.6049, "step": 65610 }, { "epoch": 0.12569400348426382, "grad_norm": 1.2204458713531494, "learning_rate": 0.00012564950612633204, "loss": 4.5637, "step": 65620 }, { "epoch": 0.12571315831563906, "grad_norm": 1.1783649921417236, "learning_rate": 0.00012566866089044112, "loss": 4.6009, "step": 65630 }, { "epoch": 0.1257323131470143, "grad_norm": 1.18450129032135, "learning_rate": 0.0001256878156545502, "loss": 4.6442, "step": 65640 }, { "epoch": 0.12575146797838951, "grad_norm": 1.1770873069763184, "learning_rate": 0.00012570697041865928, "loss": 4.5647, "step": 65650 }, { "epoch": 0.12577062280976475, "grad_norm": 1.43217134475708, "learning_rate": 0.00012572612518276836, "loss": 4.5268, "step": 65660 }, { "epoch": 0.12578977764114, "grad_norm": 1.2217360734939575, "learning_rate": 0.00012574527994687744, "loss": 4.5953, "step": 65670 }, { "epoch": 0.1258089324725152, "grad_norm": 1.224825143814087, "learning_rate": 0.00012576443471098652, "loss": 4.5875, "step": 65680 }, { "epoch": 0.12582808730389045, "grad_norm": 1.2565529346466064, "learning_rate": 0.0001257835894750956, "loss": 4.7219, "step": 65690 }, { "epoch": 0.12584724213526569, "grad_norm": 1.2556424140930176, "learning_rate": 0.0001258027442392047, "loss": 4.5034, "step": 65700 }, { "epoch": 0.1258663969666409, "grad_norm": 1.2389723062515259, "learning_rate": 0.00012582189900331377, "loss": 4.4936, "step": 65710 }, { "epoch": 0.12588555179801614, "grad_norm": 1.2208904027938843, "learning_rate": 0.00012584105376742285, "loss": 
4.6435, "step": 65720 }, { "epoch": 0.12590470662939138, "grad_norm": 1.1638691425323486, "learning_rate": 0.00012586020853153193, "loss": 4.5393, "step": 65730 }, { "epoch": 0.1259238614607666, "grad_norm": 1.238620638847351, "learning_rate": 0.000125879363295641, "loss": 4.5796, "step": 65740 }, { "epoch": 0.12594301629214183, "grad_norm": 1.190982699394226, "learning_rate": 0.0001258985180597501, "loss": 4.4957, "step": 65750 }, { "epoch": 0.12596217112351707, "grad_norm": 1.2093969583511353, "learning_rate": 0.00012591767282385918, "loss": 4.6452, "step": 65760 }, { "epoch": 0.12598132595489228, "grad_norm": 1.2140072584152222, "learning_rate": 0.00012593682758796823, "loss": 4.5713, "step": 65770 }, { "epoch": 0.12600048078626752, "grad_norm": 1.1849106550216675, "learning_rate": 0.0001259559823520773, "loss": 4.4464, "step": 65780 }, { "epoch": 0.12601963561764276, "grad_norm": 1.1832661628723145, "learning_rate": 0.00012597513711618642, "loss": 4.7405, "step": 65790 }, { "epoch": 0.12603879044901797, "grad_norm": 1.227311611175537, "learning_rate": 0.00012599429188029547, "loss": 4.627, "step": 65800 }, { "epoch": 0.1260579452803932, "grad_norm": 1.2155112028121948, "learning_rate": 0.00012601344664440455, "loss": 4.5886, "step": 65810 }, { "epoch": 0.12607710011176845, "grad_norm": 1.2694627046585083, "learning_rate": 0.00012603260140851366, "loss": 4.6385, "step": 65820 }, { "epoch": 0.12609625494314367, "grad_norm": 1.2175220251083374, "learning_rate": 0.00012605175617262272, "loss": 4.4232, "step": 65830 }, { "epoch": 0.1261154097745189, "grad_norm": 1.1437783241271973, "learning_rate": 0.0001260709109367318, "loss": 4.5638, "step": 65840 }, { "epoch": 0.12613456460589414, "grad_norm": 1.277336597442627, "learning_rate": 0.00012609006570084088, "loss": 4.488, "step": 65850 }, { "epoch": 0.12615371943726936, "grad_norm": 1.223555564880371, "learning_rate": 0.00012610922046494996, "loss": 4.467, "step": 65860 }, { "epoch": 0.1261728742686446, "grad_norm": 
1.1948084831237793, "learning_rate": 0.00012612837522905904, "loss": 4.532, "step": 65870 }, { "epoch": 0.12619202910001984, "grad_norm": 1.209394931793213, "learning_rate": 0.00012614752999316812, "loss": 4.6606, "step": 65880 }, { "epoch": 0.12621118393139505, "grad_norm": 1.1782398223876953, "learning_rate": 0.0001261666847572772, "loss": 4.5473, "step": 65890 }, { "epoch": 0.1262303387627703, "grad_norm": 1.2570117712020874, "learning_rate": 0.00012618583952138629, "loss": 4.5575, "step": 65900 }, { "epoch": 0.12624949359414553, "grad_norm": 1.220032811164856, "learning_rate": 0.00012620499428549537, "loss": 4.622, "step": 65910 }, { "epoch": 0.12626864842552074, "grad_norm": 1.2041038274765015, "learning_rate": 0.00012622414904960445, "loss": 4.6579, "step": 65920 }, { "epoch": 0.12628780325689598, "grad_norm": 1.1775847673416138, "learning_rate": 0.00012624330381371353, "loss": 4.7558, "step": 65930 }, { "epoch": 0.12630695808827122, "grad_norm": 1.1766687631607056, "learning_rate": 0.0001262624585778226, "loss": 4.6382, "step": 65940 }, { "epoch": 0.12632611291964643, "grad_norm": 1.2523232698440552, "learning_rate": 0.0001262816133419317, "loss": 4.7007, "step": 65950 }, { "epoch": 0.12634526775102167, "grad_norm": 1.2272604703903198, "learning_rate": 0.00012630076810604075, "loss": 4.4929, "step": 65960 }, { "epoch": 0.1263644225823969, "grad_norm": 1.2116329669952393, "learning_rate": 0.00012631992287014986, "loss": 4.6687, "step": 65970 }, { "epoch": 0.12638357741377212, "grad_norm": 1.239987850189209, "learning_rate": 0.00012633907763425894, "loss": 4.6698, "step": 65980 }, { "epoch": 0.12640273224514736, "grad_norm": 1.246090054512024, "learning_rate": 0.000126358232398368, "loss": 4.4604, "step": 65990 }, { "epoch": 0.1264218870765226, "grad_norm": 1.1764178276062012, "learning_rate": 0.00012637738716247707, "loss": 4.6352, "step": 66000 }, { "epoch": 0.12644104190789782, "grad_norm": 1.149375081062317, "learning_rate": 0.00012639654192658618, "loss": 
4.6641, "step": 66010 }, { "epoch": 0.12646019673927306, "grad_norm": 1.195297360420227, "learning_rate": 0.00012641569669069523, "loss": 4.633, "step": 66020 }, { "epoch": 0.1264793515706483, "grad_norm": 1.3411186933517456, "learning_rate": 0.00012643485145480432, "loss": 4.5236, "step": 66030 }, { "epoch": 0.1264985064020235, "grad_norm": 1.1699888706207275, "learning_rate": 0.0001264540062189134, "loss": 4.5766, "step": 66040 }, { "epoch": 0.12651766123339875, "grad_norm": 1.212762475013733, "learning_rate": 0.00012647316098302248, "loss": 4.5787, "step": 66050 }, { "epoch": 0.126536816064774, "grad_norm": 1.2835733890533447, "learning_rate": 0.00012649231574713156, "loss": 4.6289, "step": 66060 }, { "epoch": 0.1265559708961492, "grad_norm": 1.1611272096633911, "learning_rate": 0.00012651147051124064, "loss": 4.4688, "step": 66070 }, { "epoch": 0.12657512572752444, "grad_norm": 1.251062035560608, "learning_rate": 0.00012653062527534972, "loss": 4.5196, "step": 66080 }, { "epoch": 0.12659428055889968, "grad_norm": 1.2330979108810425, "learning_rate": 0.0001265497800394588, "loss": 4.6872, "step": 66090 }, { "epoch": 0.1266134353902749, "grad_norm": 1.2116365432739258, "learning_rate": 0.00012656893480356789, "loss": 4.597, "step": 66100 }, { "epoch": 0.12663259022165013, "grad_norm": 1.1560583114624023, "learning_rate": 0.00012658808956767697, "loss": 4.647, "step": 66110 }, { "epoch": 0.12665174505302537, "grad_norm": 1.1884342432022095, "learning_rate": 0.00012660724433178605, "loss": 4.5529, "step": 66120 }, { "epoch": 0.12667089988440058, "grad_norm": 1.2186881303787231, "learning_rate": 0.00012662639909589513, "loss": 4.5576, "step": 66130 }, { "epoch": 0.12669005471577582, "grad_norm": 1.2798848152160645, "learning_rate": 0.0001266455538600042, "loss": 4.6207, "step": 66140 }, { "epoch": 0.12670920954715106, "grad_norm": 1.23418390750885, "learning_rate": 0.00012666470862411326, "loss": 4.6044, "step": 66150 }, { "epoch": 0.12672836437852628, "grad_norm": 
1.200934648513794, "learning_rate": 0.00012668386338822237, "loss": 4.6709, "step": 66160 }, { "epoch": 0.12674751920990152, "grad_norm": 1.2005306482315063, "learning_rate": 0.00012670301815233145, "loss": 4.56, "step": 66170 }, { "epoch": 0.12676667404127676, "grad_norm": 1.1939119100570679, "learning_rate": 0.0001267221729164405, "loss": 4.5696, "step": 66180 }, { "epoch": 0.12678582887265197, "grad_norm": 1.197953224182129, "learning_rate": 0.00012674132768054962, "loss": 4.5356, "step": 66190 }, { "epoch": 0.1268049837040272, "grad_norm": 1.1944910287857056, "learning_rate": 0.0001267604824446587, "loss": 4.5849, "step": 66200 }, { "epoch": 0.12682413853540245, "grad_norm": 1.2068074941635132, "learning_rate": 0.00012677963720876775, "loss": 4.6367, "step": 66210 }, { "epoch": 0.12684329336677766, "grad_norm": 1.1974741220474243, "learning_rate": 0.00012679879197287683, "loss": 4.6166, "step": 66220 }, { "epoch": 0.1268624481981529, "grad_norm": 1.1630725860595703, "learning_rate": 0.00012681794673698592, "loss": 4.5505, "step": 66230 }, { "epoch": 0.12688160302952814, "grad_norm": 1.2328778505325317, "learning_rate": 0.000126837101501095, "loss": 4.5546, "step": 66240 }, { "epoch": 0.12690075786090335, "grad_norm": 1.244195818901062, "learning_rate": 0.00012685625626520408, "loss": 4.6657, "step": 66250 }, { "epoch": 0.1269199126922786, "grad_norm": 1.219658374786377, "learning_rate": 0.00012687541102931316, "loss": 4.6282, "step": 66260 }, { "epoch": 0.12693906752365383, "grad_norm": 1.2178645133972168, "learning_rate": 0.00012689456579342224, "loss": 4.7085, "step": 66270 }, { "epoch": 0.12695822235502904, "grad_norm": 1.2590460777282715, "learning_rate": 0.00012691372055753132, "loss": 4.6554, "step": 66280 }, { "epoch": 0.12697737718640428, "grad_norm": 1.219260334968567, "learning_rate": 0.0001269328753216404, "loss": 4.5883, "step": 66290 }, { "epoch": 0.12699653201777952, "grad_norm": 1.1784125566482544, "learning_rate": 0.00012695203008574948, "loss": 
4.5715, "step": 66300 }, { "epoch": 0.12701568684915474, "grad_norm": 1.1975116729736328, "learning_rate": 0.00012697118484985857, "loss": 4.567, "step": 66310 }, { "epoch": 0.12703484168052998, "grad_norm": 1.1881741285324097, "learning_rate": 0.00012699033961396765, "loss": 4.4411, "step": 66320 }, { "epoch": 0.12705399651190522, "grad_norm": 1.2930234670639038, "learning_rate": 0.00012700949437807673, "loss": 4.5457, "step": 66330 }, { "epoch": 0.12707315134328043, "grad_norm": 1.2566481828689575, "learning_rate": 0.00012702864914218578, "loss": 4.6027, "step": 66340 }, { "epoch": 0.12709230617465567, "grad_norm": 1.2308318614959717, "learning_rate": 0.0001270478039062949, "loss": 4.5468, "step": 66350 }, { "epoch": 0.1271114610060309, "grad_norm": 1.3072699308395386, "learning_rate": 0.00012706695867040397, "loss": 4.6589, "step": 66360 }, { "epoch": 0.12713061583740612, "grad_norm": 1.2021076679229736, "learning_rate": 0.00012708611343451303, "loss": 4.6658, "step": 66370 }, { "epoch": 0.12714977066878136, "grad_norm": 1.167156457901001, "learning_rate": 0.00012710526819862213, "loss": 4.7683, "step": 66380 }, { "epoch": 0.1271689255001566, "grad_norm": 1.2380074262619019, "learning_rate": 0.00012712442296273122, "loss": 4.684, "step": 66390 }, { "epoch": 0.1271880803315318, "grad_norm": 1.2164630889892578, "learning_rate": 0.00012714357772684027, "loss": 4.555, "step": 66400 }, { "epoch": 0.12720723516290705, "grad_norm": 1.2204440832138062, "learning_rate": 0.00012716273249094935, "loss": 4.5736, "step": 66410 }, { "epoch": 0.1272263899942823, "grad_norm": 1.1296782493591309, "learning_rate": 0.00012718188725505846, "loss": 4.5854, "step": 66420 }, { "epoch": 0.1272455448256575, "grad_norm": 1.205390214920044, "learning_rate": 0.00012720104201916751, "loss": 4.5127, "step": 66430 }, { "epoch": 0.12726469965703274, "grad_norm": 1.2147493362426758, "learning_rate": 0.0001272201967832766, "loss": 4.5277, "step": 66440 }, { "epoch": 0.12728385448840798, 
"grad_norm": 1.1837685108184814, "learning_rate": 0.00012723935154738568, "loss": 4.5674, "step": 66450 }, { "epoch": 0.1273030093197832, "grad_norm": 1.2543452978134155, "learning_rate": 0.00012725850631149476, "loss": 4.4671, "step": 66460 }, { "epoch": 0.12732216415115843, "grad_norm": 1.204359769821167, "learning_rate": 0.00012727766107560384, "loss": 4.5771, "step": 66470 }, { "epoch": 0.12734131898253367, "grad_norm": 1.2015782594680786, "learning_rate": 0.00012729681583971292, "loss": 4.526, "step": 66480 }, { "epoch": 0.1273604738139089, "grad_norm": 1.1848200559616089, "learning_rate": 0.000127315970603822, "loss": 4.599, "step": 66490 }, { "epoch": 0.12737962864528413, "grad_norm": 1.1755242347717285, "learning_rate": 0.00012733512536793108, "loss": 4.7685, "step": 66500 }, { "epoch": 0.12739878347665937, "grad_norm": 1.2542128562927246, "learning_rate": 0.00012735428013204016, "loss": 4.5385, "step": 66510 }, { "epoch": 0.12741793830803458, "grad_norm": 1.1975935697555542, "learning_rate": 0.00012737343489614925, "loss": 4.4992, "step": 66520 }, { "epoch": 0.12743709313940982, "grad_norm": 1.2461481094360352, "learning_rate": 0.00012739258966025833, "loss": 4.4928, "step": 66530 }, { "epoch": 0.12745624797078506, "grad_norm": 1.2218667268753052, "learning_rate": 0.0001274117444243674, "loss": 4.5799, "step": 66540 }, { "epoch": 0.12747540280216027, "grad_norm": 1.1917668581008911, "learning_rate": 0.0001274308991884765, "loss": 4.6924, "step": 66550 }, { "epoch": 0.1274945576335355, "grad_norm": 1.228265643119812, "learning_rate": 0.00012745005395258554, "loss": 4.7277, "step": 66560 }, { "epoch": 0.12751371246491075, "grad_norm": 1.2336505651474, "learning_rate": 0.00012746920871669465, "loss": 4.669, "step": 66570 }, { "epoch": 0.12753286729628596, "grad_norm": 1.1591414213180542, "learning_rate": 0.00012748836348080373, "loss": 4.7224, "step": 66580 }, { "epoch": 0.1275520221276612, "grad_norm": 1.2212857007980347, "learning_rate": 
0.0001275075182449128, "loss": 4.5141, "step": 66590 }, { "epoch": 0.12757117695903644, "grad_norm": 1.1820310354232788, "learning_rate": 0.0001275266730090219, "loss": 4.5565, "step": 66600 }, { "epoch": 0.12759033179041165, "grad_norm": 1.2074148654937744, "learning_rate": 0.00012754582777313098, "loss": 4.6744, "step": 66610 }, { "epoch": 0.1276094866217869, "grad_norm": 1.2048900127410889, "learning_rate": 0.00012756498253724003, "loss": 4.5778, "step": 66620 }, { "epoch": 0.12762864145316213, "grad_norm": 1.3639047145843506, "learning_rate": 0.0001275841373013491, "loss": 4.6253, "step": 66630 }, { "epoch": 0.12764779628453735, "grad_norm": 1.2152022123336792, "learning_rate": 0.0001276032920654582, "loss": 4.7478, "step": 66640 }, { "epoch": 0.12766695111591259, "grad_norm": 1.172256588935852, "learning_rate": 0.00012762244682956728, "loss": 4.5409, "step": 66650 }, { "epoch": 0.12768610594728783, "grad_norm": 1.213340401649475, "learning_rate": 0.00012764160159367636, "loss": 4.6272, "step": 66660 }, { "epoch": 0.12770526077866304, "grad_norm": 1.233719825744629, "learning_rate": 0.00012766075635778544, "loss": 4.592, "step": 66670 }, { "epoch": 0.12772441561003828, "grad_norm": 1.1408947706222534, "learning_rate": 0.00012767991112189452, "loss": 4.669, "step": 66680 }, { "epoch": 0.12774357044141352, "grad_norm": 1.15719735622406, "learning_rate": 0.0001276990658860036, "loss": 4.6071, "step": 66690 }, { "epoch": 0.12776272527278873, "grad_norm": 1.2500228881835938, "learning_rate": 0.00012771822065011268, "loss": 4.6643, "step": 66700 }, { "epoch": 0.12778188010416397, "grad_norm": 1.2665722370147705, "learning_rate": 0.00012773737541422176, "loss": 4.5707, "step": 66710 }, { "epoch": 0.1278010349355392, "grad_norm": 1.4041695594787598, "learning_rate": 0.00012775653017833084, "loss": 4.5333, "step": 66720 }, { "epoch": 0.12782018976691442, "grad_norm": 1.1876111030578613, "learning_rate": 0.00012777568494243993, "loss": 4.7331, "step": 66730 }, { "epoch": 
0.12783934459828966, "grad_norm": 1.2333672046661377, "learning_rate": 0.000127794839706549, "loss": 4.5664, "step": 66740 }, { "epoch": 0.1278584994296649, "grad_norm": 1.2762268781661987, "learning_rate": 0.0001278139944706581, "loss": 4.5492, "step": 66750 }, { "epoch": 0.1278776542610401, "grad_norm": 1.174333095550537, "learning_rate": 0.00012783314923476717, "loss": 4.6577, "step": 66760 }, { "epoch": 0.12789680909241535, "grad_norm": 1.1808558702468872, "learning_rate": 0.00012785230399887625, "loss": 4.5978, "step": 66770 }, { "epoch": 0.1279159639237906, "grad_norm": 1.1554919481277466, "learning_rate": 0.0001278714587629853, "loss": 4.6481, "step": 66780 }, { "epoch": 0.1279351187551658, "grad_norm": 1.309539556503296, "learning_rate": 0.00012789061352709441, "loss": 4.6005, "step": 66790 }, { "epoch": 0.12795427358654105, "grad_norm": 1.1759521961212158, "learning_rate": 0.0001279097682912035, "loss": 4.5246, "step": 66800 }, { "epoch": 0.12797342841791629, "grad_norm": 1.2452839612960815, "learning_rate": 0.00012792892305531255, "loss": 4.4682, "step": 66810 }, { "epoch": 0.1279925832492915, "grad_norm": 1.173033356666565, "learning_rate": 0.00012794807781942166, "loss": 4.487, "step": 66820 }, { "epoch": 0.12801173808066674, "grad_norm": 1.1749060153961182, "learning_rate": 0.0001279672325835307, "loss": 4.7097, "step": 66830 }, { "epoch": 0.12803089291204198, "grad_norm": 1.157232642173767, "learning_rate": 0.0001279863873476398, "loss": 4.6905, "step": 66840 }, { "epoch": 0.1280500477434172, "grad_norm": 1.1480334997177124, "learning_rate": 0.00012800554211174887, "loss": 4.6442, "step": 66850 }, { "epoch": 0.12806920257479243, "grad_norm": 1.2003870010375977, "learning_rate": 0.00012802469687585796, "loss": 4.6654, "step": 66860 }, { "epoch": 0.12808835740616767, "grad_norm": 1.2189512252807617, "learning_rate": 0.00012804385163996704, "loss": 4.6861, "step": 66870 }, { "epoch": 0.12810751223754288, "grad_norm": 1.2300060987472534, "learning_rate": 
0.00012806300640407612, "loss": 4.7724, "step": 66880 }, { "epoch": 0.12812666706891812, "grad_norm": 1.2362010478973389, "learning_rate": 0.0001280821611681852, "loss": 4.6819, "step": 66890 }, { "epoch": 0.12814582190029336, "grad_norm": 1.1956498622894287, "learning_rate": 0.00012810131593229428, "loss": 4.5833, "step": 66900 }, { "epoch": 0.1281649767316686, "grad_norm": 1.2163416147232056, "learning_rate": 0.00012812047069640336, "loss": 4.6709, "step": 66910 }, { "epoch": 0.1281841315630438, "grad_norm": 1.209626317024231, "learning_rate": 0.00012813962546051244, "loss": 4.7139, "step": 66920 }, { "epoch": 0.12820328639441905, "grad_norm": 1.2161469459533691, "learning_rate": 0.00012815878022462153, "loss": 4.6518, "step": 66930 }, { "epoch": 0.1282224412257943, "grad_norm": 1.3456764221191406, "learning_rate": 0.0001281779349887306, "loss": 4.4804, "step": 66940 }, { "epoch": 0.1282415960571695, "grad_norm": 1.2066172361373901, "learning_rate": 0.0001281970897528397, "loss": 4.627, "step": 66950 }, { "epoch": 0.12826075088854474, "grad_norm": 1.1826690435409546, "learning_rate": 0.00012821624451694877, "loss": 4.5784, "step": 66960 }, { "epoch": 0.12827990571991998, "grad_norm": 1.1816949844360352, "learning_rate": 0.00012823539928105785, "loss": 4.4597, "step": 66970 }, { "epoch": 0.1282990605512952, "grad_norm": 1.185621738433838, "learning_rate": 0.00012825455404516693, "loss": 4.6322, "step": 66980 }, { "epoch": 0.12831821538267044, "grad_norm": 1.2120132446289062, "learning_rate": 0.000128273708809276, "loss": 4.7093, "step": 66990 }, { "epoch": 0.12833737021404568, "grad_norm": 1.1506646871566772, "learning_rate": 0.00012829286357338507, "loss": 4.6954, "step": 67000 }, { "epoch": 0.1283565250454209, "grad_norm": 1.1712580919265747, "learning_rate": 0.00012831201833749418, "loss": 4.5196, "step": 67010 }, { "epoch": 0.12837567987679613, "grad_norm": 1.2153054475784302, "learning_rate": 0.00012833117310160323, "loss": 4.5286, "step": 67020 }, { "epoch": 
0.12839483470817137, "grad_norm": 1.181812047958374, "learning_rate": 0.0001283503278657123, "loss": 4.5711, "step": 67030 }, { "epoch": 0.12841398953954658, "grad_norm": 1.2458739280700684, "learning_rate": 0.00012836948262982142, "loss": 4.5785, "step": 67040 }, { "epoch": 0.12843314437092182, "grad_norm": 1.2254515886306763, "learning_rate": 0.00012838863739393047, "loss": 4.5964, "step": 67050 }, { "epoch": 0.12845229920229706, "grad_norm": 1.1743640899658203, "learning_rate": 0.00012840779215803956, "loss": 4.4758, "step": 67060 }, { "epoch": 0.12847145403367227, "grad_norm": 1.193572759628296, "learning_rate": 0.00012842694692214864, "loss": 4.576, "step": 67070 }, { "epoch": 0.1284906088650475, "grad_norm": 1.1689715385437012, "learning_rate": 0.00012844610168625772, "loss": 4.4688, "step": 67080 }, { "epoch": 0.12850976369642275, "grad_norm": 1.185995101928711, "learning_rate": 0.0001284652564503668, "loss": 4.6629, "step": 67090 }, { "epoch": 0.12852891852779796, "grad_norm": 1.2230496406555176, "learning_rate": 0.00012848441121447588, "loss": 4.5886, "step": 67100 }, { "epoch": 0.1285480733591732, "grad_norm": 1.1722455024719238, "learning_rate": 0.00012850356597858496, "loss": 4.6136, "step": 67110 }, { "epoch": 0.12856722819054844, "grad_norm": 1.2383428812026978, "learning_rate": 0.00012852272074269404, "loss": 4.478, "step": 67120 }, { "epoch": 0.12858638302192366, "grad_norm": 1.170731544494629, "learning_rate": 0.00012854187550680312, "loss": 4.5622, "step": 67130 }, { "epoch": 0.1286055378532989, "grad_norm": 1.2152600288391113, "learning_rate": 0.0001285610302709122, "loss": 4.6707, "step": 67140 }, { "epoch": 0.12862469268467414, "grad_norm": 1.2153948545455933, "learning_rate": 0.0001285801850350213, "loss": 4.5942, "step": 67150 }, { "epoch": 0.12864384751604935, "grad_norm": 1.2089111804962158, "learning_rate": 0.00012859933979913037, "loss": 4.6011, "step": 67160 }, { "epoch": 0.1286630023474246, "grad_norm": 1.2947653532028198, 
"learning_rate": 0.00012861849456323945, "loss": 4.5956, "step": 67170 }, { "epoch": 0.12868215717879983, "grad_norm": 1.2617088556289673, "learning_rate": 0.00012863764932734853, "loss": 4.6337, "step": 67180 }, { "epoch": 0.12870131201017504, "grad_norm": 1.2584166526794434, "learning_rate": 0.0001286568040914576, "loss": 4.58, "step": 67190 }, { "epoch": 0.12872046684155028, "grad_norm": 1.2174450159072876, "learning_rate": 0.0001286759588555667, "loss": 4.5852, "step": 67200 }, { "epoch": 0.12873962167292552, "grad_norm": 1.2669199705123901, "learning_rate": 0.00012869511361967575, "loss": 4.6026, "step": 67210 }, { "epoch": 0.12875877650430073, "grad_norm": 1.192746639251709, "learning_rate": 0.00012871426838378483, "loss": 4.6642, "step": 67220 }, { "epoch": 0.12877793133567597, "grad_norm": 1.2308900356292725, "learning_rate": 0.00012873342314789394, "loss": 4.7645, "step": 67230 }, { "epoch": 0.1287970861670512, "grad_norm": 1.2135143280029297, "learning_rate": 0.000128752577912003, "loss": 4.7533, "step": 67240 }, { "epoch": 0.12881624099842642, "grad_norm": 1.246408462524414, "learning_rate": 0.00012877173267611207, "loss": 4.5794, "step": 67250 }, { "epoch": 0.12883539582980166, "grad_norm": 1.2357983589172363, "learning_rate": 0.00012879088744022118, "loss": 4.6281, "step": 67260 }, { "epoch": 0.1288545506611769, "grad_norm": 1.2081692218780518, "learning_rate": 0.00012881004220433024, "loss": 4.6101, "step": 67270 }, { "epoch": 0.12887370549255212, "grad_norm": 1.222581386566162, "learning_rate": 0.00012882919696843932, "loss": 4.5753, "step": 67280 }, { "epoch": 0.12889286032392736, "grad_norm": 1.4658461809158325, "learning_rate": 0.0001288483517325484, "loss": 4.4778, "step": 67290 }, { "epoch": 0.1289120151553026, "grad_norm": 1.1662122011184692, "learning_rate": 0.00012886750649665748, "loss": 4.465, "step": 67300 }, { "epoch": 0.1289311699866778, "grad_norm": 1.1949266195297241, "learning_rate": 0.00012888666126076656, "loss": 4.6583, "step": 
67310 }, { "epoch": 0.12895032481805305, "grad_norm": 1.3070708513259888, "learning_rate": 0.00012890581602487564, "loss": 4.5399, "step": 67320 }, { "epoch": 0.1289694796494283, "grad_norm": 1.251909852027893, "learning_rate": 0.00012892497078898472, "loss": 4.6489, "step": 67330 }, { "epoch": 0.1289886344808035, "grad_norm": 1.215155005455017, "learning_rate": 0.0001289441255530938, "loss": 4.6059, "step": 67340 }, { "epoch": 0.12900778931217874, "grad_norm": 1.2429568767547607, "learning_rate": 0.00012896136484079196, "loss": 4.5425, "step": 67350 }, { "epoch": 0.12902694414355398, "grad_norm": 1.2456519603729248, "learning_rate": 0.00012898051960490107, "loss": 4.5676, "step": 67360 }, { "epoch": 0.1290460989749292, "grad_norm": 1.1485165357589722, "learning_rate": 0.00012899967436901013, "loss": 4.4615, "step": 67370 }, { "epoch": 0.12906525380630443, "grad_norm": 1.1820282936096191, "learning_rate": 0.0001290188291331192, "loss": 4.5503, "step": 67380 }, { "epoch": 0.12908440863767967, "grad_norm": 1.2524250745773315, "learning_rate": 0.0001290379838972283, "loss": 4.6953, "step": 67390 }, { "epoch": 0.12910356346905488, "grad_norm": 1.244580864906311, "learning_rate": 0.00012905713866133737, "loss": 4.6887, "step": 67400 }, { "epoch": 0.12912271830043012, "grad_norm": 1.1765220165252686, "learning_rate": 0.00012907629342544645, "loss": 4.4808, "step": 67410 }, { "epoch": 0.12914187313180536, "grad_norm": 1.1422648429870605, "learning_rate": 0.00012909544818955553, "loss": 4.5881, "step": 67420 }, { "epoch": 0.12916102796318057, "grad_norm": 1.2169451713562012, "learning_rate": 0.00012911460295366461, "loss": 4.6316, "step": 67430 }, { "epoch": 0.12918018279455581, "grad_norm": 1.2057220935821533, "learning_rate": 0.0001291337577177737, "loss": 4.4607, "step": 67440 }, { "epoch": 0.12919933762593105, "grad_norm": 1.184746265411377, "learning_rate": 0.00012915291248188278, "loss": 4.5576, "step": 67450 }, { "epoch": 0.12921849245730627, "grad_norm": 
1.137588620185852, "learning_rate": 0.00012917206724599186, "loss": 4.7332, "step": 67460 }, { "epoch": 0.1292376472886815, "grad_norm": 1.1931090354919434, "learning_rate": 0.00012919122201010094, "loss": 4.5077, "step": 67470 }, { "epoch": 0.12925680212005675, "grad_norm": 1.1754889488220215, "learning_rate": 0.00012921037677421002, "loss": 4.6957, "step": 67480 }, { "epoch": 0.12927595695143196, "grad_norm": 1.279604196548462, "learning_rate": 0.0001292295315383191, "loss": 4.6425, "step": 67490 }, { "epoch": 0.1292951117828072, "grad_norm": 1.1868816614151, "learning_rate": 0.00012924868630242816, "loss": 4.6596, "step": 67500 }, { "epoch": 0.12931426661418244, "grad_norm": 1.173077940940857, "learning_rate": 0.00012926784106653727, "loss": 4.4407, "step": 67510 }, { "epoch": 0.12933342144555765, "grad_norm": 1.168714165687561, "learning_rate": 0.00012928699583064635, "loss": 4.6838, "step": 67520 }, { "epoch": 0.1293525762769329, "grad_norm": 1.202674150466919, "learning_rate": 0.0001293061505947554, "loss": 4.6639, "step": 67530 }, { "epoch": 0.12937173110830813, "grad_norm": 1.1839460134506226, "learning_rate": 0.00012932530535886448, "loss": 4.7032, "step": 67540 }, { "epoch": 0.12939088593968334, "grad_norm": 1.2367465496063232, "learning_rate": 0.0001293444601229736, "loss": 4.6478, "step": 67550 }, { "epoch": 0.12941004077105858, "grad_norm": 1.2303621768951416, "learning_rate": 0.00012936361488708264, "loss": 4.6882, "step": 67560 }, { "epoch": 0.12942919560243382, "grad_norm": 1.1588410139083862, "learning_rate": 0.00012938276965119173, "loss": 4.6612, "step": 67570 }, { "epoch": 0.12944835043380903, "grad_norm": 1.2334762811660767, "learning_rate": 0.0001294019244153008, "loss": 4.5999, "step": 67580 }, { "epoch": 0.12946750526518427, "grad_norm": 1.1801471710205078, "learning_rate": 0.0001294210791794099, "loss": 4.4838, "step": 67590 }, { "epoch": 0.1294866600965595, "grad_norm": 1.1629823446273804, "learning_rate": 0.00012944023394351897, "loss": 
4.6569, "step": 67600 }, { "epoch": 0.12950581492793473, "grad_norm": 1.2050188779830933, "learning_rate": 0.00012945938870762805, "loss": 4.5775, "step": 67610 }, { "epoch": 0.12952496975930997, "grad_norm": 1.2040607929229736, "learning_rate": 0.00012947854347173713, "loss": 4.6605, "step": 67620 }, { "epoch": 0.1295441245906852, "grad_norm": 1.2193800210952759, "learning_rate": 0.00012949769823584621, "loss": 4.5634, "step": 67630 }, { "epoch": 0.12956327942206042, "grad_norm": 1.170169711112976, "learning_rate": 0.0001295168529999553, "loss": 4.5097, "step": 67640 }, { "epoch": 0.12958243425343566, "grad_norm": 1.1896412372589111, "learning_rate": 0.00012953600776406438, "loss": 4.5884, "step": 67650 }, { "epoch": 0.1296015890848109, "grad_norm": 1.1923072338104248, "learning_rate": 0.00012955516252817346, "loss": 4.4506, "step": 67660 }, { "epoch": 0.1296207439161861, "grad_norm": 1.2190150022506714, "learning_rate": 0.00012957431729228254, "loss": 4.6029, "step": 67670 }, { "epoch": 0.12963989874756135, "grad_norm": 1.2710505723953247, "learning_rate": 0.00012959347205639162, "loss": 4.7001, "step": 67680 }, { "epoch": 0.1296590535789366, "grad_norm": 1.1926862001419067, "learning_rate": 0.00012961262682050067, "loss": 4.6448, "step": 67690 }, { "epoch": 0.1296782084103118, "grad_norm": 1.2399742603302002, "learning_rate": 0.00012963178158460978, "loss": 4.6596, "step": 67700 }, { "epoch": 0.12969736324168704, "grad_norm": 1.156667709350586, "learning_rate": 0.00012965093634871886, "loss": 4.6716, "step": 67710 }, { "epoch": 0.12971651807306228, "grad_norm": 1.1891226768493652, "learning_rate": 0.00012967009111282792, "loss": 4.6628, "step": 67720 }, { "epoch": 0.1297356729044375, "grad_norm": 1.2035367488861084, "learning_rate": 0.00012968924587693703, "loss": 4.6927, "step": 67730 }, { "epoch": 0.12975482773581273, "grad_norm": 1.1938400268554688, "learning_rate": 0.0001297084006410461, "loss": 4.6042, "step": 67740 }, { "epoch": 0.12977398256718797, 
"grad_norm": 1.147423267364502, "learning_rate": 0.00012972755540515516, "loss": 4.7182, "step": 67750 }, { "epoch": 0.12979313739856319, "grad_norm": 1.1451070308685303, "learning_rate": 0.00012974671016926424, "loss": 4.6116, "step": 67760 }, { "epoch": 0.12981229222993843, "grad_norm": 1.1435468196868896, "learning_rate": 0.00012976586493337335, "loss": 4.5986, "step": 67770 }, { "epoch": 0.12983144706131367, "grad_norm": 1.2330044507980347, "learning_rate": 0.0001297850196974824, "loss": 4.5839, "step": 67780 }, { "epoch": 0.12985060189268888, "grad_norm": 1.14383065700531, "learning_rate": 0.0001298041744615915, "loss": 4.6361, "step": 67790 }, { "epoch": 0.12986975672406412, "grad_norm": 1.1615092754364014, "learning_rate": 0.00012982332922570057, "loss": 4.5506, "step": 67800 }, { "epoch": 0.12988891155543936, "grad_norm": 1.1789941787719727, "learning_rate": 0.00012984248398980965, "loss": 4.5836, "step": 67810 }, { "epoch": 0.12990806638681457, "grad_norm": 1.1854324340820312, "learning_rate": 0.00012986163875391873, "loss": 4.4354, "step": 67820 }, { "epoch": 0.1299272212181898, "grad_norm": 1.190794825553894, "learning_rate": 0.0001298807935180278, "loss": 4.5391, "step": 67830 }, { "epoch": 0.12994637604956505, "grad_norm": 1.1505872011184692, "learning_rate": 0.0001298999482821369, "loss": 4.7045, "step": 67840 }, { "epoch": 0.12996553088094026, "grad_norm": 1.1723458766937256, "learning_rate": 0.00012991910304624598, "loss": 4.6131, "step": 67850 }, { "epoch": 0.1299846857123155, "grad_norm": 1.236854076385498, "learning_rate": 0.00012993825781035506, "loss": 4.6251, "step": 67860 }, { "epoch": 0.13000384054369074, "grad_norm": 1.1768224239349365, "learning_rate": 0.00012995741257446414, "loss": 4.4468, "step": 67870 }, { "epoch": 0.13002299537506595, "grad_norm": 1.2230794429779053, "learning_rate": 0.00012997656733857322, "loss": 4.418, "step": 67880 }, { "epoch": 0.1300421502064412, "grad_norm": 1.1913807392120361, "learning_rate": 
0.0001299957221026823, "loss": 4.5042, "step": 67890 }, { "epoch": 0.13006130503781643, "grad_norm": 1.191324234008789, "learning_rate": 0.00013001487686679138, "loss": 4.5662, "step": 67900 }, { "epoch": 0.13008045986919164, "grad_norm": 1.1341828107833862, "learning_rate": 0.00013003403163090044, "loss": 4.6548, "step": 67910 }, { "epoch": 0.13009961470056688, "grad_norm": 1.2421990633010864, "learning_rate": 0.00013005318639500954, "loss": 4.5347, "step": 67920 }, { "epoch": 0.13011876953194212, "grad_norm": 1.1738747358322144, "learning_rate": 0.00013007234115911863, "loss": 4.6169, "step": 67930 }, { "epoch": 0.13013792436331734, "grad_norm": 1.274902105331421, "learning_rate": 0.00013009149592322768, "loss": 4.5599, "step": 67940 }, { "epoch": 0.13015707919469258, "grad_norm": 1.2437254190444946, "learning_rate": 0.0001301106506873368, "loss": 4.6085, "step": 67950 }, { "epoch": 0.13017623402606782, "grad_norm": 1.1847693920135498, "learning_rate": 0.00013012980545144587, "loss": 4.6535, "step": 67960 }, { "epoch": 0.13019538885744303, "grad_norm": 1.1330915689468384, "learning_rate": 0.00013014896021555492, "loss": 4.5656, "step": 67970 }, { "epoch": 0.13021454368881827, "grad_norm": 1.175961971282959, "learning_rate": 0.000130168114979664, "loss": 4.555, "step": 67980 }, { "epoch": 0.1302336985201935, "grad_norm": 1.1770358085632324, "learning_rate": 0.0001301872697437731, "loss": 4.5105, "step": 67990 }, { "epoch": 0.13025285335156872, "grad_norm": 1.244873046875, "learning_rate": 0.00013020642450788217, "loss": 4.7255, "step": 68000 }, { "epoch": 0.13027200818294396, "grad_norm": 1.2823429107666016, "learning_rate": 0.00013022557927199125, "loss": 4.6057, "step": 68010 }, { "epoch": 0.1302911630143192, "grad_norm": 1.1409716606140137, "learning_rate": 0.00013024473403610033, "loss": 4.5204, "step": 68020 }, { "epoch": 0.1303103178456944, "grad_norm": 1.3461169004440308, "learning_rate": 0.0001302638888002094, "loss": 4.498, "step": 68030 }, { "epoch": 
0.13032947267706965, "grad_norm": 1.3017200231552124, "learning_rate": 0.0001302830435643185, "loss": 4.3619, "step": 68040 }, { "epoch": 0.1303486275084449, "grad_norm": 1.1976350545883179, "learning_rate": 0.00013030219832842757, "loss": 4.6536, "step": 68050 }, { "epoch": 0.1303677823398201, "grad_norm": 1.2610102891921997, "learning_rate": 0.00013032135309253666, "loss": 4.5303, "step": 68060 }, { "epoch": 0.13038693717119534, "grad_norm": 1.1849443912506104, "learning_rate": 0.00013034050785664574, "loss": 4.5153, "step": 68070 }, { "epoch": 0.13040609200257058, "grad_norm": 1.2374069690704346, "learning_rate": 0.00013035966262075482, "loss": 4.5487, "step": 68080 }, { "epoch": 0.1304252468339458, "grad_norm": 1.1478012800216675, "learning_rate": 0.0001303788173848639, "loss": 4.5917, "step": 68090 }, { "epoch": 0.13044440166532104, "grad_norm": 1.2074896097183228, "learning_rate": 0.00013039797214897298, "loss": 4.6269, "step": 68100 }, { "epoch": 0.13046355649669628, "grad_norm": 1.2290797233581543, "learning_rate": 0.00013041712691308206, "loss": 4.7389, "step": 68110 }, { "epoch": 0.1304827113280715, "grad_norm": 1.2275933027267456, "learning_rate": 0.00013043628167719114, "loss": 4.6888, "step": 68120 }, { "epoch": 0.13050186615944673, "grad_norm": 1.2051218748092651, "learning_rate": 0.0001304554364413002, "loss": 4.8016, "step": 68130 }, { "epoch": 0.13052102099082197, "grad_norm": 1.1726980209350586, "learning_rate": 0.0001304745912054093, "loss": 4.7446, "step": 68140 }, { "epoch": 0.13054017582219718, "grad_norm": 1.1684685945510864, "learning_rate": 0.0001304937459695184, "loss": 4.5958, "step": 68150 }, { "epoch": 0.13055933065357242, "grad_norm": 1.2340292930603027, "learning_rate": 0.00013051290073362744, "loss": 4.5382, "step": 68160 }, { "epoch": 0.13057848548494766, "grad_norm": 1.1920384168624878, "learning_rate": 0.00013053205549773655, "loss": 4.64, "step": 68170 }, { "epoch": 0.13059764031632287, "grad_norm": 1.2288727760314941, 
"learning_rate": 0.0001305512102618456, "loss": 4.6091, "step": 68180 }, { "epoch": 0.1306167951476981, "grad_norm": 1.1485952138900757, "learning_rate": 0.00013057036502595469, "loss": 4.6247, "step": 68190 }, { "epoch": 0.13063594997907335, "grad_norm": 1.155657410621643, "learning_rate": 0.00013058951979006377, "loss": 4.54, "step": 68200 }, { "epoch": 0.1306551048104486, "grad_norm": 1.1862248182296753, "learning_rate": 0.00013060867455417285, "loss": 4.7045, "step": 68210 }, { "epoch": 0.1306742596418238, "grad_norm": 1.2144310474395752, "learning_rate": 0.00013062782931828193, "loss": 4.5641, "step": 68220 }, { "epoch": 0.13069341447319904, "grad_norm": 1.2288479804992676, "learning_rate": 0.000130646984082391, "loss": 4.6498, "step": 68230 }, { "epoch": 0.13071256930457428, "grad_norm": 1.1830265522003174, "learning_rate": 0.0001306661388465001, "loss": 4.5848, "step": 68240 }, { "epoch": 0.1307317241359495, "grad_norm": 1.2082405090332031, "learning_rate": 0.00013068529361060917, "loss": 4.5595, "step": 68250 }, { "epoch": 0.13075087896732474, "grad_norm": 1.2275482416152954, "learning_rate": 0.00013070444837471825, "loss": 4.5138, "step": 68260 }, { "epoch": 0.13077003379869998, "grad_norm": 1.1308621168136597, "learning_rate": 0.00013072360313882734, "loss": 4.5054, "step": 68270 }, { "epoch": 0.1307891886300752, "grad_norm": 1.1633169651031494, "learning_rate": 0.00013074275790293642, "loss": 4.6203, "step": 68280 }, { "epoch": 0.13080834346145043, "grad_norm": 1.2046544551849365, "learning_rate": 0.0001307619126670455, "loss": 4.4269, "step": 68290 }, { "epoch": 0.13082749829282567, "grad_norm": 1.133574366569519, "learning_rate": 0.00013078106743115458, "loss": 4.5895, "step": 68300 }, { "epoch": 0.13084665312420088, "grad_norm": 1.154985785484314, "learning_rate": 0.00013080022219526366, "loss": 4.5267, "step": 68310 }, { "epoch": 0.13086580795557612, "grad_norm": 1.1481074094772339, "learning_rate": 0.00013081937695937274, "loss": 4.6601, "step": 
68320 }, { "epoch": 0.13088496278695136, "grad_norm": 1.221562147140503, "learning_rate": 0.00013083853172348182, "loss": 4.5593, "step": 68330 }, { "epoch": 0.13090411761832657, "grad_norm": 1.1955008506774902, "learning_rate": 0.0001308576864875909, "loss": 4.6413, "step": 68340 }, { "epoch": 0.1309232724497018, "grad_norm": 1.2254819869995117, "learning_rate": 0.00013087684125169996, "loss": 4.4229, "step": 68350 }, { "epoch": 0.13094242728107705, "grad_norm": 1.2786868810653687, "learning_rate": 0.00013089599601580907, "loss": 4.6188, "step": 68360 }, { "epoch": 0.13096158211245226, "grad_norm": 1.2019644975662231, "learning_rate": 0.00013091515077991812, "loss": 4.5364, "step": 68370 }, { "epoch": 0.1309807369438275, "grad_norm": 1.1789528131484985, "learning_rate": 0.0001309343055440272, "loss": 4.4797, "step": 68380 }, { "epoch": 0.13099989177520274, "grad_norm": 1.2045656442642212, "learning_rate": 0.0001309534603081363, "loss": 4.5908, "step": 68390 }, { "epoch": 0.13101904660657795, "grad_norm": 1.158500075340271, "learning_rate": 0.00013097261507224537, "loss": 4.6028, "step": 68400 }, { "epoch": 0.1310382014379532, "grad_norm": 1.210375189781189, "learning_rate": 0.00013099176983635445, "loss": 4.6963, "step": 68410 }, { "epoch": 0.13105735626932843, "grad_norm": 1.1640477180480957, "learning_rate": 0.00013101092460046353, "loss": 4.6221, "step": 68420 }, { "epoch": 0.13107651110070365, "grad_norm": 1.238543152809143, "learning_rate": 0.0001310300793645726, "loss": 4.6133, "step": 68430 }, { "epoch": 0.1310956659320789, "grad_norm": 1.153654932975769, "learning_rate": 0.0001310492341286817, "loss": 4.433, "step": 68440 }, { "epoch": 0.13111482076345413, "grad_norm": 1.160867691040039, "learning_rate": 0.00013106838889279077, "loss": 4.5692, "step": 68450 }, { "epoch": 0.13113397559482934, "grad_norm": 1.1292718648910522, "learning_rate": 0.00013108754365689985, "loss": 4.6601, "step": 68460 }, { "epoch": 0.13115313042620458, "grad_norm": 
1.1825275421142578, "learning_rate": 0.00013110669842100893, "loss": 4.6826, "step": 68470 }, { "epoch": 0.13117228525757982, "grad_norm": 1.1601243019104004, "learning_rate": 0.00013112585318511802, "loss": 4.6847, "step": 68480 }, { "epoch": 0.13119144008895503, "grad_norm": 1.1537754535675049, "learning_rate": 0.0001311450079492271, "loss": 4.5344, "step": 68490 }, { "epoch": 0.13121059492033027, "grad_norm": 1.298282265663147, "learning_rate": 0.00013116416271333618, "loss": 4.5324, "step": 68500 }, { "epoch": 0.1312297497517055, "grad_norm": 1.3058733940124512, "learning_rate": 0.00013118331747744526, "loss": 4.5511, "step": 68510 }, { "epoch": 0.13124890458308072, "grad_norm": 1.1550723314285278, "learning_rate": 0.00013120247224155434, "loss": 4.5381, "step": 68520 }, { "epoch": 0.13126805941445596, "grad_norm": 1.2645750045776367, "learning_rate": 0.00013122162700566342, "loss": 4.6471, "step": 68530 }, { "epoch": 0.1312872142458312, "grad_norm": 1.1839463710784912, "learning_rate": 0.0001312407817697725, "loss": 4.5057, "step": 68540 }, { "epoch": 0.13130636907720641, "grad_norm": 1.250081181526184, "learning_rate": 0.00013125993653388159, "loss": 4.544, "step": 68550 }, { "epoch": 0.13132552390858165, "grad_norm": 1.2283755540847778, "learning_rate": 0.00013127909129799064, "loss": 4.5348, "step": 68560 }, { "epoch": 0.1313446787399569, "grad_norm": 1.1805269718170166, "learning_rate": 0.00013129824606209972, "loss": 4.5766, "step": 68570 }, { "epoch": 0.1313638335713321, "grad_norm": 1.1743383407592773, "learning_rate": 0.00013131740082620883, "loss": 4.6544, "step": 68580 }, { "epoch": 0.13138298840270735, "grad_norm": 1.157446026802063, "learning_rate": 0.00013133655559031788, "loss": 4.6253, "step": 68590 }, { "epoch": 0.13140214323408259, "grad_norm": 1.1855566501617432, "learning_rate": 0.00013135571035442696, "loss": 4.5045, "step": 68600 }, { "epoch": 0.1314212980654578, "grad_norm": 1.1945315599441528, "learning_rate": 0.00013137486511853607, 
"loss": 4.4382, "step": 68610 }, { "epoch": 0.13144045289683304, "grad_norm": 1.1429418325424194, "learning_rate": 0.00013139401988264513, "loss": 4.5507, "step": 68620 }, { "epoch": 0.13145960772820828, "grad_norm": 1.1863818168640137, "learning_rate": 0.0001314131746467542, "loss": 4.5867, "step": 68630 }, { "epoch": 0.1314787625595835, "grad_norm": 1.227600336074829, "learning_rate": 0.0001314323294108633, "loss": 4.3931, "step": 68640 }, { "epoch": 0.13149791739095873, "grad_norm": 1.2504924535751343, "learning_rate": 0.00013145148417497237, "loss": 4.5824, "step": 68650 }, { "epoch": 0.13151707222233397, "grad_norm": 1.1841522455215454, "learning_rate": 0.00013147063893908145, "loss": 4.5897, "step": 68660 }, { "epoch": 0.13153622705370918, "grad_norm": 1.251721739768982, "learning_rate": 0.00013148979370319053, "loss": 4.4862, "step": 68670 }, { "epoch": 0.13155538188508442, "grad_norm": 1.1831684112548828, "learning_rate": 0.00013150894846729962, "loss": 4.5975, "step": 68680 }, { "epoch": 0.13157453671645966, "grad_norm": 1.1773699522018433, "learning_rate": 0.0001315281032314087, "loss": 4.464, "step": 68690 }, { "epoch": 0.13159369154783487, "grad_norm": 1.18011474609375, "learning_rate": 0.00013154725799551778, "loss": 4.526, "step": 68700 }, { "epoch": 0.1316128463792101, "grad_norm": 1.226732850074768, "learning_rate": 0.00013156641275962686, "loss": 4.658, "step": 68710 }, { "epoch": 0.13163200121058535, "grad_norm": 1.1554269790649414, "learning_rate": 0.00013158556752373594, "loss": 4.6267, "step": 68720 }, { "epoch": 0.13165115604196057, "grad_norm": 1.1483553647994995, "learning_rate": 0.00013160472228784502, "loss": 4.4317, "step": 68730 }, { "epoch": 0.1316703108733358, "grad_norm": 1.236153244972229, "learning_rate": 0.0001316238770519541, "loss": 4.5453, "step": 68740 }, { "epoch": 0.13168946570471105, "grad_norm": 1.1767520904541016, "learning_rate": 0.00013164303181606316, "loss": 4.677, "step": 68750 }, { "epoch": 0.13170862053608626, 
"grad_norm": 1.179254174232483, "learning_rate": 0.00013166218658017224, "loss": 4.6577, "step": 68760 }, { "epoch": 0.1317277753674615, "grad_norm": 1.1606816053390503, "learning_rate": 0.00013168134134428135, "loss": 4.5799, "step": 68770 }, { "epoch": 0.13174693019883674, "grad_norm": 1.2419636249542236, "learning_rate": 0.0001317004961083904, "loss": 4.5239, "step": 68780 }, { "epoch": 0.13176608503021195, "grad_norm": 1.21503484249115, "learning_rate": 0.00013171965087249948, "loss": 4.6299, "step": 68790 }, { "epoch": 0.1317852398615872, "grad_norm": 1.1947001218795776, "learning_rate": 0.0001317388056366086, "loss": 4.7721, "step": 68800 }, { "epoch": 0.13180439469296243, "grad_norm": 1.2071679830551147, "learning_rate": 0.00013175796040071765, "loss": 4.4077, "step": 68810 }, { "epoch": 0.13182354952433764, "grad_norm": 1.1541818380355835, "learning_rate": 0.00013177711516482673, "loss": 4.55, "step": 68820 }, { "epoch": 0.13184270435571288, "grad_norm": 1.3358464241027832, "learning_rate": 0.00013179626992893583, "loss": 4.4487, "step": 68830 }, { "epoch": 0.13186185918708812, "grad_norm": 1.1902263164520264, "learning_rate": 0.0001318154246930449, "loss": 4.5976, "step": 68840 }, { "epoch": 0.13188101401846333, "grad_norm": 1.1845176219940186, "learning_rate": 0.00013183457945715397, "loss": 4.5446, "step": 68850 }, { "epoch": 0.13190016884983857, "grad_norm": 1.265305757522583, "learning_rate": 0.00013185373422126305, "loss": 4.6356, "step": 68860 }, { "epoch": 0.1319193236812138, "grad_norm": 1.196290373802185, "learning_rate": 0.00013187288898537213, "loss": 4.5392, "step": 68870 }, { "epoch": 0.13193847851258902, "grad_norm": 1.1304556131362915, "learning_rate": 0.00013189204374948121, "loss": 4.6341, "step": 68880 }, { "epoch": 0.13195763334396426, "grad_norm": 1.0971057415008545, "learning_rate": 0.0001319111985135903, "loss": 4.5077, "step": 68890 }, { "epoch": 0.1319767881753395, "grad_norm": 1.1788166761398315, "learning_rate": 
0.00013193035327769938, "loss": 4.656, "step": 68900 }, { "epoch": 0.13199594300671472, "grad_norm": 1.3107136487960815, "learning_rate": 0.00013194950804180846, "loss": 4.7257, "step": 68910 }, { "epoch": 0.13201509783808996, "grad_norm": 1.2953046560287476, "learning_rate": 0.00013196866280591754, "loss": 4.4753, "step": 68920 }, { "epoch": 0.1320342526694652, "grad_norm": 1.19074547290802, "learning_rate": 0.00013198781757002662, "loss": 4.5746, "step": 68930 }, { "epoch": 0.1320534075008404, "grad_norm": 1.18085515499115, "learning_rate": 0.00013200697233413568, "loss": 4.5999, "step": 68940 }, { "epoch": 0.13207256233221565, "grad_norm": 1.1985186338424683, "learning_rate": 0.00013202612709824478, "loss": 4.5405, "step": 68950 }, { "epoch": 0.1320917171635909, "grad_norm": 1.1314170360565186, "learning_rate": 0.00013204528186235386, "loss": 4.6566, "step": 68960 }, { "epoch": 0.1321108719949661, "grad_norm": 1.1383388042449951, "learning_rate": 0.00013206443662646292, "loss": 4.5285, "step": 68970 }, { "epoch": 0.13213002682634134, "grad_norm": 1.1573469638824463, "learning_rate": 0.000132083591390572, "loss": 4.6103, "step": 68980 }, { "epoch": 0.13214918165771658, "grad_norm": 1.173883080482483, "learning_rate": 0.0001321027461546811, "loss": 4.6804, "step": 68990 }, { "epoch": 0.1321683364890918, "grad_norm": 1.2140614986419678, "learning_rate": 0.00013212190091879016, "loss": 4.5681, "step": 69000 }, { "epoch": 0.13218749132046703, "grad_norm": 1.216784119606018, "learning_rate": 0.00013214105568289924, "loss": 4.5649, "step": 69010 }, { "epoch": 0.13220664615184227, "grad_norm": 1.1745331287384033, "learning_rate": 0.00013216021044700835, "loss": 4.501, "step": 69020 }, { "epoch": 0.13222580098321748, "grad_norm": 1.1483967304229736, "learning_rate": 0.0001321793652111174, "loss": 4.5705, "step": 69030 }, { "epoch": 0.13224495581459272, "grad_norm": 1.1931240558624268, "learning_rate": 0.0001321985199752265, "loss": 4.5484, "step": 69040 }, { "epoch": 
0.13226411064596796, "grad_norm": 1.208722710609436, "learning_rate": 0.00013221767473933557, "loss": 4.5459, "step": 69050 }, { "epoch": 0.13228326547734318, "grad_norm": 1.1706551313400269, "learning_rate": 0.00013223682950344465, "loss": 4.4933, "step": 69060 }, { "epoch": 0.13230242030871842, "grad_norm": 1.1891918182373047, "learning_rate": 0.00013225598426755373, "loss": 4.6566, "step": 69070 }, { "epoch": 0.13232157514009366, "grad_norm": 1.1934155225753784, "learning_rate": 0.0001322751390316628, "loss": 4.5463, "step": 69080 }, { "epoch": 0.13234072997146887, "grad_norm": 1.1760896444320679, "learning_rate": 0.0001322942937957719, "loss": 4.6846, "step": 69090 }, { "epoch": 0.1323598848028441, "grad_norm": 1.1138336658477783, "learning_rate": 0.00013231344855988098, "loss": 4.5083, "step": 69100 }, { "epoch": 0.13237903963421935, "grad_norm": 1.1708065271377563, "learning_rate": 0.00013233260332399006, "loss": 4.5652, "step": 69110 }, { "epoch": 0.13239819446559456, "grad_norm": 1.1928033828735352, "learning_rate": 0.00013235175808809914, "loss": 4.6423, "step": 69120 }, { "epoch": 0.1324173492969698, "grad_norm": 1.2115482091903687, "learning_rate": 0.0001323709128522082, "loss": 4.5987, "step": 69130 }, { "epoch": 0.13243650412834504, "grad_norm": 1.1575231552124023, "learning_rate": 0.0001323900676163173, "loss": 4.5876, "step": 69140 }, { "epoch": 0.13245565895972025, "grad_norm": 1.1942616701126099, "learning_rate": 0.00013240922238042638, "loss": 4.577, "step": 69150 }, { "epoch": 0.1324748137910955, "grad_norm": 1.2044830322265625, "learning_rate": 0.00013242837714453544, "loss": 4.6476, "step": 69160 }, { "epoch": 0.13249396862247073, "grad_norm": 1.1952835321426392, "learning_rate": 0.00013244753190864455, "loss": 4.4387, "step": 69170 }, { "epoch": 0.13251312345384594, "grad_norm": 1.171942114830017, "learning_rate": 0.00013246668667275363, "loss": 4.6395, "step": 69180 }, { "epoch": 0.13253227828522118, "grad_norm": 1.1985573768615723, 
"learning_rate": 0.00013248584143686268, "loss": 4.476, "step": 69190 }, { "epoch": 0.13255143311659642, "grad_norm": 1.1452511548995972, "learning_rate": 0.00013250499620097176, "loss": 4.5861, "step": 69200 }, { "epoch": 0.13257058794797164, "grad_norm": 1.1952003240585327, "learning_rate": 0.00013252415096508087, "loss": 4.7096, "step": 69210 }, { "epoch": 0.13258974277934688, "grad_norm": 1.2395063638687134, "learning_rate": 0.00013254330572918992, "loss": 4.5249, "step": 69220 }, { "epoch": 0.13260889761072212, "grad_norm": 1.2202714681625366, "learning_rate": 0.000132562460493299, "loss": 4.5847, "step": 69230 }, { "epoch": 0.13262805244209733, "grad_norm": 1.2553225755691528, "learning_rate": 0.0001325816152574081, "loss": 4.6759, "step": 69240 }, { "epoch": 0.13264720727347257, "grad_norm": 1.196250319480896, "learning_rate": 0.00013260077002151717, "loss": 4.3462, "step": 69250 }, { "epoch": 0.1326663621048478, "grad_norm": 1.1309882402420044, "learning_rate": 0.00013261992478562625, "loss": 4.5565, "step": 69260 }, { "epoch": 0.13268551693622302, "grad_norm": 1.1761003732681274, "learning_rate": 0.00013263907954973533, "loss": 4.5748, "step": 69270 }, { "epoch": 0.13270467176759826, "grad_norm": 1.1801989078521729, "learning_rate": 0.0001326582343138444, "loss": 4.6879, "step": 69280 }, { "epoch": 0.1327238265989735, "grad_norm": 1.1728519201278687, "learning_rate": 0.0001326773890779535, "loss": 4.6856, "step": 69290 }, { "epoch": 0.1327429814303487, "grad_norm": 1.286289930343628, "learning_rate": 0.00013269654384206257, "loss": 4.5928, "step": 69300 }, { "epoch": 0.13276213626172395, "grad_norm": 1.2063194513320923, "learning_rate": 0.00013271569860617166, "loss": 4.7066, "step": 69310 }, { "epoch": 0.1327812910930992, "grad_norm": 1.1479219198226929, "learning_rate": 0.00013273485337028074, "loss": 4.4914, "step": 69320 }, { "epoch": 0.1328004459244744, "grad_norm": 1.2839701175689697, "learning_rate": 0.00013275400813438982, "loss": 4.675, "step": 
69330 }, { "epoch": 0.13281960075584964, "grad_norm": 1.1889569759368896, "learning_rate": 0.0001327731628984989, "loss": 4.698, "step": 69340 }, { "epoch": 0.13283875558722488, "grad_norm": 1.1371568441390991, "learning_rate": 0.00013279231766260795, "loss": 4.5097, "step": 69350 }, { "epoch": 0.1328579104186001, "grad_norm": 1.2153476476669312, "learning_rate": 0.00013281147242671706, "loss": 4.3695, "step": 69360 }, { "epoch": 0.13287706524997533, "grad_norm": 1.1346102952957153, "learning_rate": 0.00013283062719082614, "loss": 4.6224, "step": 69370 }, { "epoch": 0.13289622008135057, "grad_norm": 1.1444171667099, "learning_rate": 0.0001328497819549352, "loss": 4.6502, "step": 69380 }, { "epoch": 0.1329153749127258, "grad_norm": 1.169702172279358, "learning_rate": 0.0001328689367190443, "loss": 4.5589, "step": 69390 }, { "epoch": 0.13293452974410103, "grad_norm": 1.1481306552886963, "learning_rate": 0.0001328880914831534, "loss": 4.5151, "step": 69400 }, { "epoch": 0.13295368457547627, "grad_norm": 1.1869382858276367, "learning_rate": 0.00013290724624726244, "loss": 4.553, "step": 69410 }, { "epoch": 0.13297283940685148, "grad_norm": 1.1351162195205688, "learning_rate": 0.00013292640101137152, "loss": 4.5178, "step": 69420 }, { "epoch": 0.13299199423822672, "grad_norm": 1.1557873487472534, "learning_rate": 0.0001329455557754806, "loss": 4.5232, "step": 69430 }, { "epoch": 0.13301114906960196, "grad_norm": 1.1524567604064941, "learning_rate": 0.00013296471053958969, "loss": 4.6166, "step": 69440 }, { "epoch": 0.13303030390097717, "grad_norm": 1.207661747932434, "learning_rate": 0.00013298386530369877, "loss": 4.5773, "step": 69450 }, { "epoch": 0.1330494587323524, "grad_norm": 1.1991552114486694, "learning_rate": 0.00013300302006780785, "loss": 4.6048, "step": 69460 }, { "epoch": 0.13306861356372765, "grad_norm": 1.214590311050415, "learning_rate": 0.00013302217483191693, "loss": 4.6441, "step": 69470 }, { "epoch": 0.13308776839510286, "grad_norm": 
1.1737288236618042, "learning_rate": 0.000133041329596026, "loss": 4.5035, "step": 69480 }, { "epoch": 0.1331069232264781, "grad_norm": 1.1820244789123535, "learning_rate": 0.0001330604843601351, "loss": 4.5134, "step": 69490 }, { "epoch": 0.13312607805785334, "grad_norm": 1.194040060043335, "learning_rate": 0.00013307963912424417, "loss": 4.664, "step": 69500 }, { "epoch": 0.13314523288922855, "grad_norm": 1.1464556455612183, "learning_rate": 0.00013309879388835326, "loss": 4.6614, "step": 69510 }, { "epoch": 0.1331643877206038, "grad_norm": 1.2116968631744385, "learning_rate": 0.00013311794865246234, "loss": 4.6781, "step": 69520 }, { "epoch": 0.13318354255197903, "grad_norm": 1.2100520133972168, "learning_rate": 0.00013313710341657142, "loss": 4.6058, "step": 69530 }, { "epoch": 0.13320269738335427, "grad_norm": 1.1939574480056763, "learning_rate": 0.0001331562581806805, "loss": 4.5813, "step": 69540 }, { "epoch": 0.13322185221472949, "grad_norm": 1.1421661376953125, "learning_rate": 0.00013317541294478958, "loss": 4.6834, "step": 69550 }, { "epoch": 0.13324100704610473, "grad_norm": 1.1635329723358154, "learning_rate": 0.00013319456770889866, "loss": 4.6027, "step": 69560 }, { "epoch": 0.13326016187747997, "grad_norm": 1.139905571937561, "learning_rate": 0.00013321372247300772, "loss": 4.5905, "step": 69570 }, { "epoch": 0.13327931670885518, "grad_norm": 1.2146934270858765, "learning_rate": 0.00013323287723711682, "loss": 4.5143, "step": 69580 }, { "epoch": 0.13329847154023042, "grad_norm": 1.17388916015625, "learning_rate": 0.0001332520320012259, "loss": 4.6648, "step": 69590 }, { "epoch": 0.13331762637160566, "grad_norm": 1.158410906791687, "learning_rate": 0.00013327118676533496, "loss": 4.6773, "step": 69600 }, { "epoch": 0.13333678120298087, "grad_norm": 1.2470577955245972, "learning_rate": 0.00013329034152944407, "loss": 4.628, "step": 69610 }, { "epoch": 0.1333559360343561, "grad_norm": 1.1557437181472778, "learning_rate": 0.00013330949629355312, "loss": 
4.5155, "step": 69620 }, { "epoch": 0.13337509086573135, "grad_norm": 1.229286551475525, "learning_rate": 0.0001333286510576622, "loss": 4.6711, "step": 69630 }, { "epoch": 0.13339424569710656, "grad_norm": 1.1450104713439941, "learning_rate": 0.00013334780582177129, "loss": 4.7427, "step": 69640 }, { "epoch": 0.1334134005284818, "grad_norm": 1.2070385217666626, "learning_rate": 0.00013336696058588037, "loss": 4.5692, "step": 69650 }, { "epoch": 0.13343255535985704, "grad_norm": 1.1317418813705444, "learning_rate": 0.00013338611534998945, "loss": 4.6153, "step": 69660 }, { "epoch": 0.13345171019123225, "grad_norm": 1.1681663990020752, "learning_rate": 0.00013340527011409853, "loss": 4.5906, "step": 69670 }, { "epoch": 0.1334708650226075, "grad_norm": 1.1597435474395752, "learning_rate": 0.0001334244248782076, "loss": 4.6221, "step": 69680 }, { "epoch": 0.13349001985398273, "grad_norm": 1.152551293373108, "learning_rate": 0.0001334435796423167, "loss": 4.4511, "step": 69690 }, { "epoch": 0.13350917468535795, "grad_norm": 1.1384973526000977, "learning_rate": 0.00013346273440642577, "loss": 4.5473, "step": 69700 }, { "epoch": 0.13352832951673319, "grad_norm": 1.2607524394989014, "learning_rate": 0.00013348188917053485, "loss": 4.5849, "step": 69710 }, { "epoch": 0.13354748434810843, "grad_norm": 1.212689757347107, "learning_rate": 0.00013350104393464394, "loss": 4.5027, "step": 69720 }, { "epoch": 0.13356663917948364, "grad_norm": 1.209350824356079, "learning_rate": 0.00013352019869875302, "loss": 4.3867, "step": 69730 }, { "epoch": 0.13358579401085888, "grad_norm": 1.2987034320831299, "learning_rate": 0.0001335393534628621, "loss": 4.6132, "step": 69740 }, { "epoch": 0.13360494884223412, "grad_norm": 1.1676839590072632, "learning_rate": 0.00013355850822697118, "loss": 4.5317, "step": 69750 }, { "epoch": 0.13362410367360933, "grad_norm": 1.2286580801010132, "learning_rate": 0.00013357766299108026, "loss": 4.4001, "step": 69760 }, { "epoch": 0.13364325850498457, 
"grad_norm": 1.1348178386688232, "learning_rate": 0.00013359681775518934, "loss": 4.6898, "step": 69770 }, { "epoch": 0.1336624133363598, "grad_norm": 1.1658560037612915, "learning_rate": 0.00013361597251929842, "loss": 4.5736, "step": 69780 }, { "epoch": 0.13368156816773502, "grad_norm": 1.2974295616149902, "learning_rate": 0.00013363512728340748, "loss": 4.5817, "step": 69790 }, { "epoch": 0.13370072299911026, "grad_norm": 1.184421420097351, "learning_rate": 0.00013365428204751659, "loss": 4.6228, "step": 69800 }, { "epoch": 0.1337198778304855, "grad_norm": 1.195404291152954, "learning_rate": 0.00013367343681162564, "loss": 4.5859, "step": 69810 }, { "epoch": 0.1337390326618607, "grad_norm": 1.1982530355453491, "learning_rate": 0.00013369259157573472, "loss": 4.5572, "step": 69820 }, { "epoch": 0.13375818749323595, "grad_norm": 1.1019731760025024, "learning_rate": 0.00013371174633984383, "loss": 4.5959, "step": 69830 }, { "epoch": 0.1337773423246112, "grad_norm": 1.1866843700408936, "learning_rate": 0.00013373090110395288, "loss": 4.603, "step": 69840 }, { "epoch": 0.1337964971559864, "grad_norm": 1.1926259994506836, "learning_rate": 0.00013375005586806197, "loss": 4.5221, "step": 69850 }, { "epoch": 0.13381565198736164, "grad_norm": 1.1988939046859741, "learning_rate": 0.00013376921063217105, "loss": 4.645, "step": 69860 }, { "epoch": 0.13383480681873688, "grad_norm": 1.1686774492263794, "learning_rate": 0.00013378836539628013, "loss": 4.4192, "step": 69870 }, { "epoch": 0.1338539616501121, "grad_norm": 1.162235140800476, "learning_rate": 0.0001338075201603892, "loss": 4.592, "step": 69880 }, { "epoch": 0.13387311648148734, "grad_norm": 1.149224877357483, "learning_rate": 0.0001338266749244983, "loss": 4.498, "step": 69890 }, { "epoch": 0.13389227131286258, "grad_norm": 1.1791800260543823, "learning_rate": 0.00013384582968860737, "loss": 4.5827, "step": 69900 }, { "epoch": 0.1339114261442378, "grad_norm": 1.159754991531372, "learning_rate": 
0.00013386498445271645, "loss": 4.5285, "step": 69910 }, { "epoch": 0.13393058097561303, "grad_norm": 1.213318109512329, "learning_rate": 0.00013388413921682553, "loss": 4.5054, "step": 69920 }, { "epoch": 0.13394973580698827, "grad_norm": 1.1879370212554932, "learning_rate": 0.00013390329398093462, "loss": 4.5891, "step": 69930 }, { "epoch": 0.13396889063836348, "grad_norm": 1.2176002264022827, "learning_rate": 0.00013392053326863278, "loss": 4.5701, "step": 69940 }, { "epoch": 0.13398804546973872, "grad_norm": 1.2350192070007324, "learning_rate": 0.00013393968803274186, "loss": 4.6288, "step": 69950 }, { "epoch": 0.13400720030111396, "grad_norm": 1.206812858581543, "learning_rate": 0.00013395884279685094, "loss": 4.4931, "step": 69960 }, { "epoch": 0.13402635513248917, "grad_norm": 1.1786081790924072, "learning_rate": 0.00013397799756096002, "loss": 4.5095, "step": 69970 }, { "epoch": 0.1340455099638644, "grad_norm": 1.180088758468628, "learning_rate": 0.0001339971523250691, "loss": 4.5695, "step": 69980 }, { "epoch": 0.13406466479523965, "grad_norm": 1.1954424381256104, "learning_rate": 0.00013401630708917818, "loss": 4.6194, "step": 69990 }, { "epoch": 0.13408381962661486, "grad_norm": 1.16026771068573, "learning_rate": 0.00013403546185328726, "loss": 4.6478, "step": 70000 }, { "epoch": 0.1341029744579901, "grad_norm": 1.185762882232666, "learning_rate": 0.00013405461661739634, "loss": 4.5774, "step": 70010 }, { "epoch": 0.13412212928936534, "grad_norm": 1.1931132078170776, "learning_rate": 0.00013407377138150543, "loss": 4.5046, "step": 70020 }, { "epoch": 0.13414128412074056, "grad_norm": 1.2170348167419434, "learning_rate": 0.0001340929261456145, "loss": 4.4855, "step": 70030 }, { "epoch": 0.1341604389521158, "grad_norm": 1.1224621534347534, "learning_rate": 0.0001341120809097236, "loss": 4.7407, "step": 70040 }, { "epoch": 0.13417959378349104, "grad_norm": 1.1534044742584229, "learning_rate": 0.00013413123567383267, "loss": 4.5223, "step": 70050 }, { 
"epoch": 0.13419874861486625, "grad_norm": 1.2033146619796753, "learning_rate": 0.00013415039043794175, "loss": 4.5925, "step": 70060 }, { "epoch": 0.1342179034462415, "grad_norm": 1.1780258417129517, "learning_rate": 0.00013416954520205083, "loss": 4.6011, "step": 70070 }, { "epoch": 0.13423705827761673, "grad_norm": 1.1467312574386597, "learning_rate": 0.00013418869996615991, "loss": 4.539, "step": 70080 }, { "epoch": 0.13425621310899194, "grad_norm": 1.1413311958312988, "learning_rate": 0.000134207854730269, "loss": 4.4458, "step": 70090 }, { "epoch": 0.13427536794036718, "grad_norm": 1.1560152769088745, "learning_rate": 0.00013422700949437805, "loss": 4.6395, "step": 70100 }, { "epoch": 0.13429452277174242, "grad_norm": 1.1614255905151367, "learning_rate": 0.00013424616425848713, "loss": 4.5428, "step": 70110 }, { "epoch": 0.13431367760311763, "grad_norm": 1.218492031097412, "learning_rate": 0.00013426531902259624, "loss": 4.5629, "step": 70120 }, { "epoch": 0.13433283243449287, "grad_norm": 1.1773186922073364, "learning_rate": 0.0001342844737867053, "loss": 4.4696, "step": 70130 }, { "epoch": 0.1343519872658681, "grad_norm": 1.2269644737243652, "learning_rate": 0.00013430362855081437, "loss": 4.5759, "step": 70140 }, { "epoch": 0.13437114209724332, "grad_norm": 1.1526401042938232, "learning_rate": 0.00013432278331492348, "loss": 4.495, "step": 70150 }, { "epoch": 0.13439029692861856, "grad_norm": 1.2006967067718506, "learning_rate": 0.00013434193807903254, "loss": 4.4377, "step": 70160 }, { "epoch": 0.1344094517599938, "grad_norm": 1.1800320148468018, "learning_rate": 0.00013436109284314162, "loss": 4.5568, "step": 70170 }, { "epoch": 0.13442860659136902, "grad_norm": 1.2061355113983154, "learning_rate": 0.0001343802476072507, "loss": 4.5786, "step": 70180 }, { "epoch": 0.13444776142274426, "grad_norm": 1.1823681592941284, "learning_rate": 0.00013439940237135978, "loss": 4.6007, "step": 70190 }, { "epoch": 0.1344669162541195, "grad_norm": 1.1903079748153687, 
"learning_rate": 0.00013441855713546886, "loss": 4.6934, "step": 70200 }, { "epoch": 0.1344860710854947, "grad_norm": 1.150672435760498, "learning_rate": 0.00013443771189957794, "loss": 4.4813, "step": 70210 }, { "epoch": 0.13450522591686995, "grad_norm": 1.2643526792526245, "learning_rate": 0.00013445686666368703, "loss": 4.583, "step": 70220 }, { "epoch": 0.1345243807482452, "grad_norm": 1.1679397821426392, "learning_rate": 0.0001344760214277961, "loss": 4.5247, "step": 70230 }, { "epoch": 0.1345435355796204, "grad_norm": 1.182676911354065, "learning_rate": 0.0001344951761919052, "loss": 4.6105, "step": 70240 }, { "epoch": 0.13456269041099564, "grad_norm": 1.152687668800354, "learning_rate": 0.00013451433095601427, "loss": 4.4627, "step": 70250 }, { "epoch": 0.13458184524237088, "grad_norm": 1.1184951066970825, "learning_rate": 0.00013453348572012335, "loss": 4.5586, "step": 70260 }, { "epoch": 0.1346010000737461, "grad_norm": 1.1873297691345215, "learning_rate": 0.00013455264048423243, "loss": 4.4433, "step": 70270 }, { "epoch": 0.13462015490512133, "grad_norm": 1.1839932203292847, "learning_rate": 0.0001345717952483415, "loss": 4.733, "step": 70280 }, { "epoch": 0.13463930973649657, "grad_norm": 1.1590577363967896, "learning_rate": 0.00013459095001245057, "loss": 4.5135, "step": 70290 }, { "epoch": 0.13465846456787178, "grad_norm": 1.1622980833053589, "learning_rate": 0.00013461010477655968, "loss": 4.3541, "step": 70300 }, { "epoch": 0.13467761939924702, "grad_norm": 1.1445600986480713, "learning_rate": 0.00013462925954066876, "loss": 4.5411, "step": 70310 }, { "epoch": 0.13469677423062226, "grad_norm": 1.1397640705108643, "learning_rate": 0.0001346484143047778, "loss": 4.5355, "step": 70320 }, { "epoch": 0.13471592906199747, "grad_norm": 1.1448346376419067, "learning_rate": 0.0001346675690688869, "loss": 4.565, "step": 70330 }, { "epoch": 0.13473508389337271, "grad_norm": 1.1959266662597656, "learning_rate": 0.000134686723832996, "loss": 4.4444, "step": 70340 
}, { "epoch": 0.13475423872474795, "grad_norm": 1.2644606828689575, "learning_rate": 0.00013470587859710505, "loss": 4.7196, "step": 70350 }, { "epoch": 0.13477339355612317, "grad_norm": 1.2494138479232788, "learning_rate": 0.00013472503336121414, "loss": 4.4434, "step": 70360 }, { "epoch": 0.1347925483874984, "grad_norm": 1.2126049995422363, "learning_rate": 0.00013474418812532324, "loss": 4.5413, "step": 70370 }, { "epoch": 0.13481170321887365, "grad_norm": 1.1592909097671509, "learning_rate": 0.0001347633428894323, "loss": 4.6414, "step": 70380 }, { "epoch": 0.13483085805024886, "grad_norm": 1.1565496921539307, "learning_rate": 0.00013478249765354138, "loss": 4.5632, "step": 70390 }, { "epoch": 0.1348500128816241, "grad_norm": 1.1675209999084473, "learning_rate": 0.00013480165241765046, "loss": 4.5582, "step": 70400 }, { "epoch": 0.13486916771299934, "grad_norm": 1.2088431119918823, "learning_rate": 0.00013482080718175954, "loss": 4.4982, "step": 70410 }, { "epoch": 0.13488832254437455, "grad_norm": 1.19057297706604, "learning_rate": 0.00013483996194586862, "loss": 4.4987, "step": 70420 }, { "epoch": 0.1349074773757498, "grad_norm": 1.1889914274215698, "learning_rate": 0.0001348591167099777, "loss": 4.5431, "step": 70430 }, { "epoch": 0.13492663220712503, "grad_norm": 1.1744438409805298, "learning_rate": 0.0001348782714740868, "loss": 4.6549, "step": 70440 }, { "epoch": 0.13494578703850024, "grad_norm": 1.2367494106292725, "learning_rate": 0.00013489742623819587, "loss": 4.5381, "step": 70450 }, { "epoch": 0.13496494186987548, "grad_norm": 1.1226838827133179, "learning_rate": 0.00013491658100230495, "loss": 4.5963, "step": 70460 }, { "epoch": 0.13498409670125072, "grad_norm": 1.2392045259475708, "learning_rate": 0.00013493573576641403, "loss": 4.5463, "step": 70470 }, { "epoch": 0.13500325153262593, "grad_norm": 1.1852205991744995, "learning_rate": 0.00013495489053052308, "loss": 4.5411, "step": 70480 }, { "epoch": 0.13502240636400117, "grad_norm": 
1.1654564142227173, "learning_rate": 0.0001349740452946322, "loss": 4.5799, "step": 70490 }, { "epoch": 0.13504156119537641, "grad_norm": 1.177889108657837, "learning_rate": 0.00013499320005874127, "loss": 4.5929, "step": 70500 }, { "epoch": 0.13506071602675163, "grad_norm": 1.1712610721588135, "learning_rate": 0.00013501235482285033, "loss": 4.6413, "step": 70510 }, { "epoch": 0.13507987085812687, "grad_norm": 1.1298844814300537, "learning_rate": 0.00013503150958695944, "loss": 4.5836, "step": 70520 }, { "epoch": 0.1350990256895021, "grad_norm": 1.1856645345687866, "learning_rate": 0.00013505066435106852, "loss": 4.4853, "step": 70530 }, { "epoch": 0.13511818052087732, "grad_norm": 1.1615790128707886, "learning_rate": 0.00013506981911517757, "loss": 4.8194, "step": 70540 }, { "epoch": 0.13513733535225256, "grad_norm": 1.13602876663208, "learning_rate": 0.00013508897387928665, "loss": 4.6157, "step": 70550 }, { "epoch": 0.1351564901836278, "grad_norm": 1.1719883680343628, "learning_rate": 0.00013510812864339576, "loss": 4.6558, "step": 70560 }, { "epoch": 0.135175645015003, "grad_norm": 1.1830170154571533, "learning_rate": 0.00013512728340750482, "loss": 4.5461, "step": 70570 }, { "epoch": 0.13519479984637825, "grad_norm": 1.1607098579406738, "learning_rate": 0.0001351464381716139, "loss": 4.6619, "step": 70580 }, { "epoch": 0.1352139546777535, "grad_norm": 1.162705898284912, "learning_rate": 0.00013516559293572298, "loss": 4.6064, "step": 70590 }, { "epoch": 0.1352331095091287, "grad_norm": 1.2241407632827759, "learning_rate": 0.00013518474769983206, "loss": 4.6193, "step": 70600 }, { "epoch": 0.13525226434050394, "grad_norm": 1.1859710216522217, "learning_rate": 0.00013520390246394114, "loss": 4.472, "step": 70610 }, { "epoch": 0.13527141917187918, "grad_norm": 1.1520044803619385, "learning_rate": 0.00013522305722805022, "loss": 4.5894, "step": 70620 }, { "epoch": 0.1352905740032544, "grad_norm": 1.1912901401519775, "learning_rate": 0.0001352422119921593, "loss": 
4.4992, "step": 70630 }, { "epoch": 0.13530972883462963, "grad_norm": 1.1810288429260254, "learning_rate": 0.00013526136675626839, "loss": 4.718, "step": 70640 }, { "epoch": 0.13532888366600487, "grad_norm": 1.2900892496109009, "learning_rate": 0.00013528052152037747, "loss": 4.62, "step": 70650 }, { "epoch": 0.13534803849738009, "grad_norm": 1.1418390274047852, "learning_rate": 0.00013529967628448655, "loss": 4.5108, "step": 70660 }, { "epoch": 0.13536719332875533, "grad_norm": 1.2111748456954956, "learning_rate": 0.00013531883104859563, "loss": 4.6034, "step": 70670 }, { "epoch": 0.13538634816013057, "grad_norm": 1.1792585849761963, "learning_rate": 0.0001353379858127047, "loss": 4.5613, "step": 70680 }, { "epoch": 0.13540550299150578, "grad_norm": 1.104304313659668, "learning_rate": 0.0001353571405768138, "loss": 4.4762, "step": 70690 }, { "epoch": 0.13542465782288102, "grad_norm": 1.160605549812317, "learning_rate": 0.00013537629534092285, "loss": 4.6385, "step": 70700 }, { "epoch": 0.13544381265425626, "grad_norm": 1.163417935371399, "learning_rate": 0.00013539545010503195, "loss": 4.3959, "step": 70710 }, { "epoch": 0.13546296748563147, "grad_norm": 1.2300159931182861, "learning_rate": 0.00013541460486914104, "loss": 4.5355, "step": 70720 }, { "epoch": 0.1354821223170067, "grad_norm": 1.1441353559494019, "learning_rate": 0.0001354337596332501, "loss": 4.6058, "step": 70730 }, { "epoch": 0.13550127714838195, "grad_norm": 1.1939668655395508, "learning_rate": 0.0001354529143973592, "loss": 4.5239, "step": 70740 }, { "epoch": 0.13552043197975716, "grad_norm": 1.2296812534332275, "learning_rate": 0.00013547206916146828, "loss": 4.5616, "step": 70750 }, { "epoch": 0.1355395868111324, "grad_norm": 1.1993930339813232, "learning_rate": 0.00013549122392557733, "loss": 4.5291, "step": 70760 }, { "epoch": 0.13555874164250764, "grad_norm": 1.1601303815841675, "learning_rate": 0.00013551037868968642, "loss": 4.6055, "step": 70770 }, { "epoch": 0.13557789647388285, 
"grad_norm": 1.193635106086731, "learning_rate": 0.0001355295334537955, "loss": 4.5172, "step": 70780 }, { "epoch": 0.1355970513052581, "grad_norm": 1.105972409248352, "learning_rate": 0.00013554868821790458, "loss": 4.6769, "step": 70790 }, { "epoch": 0.13561620613663333, "grad_norm": 1.2341320514678955, "learning_rate": 0.00013556784298201366, "loss": 4.6387, "step": 70800 }, { "epoch": 0.13563536096800854, "grad_norm": 1.170447826385498, "learning_rate": 0.00013558699774612274, "loss": 4.5982, "step": 70810 }, { "epoch": 0.13565451579938378, "grad_norm": 1.1872668266296387, "learning_rate": 0.00013560615251023182, "loss": 4.5666, "step": 70820 }, { "epoch": 0.13567367063075902, "grad_norm": 1.1529451608657837, "learning_rate": 0.0001356253072743409, "loss": 4.5875, "step": 70830 }, { "epoch": 0.13569282546213424, "grad_norm": 1.1403776407241821, "learning_rate": 0.00013564446203844998, "loss": 4.6751, "step": 70840 }, { "epoch": 0.13571198029350948, "grad_norm": 1.2034761905670166, "learning_rate": 0.00013566361680255907, "loss": 4.5943, "step": 70850 }, { "epoch": 0.13573113512488472, "grad_norm": 1.2229288816452026, "learning_rate": 0.00013568277156666815, "loss": 4.606, "step": 70860 }, { "epoch": 0.13575028995625996, "grad_norm": 1.1519094705581665, "learning_rate": 0.00013570192633077723, "loss": 4.5567, "step": 70870 }, { "epoch": 0.13576944478763517, "grad_norm": 1.1575264930725098, "learning_rate": 0.0001357210810948863, "loss": 4.6697, "step": 70880 }, { "epoch": 0.1357885996190104, "grad_norm": 1.1467615365982056, "learning_rate": 0.00013574023585899536, "loss": 4.594, "step": 70890 }, { "epoch": 0.13580775445038565, "grad_norm": 1.2156963348388672, "learning_rate": 0.00013575939062310447, "loss": 4.4854, "step": 70900 }, { "epoch": 0.13582690928176086, "grad_norm": 1.2407587766647339, "learning_rate": 0.00013577854538721355, "loss": 4.5905, "step": 70910 }, { "epoch": 0.1358460641131361, "grad_norm": 1.259261965751648, "learning_rate": 
0.0001357977001513226, "loss": 4.5775, "step": 70920 }, { "epoch": 0.13586521894451134, "grad_norm": 1.2242239713668823, "learning_rate": 0.00013581685491543172, "loss": 4.4665, "step": 70930 }, { "epoch": 0.13588437377588655, "grad_norm": 1.3810515403747559, "learning_rate": 0.0001358360096795408, "loss": 4.4613, "step": 70940 }, { "epoch": 0.1359035286072618, "grad_norm": 1.1099523305892944, "learning_rate": 0.00013585516444364985, "loss": 4.5423, "step": 70950 }, { "epoch": 0.13592268343863703, "grad_norm": 1.1175669431686401, "learning_rate": 0.00013587431920775896, "loss": 4.7073, "step": 70960 }, { "epoch": 0.13594183827001224, "grad_norm": 1.1311203241348267, "learning_rate": 0.00013589347397186801, "loss": 4.5096, "step": 70970 }, { "epoch": 0.13596099310138748, "grad_norm": 1.1561946868896484, "learning_rate": 0.0001359126287359771, "loss": 4.6857, "step": 70980 }, { "epoch": 0.13598014793276272, "grad_norm": 1.1518157720565796, "learning_rate": 0.00013593178350008618, "loss": 4.5536, "step": 70990 }, { "epoch": 0.13599930276413794, "grad_norm": 1.1456005573272705, "learning_rate": 0.00013595093826419526, "loss": 4.4111, "step": 71000 }, { "epoch": 0.13601845759551318, "grad_norm": 1.142863154411316, "learning_rate": 0.00013597009302830434, "loss": 4.5747, "step": 71010 }, { "epoch": 0.13603761242688842, "grad_norm": 1.15705406665802, "learning_rate": 0.00013598924779241342, "loss": 4.566, "step": 71020 }, { "epoch": 0.13605676725826363, "grad_norm": 1.2655930519104004, "learning_rate": 0.0001360084025565225, "loss": 4.42, "step": 71030 }, { "epoch": 0.13607592208963887, "grad_norm": 1.1180349588394165, "learning_rate": 0.00013602755732063158, "loss": 4.6361, "step": 71040 }, { "epoch": 0.1360950769210141, "grad_norm": 1.2195531129837036, "learning_rate": 0.00013604671208474066, "loss": 4.6023, "step": 71050 }, { "epoch": 0.13611423175238932, "grad_norm": 1.1515756845474243, "learning_rate": 0.00013606586684884975, "loss": 4.6105, "step": 71060 }, { 
"epoch": 0.13613338658376456, "grad_norm": 1.2211644649505615, "learning_rate": 0.00013608502161295883, "loss": 4.4711, "step": 71070 }, { "epoch": 0.1361525414151398, "grad_norm": 1.1028648614883423, "learning_rate": 0.0001361041763770679, "loss": 4.5194, "step": 71080 }, { "epoch": 0.136171696246515, "grad_norm": 1.202558159828186, "learning_rate": 0.000136123331141177, "loss": 4.6072, "step": 71090 }, { "epoch": 0.13619085107789025, "grad_norm": 1.1840615272521973, "learning_rate": 0.00013614248590528607, "loss": 4.5841, "step": 71100 }, { "epoch": 0.1362100059092655, "grad_norm": 1.1595536470413208, "learning_rate": 0.00013616164066939513, "loss": 4.4674, "step": 71110 }, { "epoch": 0.1362291607406407, "grad_norm": 1.0889410972595215, "learning_rate": 0.00013618079543350423, "loss": 4.4988, "step": 71120 }, { "epoch": 0.13624831557201594, "grad_norm": 1.1445469856262207, "learning_rate": 0.00013619995019761332, "loss": 4.5312, "step": 71130 }, { "epoch": 0.13626747040339118, "grad_norm": 1.1426922082901, "learning_rate": 0.00013621910496172237, "loss": 4.5866, "step": 71140 }, { "epoch": 0.1362866252347664, "grad_norm": 1.1541727781295776, "learning_rate": 0.00013623825972583148, "loss": 4.6307, "step": 71150 }, { "epoch": 0.13630578006614164, "grad_norm": 1.1418389081954956, "learning_rate": 0.00013625741448994053, "loss": 4.5172, "step": 71160 }, { "epoch": 0.13632493489751688, "grad_norm": 1.152335524559021, "learning_rate": 0.00013627656925404961, "loss": 4.5582, "step": 71170 }, { "epoch": 0.1363440897288921, "grad_norm": 1.1540051698684692, "learning_rate": 0.00013629572401815872, "loss": 4.4989, "step": 71180 }, { "epoch": 0.13636324456026733, "grad_norm": 1.2095024585723877, "learning_rate": 0.00013631487878226778, "loss": 4.5883, "step": 71190 }, { "epoch": 0.13638239939164257, "grad_norm": 1.2322512865066528, "learning_rate": 0.00013633403354637686, "loss": 4.6558, "step": 71200 }, { "epoch": 0.13640155422301778, "grad_norm": 1.3631786108016968, 
"learning_rate": 0.00013635318831048594, "loss": 4.4325, "step": 71210 }, { "epoch": 0.13642070905439302, "grad_norm": 1.1548125743865967, "learning_rate": 0.00013637234307459502, "loss": 4.5242, "step": 71220 }, { "epoch": 0.13643986388576826, "grad_norm": 1.1764321327209473, "learning_rate": 0.0001363914978387041, "loss": 4.6012, "step": 71230 }, { "epoch": 0.13645901871714347, "grad_norm": 1.23037588596344, "learning_rate": 0.00013641065260281318, "loss": 4.4464, "step": 71240 }, { "epoch": 0.1364781735485187, "grad_norm": 1.1786367893218994, "learning_rate": 0.00013642980736692226, "loss": 4.5572, "step": 71250 }, { "epoch": 0.13649732837989395, "grad_norm": 1.1423437595367432, "learning_rate": 0.00013644896213103135, "loss": 4.6195, "step": 71260 }, { "epoch": 0.13651648321126916, "grad_norm": 1.1360619068145752, "learning_rate": 0.00013646811689514043, "loss": 4.5742, "step": 71270 }, { "epoch": 0.1365356380426444, "grad_norm": 1.135740876197815, "learning_rate": 0.0001364872716592495, "loss": 4.5127, "step": 71280 }, { "epoch": 0.13655479287401964, "grad_norm": 1.1807212829589844, "learning_rate": 0.0001365064264233586, "loss": 4.5286, "step": 71290 }, { "epoch": 0.13657394770539485, "grad_norm": 1.1253432035446167, "learning_rate": 0.00013652558118746767, "loss": 4.6569, "step": 71300 }, { "epoch": 0.1365931025367701, "grad_norm": 1.1445955038070679, "learning_rate": 0.00013654473595157675, "loss": 4.6227, "step": 71310 }, { "epoch": 0.13661225736814533, "grad_norm": 1.1690328121185303, "learning_rate": 0.00013656389071568583, "loss": 4.5838, "step": 71320 }, { "epoch": 0.13663141219952055, "grad_norm": 1.2061071395874023, "learning_rate": 0.0001365830454797949, "loss": 4.6181, "step": 71330 }, { "epoch": 0.1366505670308958, "grad_norm": 1.135718584060669, "learning_rate": 0.000136602200243904, "loss": 4.6846, "step": 71340 }, { "epoch": 0.13666972186227103, "grad_norm": 1.1651102304458618, "learning_rate": 0.00013662135500801308, "loss": 4.4911, "step": 
71350 }, { "epoch": 0.13668887669364624, "grad_norm": 1.223565936088562, "learning_rate": 0.00013664050977212213, "loss": 4.4527, "step": 71360 }, { "epoch": 0.13670803152502148, "grad_norm": 1.1889342069625854, "learning_rate": 0.00013665966453623124, "loss": 4.6534, "step": 71370 }, { "epoch": 0.13672718635639672, "grad_norm": 1.2481049299240112, "learning_rate": 0.0001366788193003403, "loss": 4.5518, "step": 71380 }, { "epoch": 0.13674634118777193, "grad_norm": 1.1838629245758057, "learning_rate": 0.00013669797406444938, "loss": 4.5158, "step": 71390 }, { "epoch": 0.13676549601914717, "grad_norm": 1.1604418754577637, "learning_rate": 0.00013671712882855846, "loss": 4.5298, "step": 71400 }, { "epoch": 0.1367846508505224, "grad_norm": 1.1836681365966797, "learning_rate": 0.00013673628359266754, "loss": 4.5205, "step": 71410 }, { "epoch": 0.13680380568189762, "grad_norm": 1.1340867280960083, "learning_rate": 0.00013675543835677662, "loss": 4.5682, "step": 71420 }, { "epoch": 0.13682296051327286, "grad_norm": 1.1958907842636108, "learning_rate": 0.0001367745931208857, "loss": 4.6391, "step": 71430 }, { "epoch": 0.1368421153446481, "grad_norm": 1.2096232175827026, "learning_rate": 0.00013679374788499478, "loss": 4.6024, "step": 71440 }, { "epoch": 0.13686127017602331, "grad_norm": 1.1733242273330688, "learning_rate": 0.00013681290264910386, "loss": 4.6793, "step": 71450 }, { "epoch": 0.13688042500739855, "grad_norm": 1.1714513301849365, "learning_rate": 0.00013683205741321294, "loss": 4.5536, "step": 71460 }, { "epoch": 0.1368995798387738, "grad_norm": 1.1409170627593994, "learning_rate": 0.00013685121217732203, "loss": 4.7506, "step": 71470 }, { "epoch": 0.136918734670149, "grad_norm": 1.1844472885131836, "learning_rate": 0.0001368703669414311, "loss": 4.5769, "step": 71480 }, { "epoch": 0.13693788950152425, "grad_norm": 1.169500708580017, "learning_rate": 0.0001368895217055402, "loss": 4.551, "step": 71490 }, { "epoch": 0.13695704433289949, "grad_norm": 
1.1980255842208862, "learning_rate": 0.00013690867646964927, "loss": 4.4649, "step": 71500 }, { "epoch": 0.1369761991642747, "grad_norm": 1.172063946723938, "learning_rate": 0.00013692783123375835, "loss": 4.5022, "step": 71510 }, { "epoch": 0.13699535399564994, "grad_norm": 1.159940242767334, "learning_rate": 0.00013694698599786743, "loss": 4.6546, "step": 71520 }, { "epoch": 0.13701450882702518, "grad_norm": 1.1655503511428833, "learning_rate": 0.0001369661407619765, "loss": 4.4243, "step": 71530 }, { "epoch": 0.1370336636584004, "grad_norm": 1.1954214572906494, "learning_rate": 0.0001369852955260856, "loss": 4.7082, "step": 71540 }, { "epoch": 0.13705281848977563, "grad_norm": 1.192991852760315, "learning_rate": 0.00013700445029019465, "loss": 4.6072, "step": 71550 }, { "epoch": 0.13707197332115087, "grad_norm": 1.1418403387069702, "learning_rate": 0.00013702360505430376, "loss": 4.816, "step": 71560 }, { "epoch": 0.13709112815252608, "grad_norm": 1.1628170013427734, "learning_rate": 0.0001370427598184128, "loss": 4.5463, "step": 71570 }, { "epoch": 0.13711028298390132, "grad_norm": 1.1890547275543213, "learning_rate": 0.0001370619145825219, "loss": 4.4712, "step": 71580 }, { "epoch": 0.13712943781527656, "grad_norm": 1.1542634963989258, "learning_rate": 0.000137081069346631, "loss": 4.5535, "step": 71590 }, { "epoch": 0.13714859264665177, "grad_norm": 1.1574556827545166, "learning_rate": 0.00013710022411074006, "loss": 4.5654, "step": 71600 }, { "epoch": 0.137167747478027, "grad_norm": 1.1718690395355225, "learning_rate": 0.00013711937887484914, "loss": 4.5044, "step": 71610 }, { "epoch": 0.13718690230940225, "grad_norm": 1.2493033409118652, "learning_rate": 0.00013713853363895822, "loss": 4.5039, "step": 71620 }, { "epoch": 0.13720605714077747, "grad_norm": 1.2639856338500977, "learning_rate": 0.0001371576884030673, "loss": 4.6601, "step": 71630 }, { "epoch": 0.1372252119721527, "grad_norm": 1.164430856704712, "learning_rate": 0.00013717684316717638, "loss": 
4.486, "step": 71640 }, { "epoch": 0.13724436680352795, "grad_norm": 1.2026301622390747, "learning_rate": 0.00013719599793128546, "loss": 4.648, "step": 71650 }, { "epoch": 0.13726352163490316, "grad_norm": 1.203107237815857, "learning_rate": 0.00013721515269539454, "loss": 4.5209, "step": 71660 }, { "epoch": 0.1372826764662784, "grad_norm": 1.150673270225525, "learning_rate": 0.00013723430745950362, "loss": 4.705, "step": 71670 }, { "epoch": 0.13730183129765364, "grad_norm": 1.1853630542755127, "learning_rate": 0.0001372534622236127, "loss": 4.5042, "step": 71680 }, { "epoch": 0.13732098612902885, "grad_norm": 1.133656620979309, "learning_rate": 0.0001372726169877218, "loss": 4.5884, "step": 71690 }, { "epoch": 0.1373401409604041, "grad_norm": 1.1559687852859497, "learning_rate": 0.00013729177175183087, "loss": 4.4952, "step": 71700 }, { "epoch": 0.13735929579177933, "grad_norm": 1.1563581228256226, "learning_rate": 0.00013731092651593995, "loss": 4.6395, "step": 71710 }, { "epoch": 0.13737845062315454, "grad_norm": 1.1492018699645996, "learning_rate": 0.00013733008128004903, "loss": 4.6737, "step": 71720 }, { "epoch": 0.13739760545452978, "grad_norm": 1.222019910812378, "learning_rate": 0.0001373492360441581, "loss": 4.5218, "step": 71730 }, { "epoch": 0.13741676028590502, "grad_norm": 1.1597883701324463, "learning_rate": 0.0001373683908082672, "loss": 4.4818, "step": 71740 }, { "epoch": 0.13743591511728023, "grad_norm": 1.1231240034103394, "learning_rate": 0.00013738754557237628, "loss": 4.4621, "step": 71750 }, { "epoch": 0.13745506994865547, "grad_norm": 1.158553957939148, "learning_rate": 0.00013740670033648533, "loss": 4.6895, "step": 71760 }, { "epoch": 0.1374742247800307, "grad_norm": 1.191282868385315, "learning_rate": 0.0001374258551005944, "loss": 4.6658, "step": 71770 }, { "epoch": 0.13749337961140592, "grad_norm": 1.119709849357605, "learning_rate": 0.00013744500986470352, "loss": 4.6202, "step": 71780 }, { "epoch": 0.13751253444278116, "grad_norm": 
1.2932684421539307, "learning_rate": 0.00013746416462881257, "loss": 4.4859, "step": 71790 }, { "epoch": 0.1375316892741564, "grad_norm": 1.1242470741271973, "learning_rate": 0.00013748331939292165, "loss": 4.6452, "step": 71800 }, { "epoch": 0.13755084410553162, "grad_norm": 1.1507198810577393, "learning_rate": 0.00013750247415703076, "loss": 4.6257, "step": 71810 }, { "epoch": 0.13756999893690686, "grad_norm": 1.1672792434692383, "learning_rate": 0.00013752162892113982, "loss": 4.6238, "step": 71820 }, { "epoch": 0.1375891537682821, "grad_norm": 1.1537894010543823, "learning_rate": 0.0001375407836852489, "loss": 4.5512, "step": 71830 }, { "epoch": 0.1376083085996573, "grad_norm": 1.143906593322754, "learning_rate": 0.00013755993844935798, "loss": 4.5007, "step": 71840 }, { "epoch": 0.13762746343103255, "grad_norm": 1.1784439086914062, "learning_rate": 0.00013757909321346706, "loss": 4.5633, "step": 71850 }, { "epoch": 0.1376466182624078, "grad_norm": 1.2414393424987793, "learning_rate": 0.00013759824797757614, "loss": 4.5844, "step": 71860 }, { "epoch": 0.137665773093783, "grad_norm": 1.091570258140564, "learning_rate": 0.00013761740274168522, "loss": 4.7861, "step": 71870 }, { "epoch": 0.13768492792515824, "grad_norm": 1.1757395267486572, "learning_rate": 0.0001376365575057943, "loss": 4.6032, "step": 71880 }, { "epoch": 0.13770408275653348, "grad_norm": 1.1090774536132812, "learning_rate": 0.00013765571226990339, "loss": 4.4191, "step": 71890 }, { "epoch": 0.1377232375879087, "grad_norm": 1.1685823202133179, "learning_rate": 0.00013767486703401247, "loss": 4.5745, "step": 71900 }, { "epoch": 0.13774239241928393, "grad_norm": 1.1622238159179688, "learning_rate": 0.00013769402179812155, "loss": 4.5157, "step": 71910 }, { "epoch": 0.13776154725065917, "grad_norm": 1.1724786758422852, "learning_rate": 0.00013771317656223063, "loss": 4.4447, "step": 71920 }, { "epoch": 0.13778070208203438, "grad_norm": 1.1364535093307495, "learning_rate": 0.0001377323313263397, 
"loss": 4.5792, "step": 71930 }, { "epoch": 0.13779985691340962, "grad_norm": 1.1927425861358643, "learning_rate": 0.0001377514860904488, "loss": 4.6475, "step": 71940 }, { "epoch": 0.13781901174478486, "grad_norm": 1.1707017421722412, "learning_rate": 0.00013777064085455785, "loss": 4.6987, "step": 71950 }, { "epoch": 0.13783816657616008, "grad_norm": 1.1589890718460083, "learning_rate": 0.00013778979561866696, "loss": 4.3895, "step": 71960 }, { "epoch": 0.13785732140753532, "grad_norm": 1.173710823059082, "learning_rate": 0.00013780895038277604, "loss": 4.5801, "step": 71970 }, { "epoch": 0.13787647623891056, "grad_norm": 1.1547534465789795, "learning_rate": 0.0001378281051468851, "loss": 4.5676, "step": 71980 }, { "epoch": 0.13789563107028577, "grad_norm": 1.1165860891342163, "learning_rate": 0.00013784725991099417, "loss": 4.4944, "step": 71990 }, { "epoch": 0.137914785901661, "grad_norm": 1.1555218696594238, "learning_rate": 0.00013786641467510328, "loss": 4.4301, "step": 72000 }, { "epoch": 0.13793394073303625, "grad_norm": 1.2260491847991943, "learning_rate": 0.00013788556943921233, "loss": 4.5928, "step": 72010 }, { "epoch": 0.13795309556441146, "grad_norm": 1.210898518562317, "learning_rate": 0.00013790472420332142, "loss": 4.3462, "step": 72020 }, { "epoch": 0.1379722503957867, "grad_norm": 1.2183059453964233, "learning_rate": 0.00013792387896743052, "loss": 4.4809, "step": 72030 }, { "epoch": 0.13799140522716194, "grad_norm": 1.0889699459075928, "learning_rate": 0.00013794303373153958, "loss": 4.4048, "step": 72040 }, { "epoch": 0.13801056005853715, "grad_norm": 1.1754443645477295, "learning_rate": 0.00013796218849564866, "loss": 4.6032, "step": 72050 }, { "epoch": 0.1380297148899124, "grad_norm": 1.175059199333191, "learning_rate": 0.00013798134325975774, "loss": 4.5291, "step": 72060 }, { "epoch": 0.13804886972128763, "grad_norm": 1.15519380569458, "learning_rate": 0.00013800049802386682, "loss": 4.504, "step": 72070 }, { "epoch": 0.13806802455266284, 
"grad_norm": 1.1418650150299072, "learning_rate": 0.0001380196527879759, "loss": 4.4447, "step": 72080 }, { "epoch": 0.13808717938403808, "grad_norm": 1.1391613483428955, "learning_rate": 0.00013803880755208499, "loss": 4.5149, "step": 72090 }, { "epoch": 0.13810633421541332, "grad_norm": 1.1043158769607544, "learning_rate": 0.00013805796231619407, "loss": 4.6433, "step": 72100 }, { "epoch": 0.13812548904678854, "grad_norm": 1.1916682720184326, "learning_rate": 0.00013807711708030315, "loss": 4.5052, "step": 72110 }, { "epoch": 0.13814464387816378, "grad_norm": 1.2918548583984375, "learning_rate": 0.00013809627184441223, "loss": 4.5433, "step": 72120 }, { "epoch": 0.13816379870953902, "grad_norm": 1.1436160802841187, "learning_rate": 0.0001381154266085213, "loss": 4.7182, "step": 72130 }, { "epoch": 0.13818295354091423, "grad_norm": 1.1395277976989746, "learning_rate": 0.00013813458137263036, "loss": 4.5454, "step": 72140 }, { "epoch": 0.13820210837228947, "grad_norm": 1.1844909191131592, "learning_rate": 0.00013815373613673947, "loss": 4.6026, "step": 72150 }, { "epoch": 0.1382212632036647, "grad_norm": 1.1289187669754028, "learning_rate": 0.00013817289090084855, "loss": 4.6191, "step": 72160 }, { "epoch": 0.13824041803503992, "grad_norm": 1.14792799949646, "learning_rate": 0.0001381920456649576, "loss": 4.6403, "step": 72170 }, { "epoch": 0.13825957286641516, "grad_norm": 1.1054956912994385, "learning_rate": 0.00013821120042906672, "loss": 4.6204, "step": 72180 }, { "epoch": 0.1382787276977904, "grad_norm": 1.2325457334518433, "learning_rate": 0.0001382303551931758, "loss": 4.6028, "step": 72190 }, { "epoch": 0.13829788252916564, "grad_norm": 1.156916856765747, "learning_rate": 0.00013824950995728485, "loss": 4.6204, "step": 72200 }, { "epoch": 0.13831703736054085, "grad_norm": 1.1812947988510132, "learning_rate": 0.00013826866472139393, "loss": 4.4232, "step": 72210 }, { "epoch": 0.1383361921919161, "grad_norm": 1.1970316171646118, "learning_rate": 
0.00013828781948550304, "loss": 4.5574, "step": 72220 }, { "epoch": 0.13835534702329133, "grad_norm": 1.173805594444275, "learning_rate": 0.0001383069742496121, "loss": 4.6039, "step": 72230 }, { "epoch": 0.13837450185466654, "grad_norm": 1.1859742403030396, "learning_rate": 0.00013832612901372118, "loss": 4.5372, "step": 72240 }, { "epoch": 0.13839365668604178, "grad_norm": 1.1009461879730225, "learning_rate": 0.00013834528377783026, "loss": 4.5282, "step": 72250 }, { "epoch": 0.13841281151741702, "grad_norm": 1.178666591644287, "learning_rate": 0.00013836443854193934, "loss": 4.5242, "step": 72260 }, { "epoch": 0.13843196634879223, "grad_norm": 1.186741590499878, "learning_rate": 0.00013838359330604842, "loss": 4.6155, "step": 72270 }, { "epoch": 0.13845112118016747, "grad_norm": 1.1427167654037476, "learning_rate": 0.0001384027480701575, "loss": 4.5751, "step": 72280 }, { "epoch": 0.13847027601154271, "grad_norm": 1.1299537420272827, "learning_rate": 0.00013842190283426658, "loss": 4.4509, "step": 72290 }, { "epoch": 0.13848943084291793, "grad_norm": 1.1989808082580566, "learning_rate": 0.00013844105759837567, "loss": 4.5762, "step": 72300 }, { "epoch": 0.13850858567429317, "grad_norm": 1.1276462078094482, "learning_rate": 0.00013846021236248475, "loss": 4.6433, "step": 72310 }, { "epoch": 0.1385277405056684, "grad_norm": 1.1500272750854492, "learning_rate": 0.00013847936712659383, "loss": 4.4387, "step": 72320 }, { "epoch": 0.13854689533704362, "grad_norm": 1.2082277536392212, "learning_rate": 0.00013849852189070288, "loss": 4.6191, "step": 72330 }, { "epoch": 0.13856605016841886, "grad_norm": 1.1791921854019165, "learning_rate": 0.000138517676654812, "loss": 4.5607, "step": 72340 }, { "epoch": 0.1385852049997941, "grad_norm": 1.1148329973220825, "learning_rate": 0.00013853683141892107, "loss": 4.7168, "step": 72350 }, { "epoch": 0.1386043598311693, "grad_norm": 1.2143454551696777, "learning_rate": 0.00013855598618303013, "loss": 4.5436, "step": 72360 }, { 
"epoch": 0.13862351466254455, "grad_norm": 1.104519248008728, "learning_rate": 0.00013857514094713923, "loss": 4.5014, "step": 72370 }, { "epoch": 0.1386426694939198, "grad_norm": 1.1480374336242676, "learning_rate": 0.00013859429571124832, "loss": 4.5289, "step": 72380 }, { "epoch": 0.138661824325295, "grad_norm": 1.1306146383285522, "learning_rate": 0.00013861345047535737, "loss": 4.6479, "step": 72390 }, { "epoch": 0.13868097915667024, "grad_norm": 1.304885983467102, "learning_rate": 0.00013863260523946648, "loss": 4.5663, "step": 72400 }, { "epoch": 0.13870013398804548, "grad_norm": 1.1797140836715698, "learning_rate": 0.00013865176000357556, "loss": 4.5285, "step": 72410 }, { "epoch": 0.1387192888194207, "grad_norm": 1.1314947605133057, "learning_rate": 0.00013867091476768461, "loss": 4.6051, "step": 72420 }, { "epoch": 0.13873844365079593, "grad_norm": 1.1791354417800903, "learning_rate": 0.0001386900695317937, "loss": 4.5433, "step": 72430 }, { "epoch": 0.13875759848217117, "grad_norm": 1.205728530883789, "learning_rate": 0.00013870922429590278, "loss": 4.5373, "step": 72440 }, { "epoch": 0.13877675331354639, "grad_norm": 1.1650969982147217, "learning_rate": 0.00013872837906001186, "loss": 4.6896, "step": 72450 }, { "epoch": 0.13879590814492163, "grad_norm": 1.1258100271224976, "learning_rate": 0.00013874753382412094, "loss": 4.5834, "step": 72460 }, { "epoch": 0.13881506297629687, "grad_norm": 1.2079386711120605, "learning_rate": 0.00013876668858823002, "loss": 4.7709, "step": 72470 }, { "epoch": 0.13883421780767208, "grad_norm": 1.1830600500106812, "learning_rate": 0.0001387858433523391, "loss": 4.6242, "step": 72480 }, { "epoch": 0.13885337263904732, "grad_norm": 1.166919469833374, "learning_rate": 0.00013880499811644818, "loss": 4.534, "step": 72490 }, { "epoch": 0.13887252747042256, "grad_norm": 1.1584384441375732, "learning_rate": 0.00013882415288055726, "loss": 4.6236, "step": 72500 }, { "epoch": 0.13889168230179777, "grad_norm": 1.1458512544631958, 
"learning_rate": 0.00013884330764466635, "loss": 4.4814, "step": 72510 }, { "epoch": 0.138910837133173, "grad_norm": 1.1558239459991455, "learning_rate": 0.00013886246240877543, "loss": 4.5654, "step": 72520 }, { "epoch": 0.13892999196454825, "grad_norm": 1.1659493446350098, "learning_rate": 0.0001388816171728845, "loss": 4.5366, "step": 72530 }, { "epoch": 0.13894914679592346, "grad_norm": 1.1147704124450684, "learning_rate": 0.0001389007719369936, "loss": 4.5652, "step": 72540 }, { "epoch": 0.1389683016272987, "grad_norm": 1.210001826286316, "learning_rate": 0.00013891992670110264, "loss": 4.6443, "step": 72550 }, { "epoch": 0.13898745645867394, "grad_norm": 1.1893442869186401, "learning_rate": 0.00013893908146521175, "loss": 4.3998, "step": 72560 }, { "epoch": 0.13900661129004915, "grad_norm": 1.1954632997512817, "learning_rate": 0.00013895823622932083, "loss": 4.5518, "step": 72570 }, { "epoch": 0.1390257661214244, "grad_norm": 1.1435940265655518, "learning_rate": 0.0001389773909934299, "loss": 4.3766, "step": 72580 }, { "epoch": 0.13904492095279963, "grad_norm": 1.1718838214874268, "learning_rate": 0.000138996545757539, "loss": 4.4806, "step": 72590 }, { "epoch": 0.13906407578417485, "grad_norm": 1.1539701223373413, "learning_rate": 0.00013901570052164808, "loss": 4.4822, "step": 72600 }, { "epoch": 0.13908323061555009, "grad_norm": 1.1012091636657715, "learning_rate": 0.00013903485528575713, "loss": 4.4463, "step": 72610 }, { "epoch": 0.13910238544692533, "grad_norm": 1.1562261581420898, "learning_rate": 0.00013905401004986624, "loss": 4.539, "step": 72620 }, { "epoch": 0.13912154027830054, "grad_norm": 1.1410105228424072, "learning_rate": 0.0001390731648139753, "loss": 4.5195, "step": 72630 }, { "epoch": 0.13914069510967578, "grad_norm": 1.1266151666641235, "learning_rate": 0.00013909231957808438, "loss": 4.7149, "step": 72640 }, { "epoch": 0.13915984994105102, "grad_norm": 1.1540861129760742, "learning_rate": 0.00013911147434219346, "loss": 4.5775, "step": 
72650 }, { "epoch": 0.13917900477242623, "grad_norm": 1.1231805086135864, "learning_rate": 0.00013913062910630254, "loss": 4.449, "step": 72660 }, { "epoch": 0.13919815960380147, "grad_norm": 1.1280614137649536, "learning_rate": 0.00013914978387041162, "loss": 4.4757, "step": 72670 }, { "epoch": 0.1392173144351767, "grad_norm": 1.1420172452926636, "learning_rate": 0.0001391689386345207, "loss": 4.5787, "step": 72680 }, { "epoch": 0.13923646926655192, "grad_norm": 1.155827522277832, "learning_rate": 0.00013918809339862978, "loss": 4.5962, "step": 72690 }, { "epoch": 0.13925562409792716, "grad_norm": 1.141727089881897, "learning_rate": 0.00013920724816273886, "loss": 4.5556, "step": 72700 }, { "epoch": 0.1392747789293024, "grad_norm": 1.148952603340149, "learning_rate": 0.00013922640292684794, "loss": 4.5775, "step": 72710 }, { "epoch": 0.1392939337606776, "grad_norm": 1.689209222793579, "learning_rate": 0.00013924555769095703, "loss": 4.5657, "step": 72720 }, { "epoch": 0.13931308859205285, "grad_norm": 1.1610221862792969, "learning_rate": 0.0001392647124550661, "loss": 4.6975, "step": 72730 }, { "epoch": 0.1393322434234281, "grad_norm": 1.217124104499817, "learning_rate": 0.0001392838672191752, "loss": 4.5439, "step": 72740 }, { "epoch": 0.1393513982548033, "grad_norm": 1.185907244682312, "learning_rate": 0.00013930302198328427, "loss": 4.6488, "step": 72750 }, { "epoch": 0.13937055308617854, "grad_norm": 1.1561092138290405, "learning_rate": 0.00013932217674739335, "loss": 4.5556, "step": 72760 }, { "epoch": 0.13938970791755378, "grad_norm": 1.1688886880874634, "learning_rate": 0.0001393413315115024, "loss": 4.4621, "step": 72770 }, { "epoch": 0.139408862748929, "grad_norm": 1.1524280309677124, "learning_rate": 0.00013936048627561151, "loss": 4.4793, "step": 72780 }, { "epoch": 0.13942801758030424, "grad_norm": 1.2244446277618408, "learning_rate": 0.0001393796410397206, "loss": 4.6165, "step": 72790 }, { "epoch": 0.13944717241167948, "grad_norm": 
1.1494081020355225, "learning_rate": 0.00013939688032741876, "loss": 4.4747, "step": 72800 }, { "epoch": 0.1394663272430547, "grad_norm": 1.3315562009811401, "learning_rate": 0.00013941603509152784, "loss": 4.6644, "step": 72810 }, { "epoch": 0.13948548207442993, "grad_norm": 1.122383713722229, "learning_rate": 0.00013943518985563692, "loss": 4.5567, "step": 72820 }, { "epoch": 0.13950463690580517, "grad_norm": 1.1914501190185547, "learning_rate": 0.000139454344619746, "loss": 4.6196, "step": 72830 }, { "epoch": 0.13952379173718038, "grad_norm": 1.1429893970489502, "learning_rate": 0.00013947349938385508, "loss": 4.5379, "step": 72840 }, { "epoch": 0.13954294656855562, "grad_norm": 1.1716737747192383, "learning_rate": 0.00013949265414796416, "loss": 4.7122, "step": 72850 }, { "epoch": 0.13956210139993086, "grad_norm": 1.1687848567962646, "learning_rate": 0.00013951180891207324, "loss": 4.6137, "step": 72860 }, { "epoch": 0.13958125623130607, "grad_norm": 1.1260097026824951, "learning_rate": 0.00013953096367618232, "loss": 4.5841, "step": 72870 }, { "epoch": 0.1396004110626813, "grad_norm": 1.1308550834655762, "learning_rate": 0.0001395501184402914, "loss": 4.7508, "step": 72880 }, { "epoch": 0.13961956589405655, "grad_norm": 1.1213253736495972, "learning_rate": 0.0001395692732044005, "loss": 4.4736, "step": 72890 }, { "epoch": 0.13963872072543176, "grad_norm": 1.1618835926055908, "learning_rate": 0.00013958842796850954, "loss": 4.6696, "step": 72900 }, { "epoch": 0.139657875556807, "grad_norm": 1.252907633781433, "learning_rate": 0.00013960758273261865, "loss": 4.6177, "step": 72910 }, { "epoch": 0.13967703038818224, "grad_norm": 1.1698366403579712, "learning_rate": 0.0001396267374967277, "loss": 4.4758, "step": 72920 }, { "epoch": 0.13969618521955746, "grad_norm": 1.2213021516799927, "learning_rate": 0.00013964589226083678, "loss": 4.4965, "step": 72930 }, { "epoch": 0.1397153400509327, "grad_norm": 1.1624414920806885, "learning_rate": 0.0001396650470249459, 
"loss": 4.5888, "step": 72940 }, { "epoch": 0.13973449488230794, "grad_norm": 1.1383849382400513, "learning_rate": 0.00013968420178905495, "loss": 4.5618, "step": 72950 }, { "epoch": 0.13975364971368315, "grad_norm": 1.1512528657913208, "learning_rate": 0.00013970335655316403, "loss": 4.6763, "step": 72960 }, { "epoch": 0.1397728045450584, "grad_norm": 1.1800392866134644, "learning_rate": 0.0001397225113172731, "loss": 4.4769, "step": 72970 }, { "epoch": 0.13979195937643363, "grad_norm": 1.1718670129776, "learning_rate": 0.0001397416660813822, "loss": 4.5938, "step": 72980 }, { "epoch": 0.13981111420780884, "grad_norm": 1.1964629888534546, "learning_rate": 0.00013976082084549127, "loss": 4.6084, "step": 72990 }, { "epoch": 0.13983026903918408, "grad_norm": 1.1443690061569214, "learning_rate": 0.00013977997560960035, "loss": 4.552, "step": 73000 }, { "epoch": 0.13984942387055932, "grad_norm": 1.1604853868484497, "learning_rate": 0.00013979913037370944, "loss": 4.6154, "step": 73010 }, { "epoch": 0.13986857870193453, "grad_norm": 1.21385657787323, "learning_rate": 0.00013981828513781852, "loss": 4.6176, "step": 73020 }, { "epoch": 0.13988773353330977, "grad_norm": 1.1828309297561646, "learning_rate": 0.0001398374399019276, "loss": 4.5514, "step": 73030 }, { "epoch": 0.139906888364685, "grad_norm": 1.1202095746994019, "learning_rate": 0.00013985659466603668, "loss": 4.6457, "step": 73040 }, { "epoch": 0.13992604319606022, "grad_norm": 1.1716121435165405, "learning_rate": 0.00013987574943014576, "loss": 4.4045, "step": 73050 }, { "epoch": 0.13994519802743546, "grad_norm": 1.108169674873352, "learning_rate": 0.00013989490419425484, "loss": 4.5483, "step": 73060 }, { "epoch": 0.1399643528588107, "grad_norm": 1.1253207921981812, "learning_rate": 0.00013991405895836392, "loss": 4.5025, "step": 73070 }, { "epoch": 0.13998350769018592, "grad_norm": 1.155251383781433, "learning_rate": 0.000139933213722473, "loss": 4.5515, "step": 73080 }, { "epoch": 0.14000266252156116, 
"grad_norm": 1.1553955078125, "learning_rate": 0.00013995236848658209, "loss": 4.5938, "step": 73090 }, { "epoch": 0.1400218173529364, "grad_norm": 1.0875447988510132, "learning_rate": 0.00013997152325069117, "loss": 4.5354, "step": 73100 }, { "epoch": 0.1400409721843116, "grad_norm": 1.1307867765426636, "learning_rate": 0.00013999067801480022, "loss": 4.5984, "step": 73110 }, { "epoch": 0.14006012701568685, "grad_norm": 1.1557544469833374, "learning_rate": 0.0001400098327789093, "loss": 4.4988, "step": 73120 }, { "epoch": 0.1400792818470621, "grad_norm": 1.1668206453323364, "learning_rate": 0.0001400289875430184, "loss": 4.52, "step": 73130 }, { "epoch": 0.1400984366784373, "grad_norm": 1.162218451499939, "learning_rate": 0.00014004814230712747, "loss": 4.6493, "step": 73140 }, { "epoch": 0.14011759150981254, "grad_norm": 1.0992823839187622, "learning_rate": 0.00014006729707123655, "loss": 4.6491, "step": 73150 }, { "epoch": 0.14013674634118778, "grad_norm": 1.1095489263534546, "learning_rate": 0.00014008645183534565, "loss": 4.571, "step": 73160 }, { "epoch": 0.140155901172563, "grad_norm": 1.119285225868225, "learning_rate": 0.0001401056065994547, "loss": 4.4892, "step": 73170 }, { "epoch": 0.14017505600393823, "grad_norm": 1.1960928440093994, "learning_rate": 0.0001401247613635638, "loss": 4.5147, "step": 73180 }, { "epoch": 0.14019421083531347, "grad_norm": 1.186842679977417, "learning_rate": 0.00014014391612767287, "loss": 4.5148, "step": 73190 }, { "epoch": 0.14021336566668868, "grad_norm": 1.1418368816375732, "learning_rate": 0.00014016307089178195, "loss": 4.5569, "step": 73200 }, { "epoch": 0.14023252049806392, "grad_norm": 1.0920405387878418, "learning_rate": 0.00014018222565589103, "loss": 4.6966, "step": 73210 }, { "epoch": 0.14025167532943916, "grad_norm": 1.1409037113189697, "learning_rate": 0.00014020138042000012, "loss": 4.678, "step": 73220 }, { "epoch": 0.14027083016081437, "grad_norm": 1.1718721389770508, "learning_rate": 0.0001402205351841092, 
"loss": 4.6332, "step": 73230 }, { "epoch": 0.14028998499218961, "grad_norm": 1.2005006074905396, "learning_rate": 0.00014023968994821828, "loss": 4.5475, "step": 73240 }, { "epoch": 0.14030913982356485, "grad_norm": 1.1430914402008057, "learning_rate": 0.00014025884471232736, "loss": 4.5971, "step": 73250 }, { "epoch": 0.14032829465494007, "grad_norm": 1.14369535446167, "learning_rate": 0.00014027799947643644, "loss": 4.6281, "step": 73260 }, { "epoch": 0.1403474494863153, "grad_norm": 1.239789605140686, "learning_rate": 0.00014029715424054552, "loss": 4.511, "step": 73270 }, { "epoch": 0.14036660431769055, "grad_norm": 1.1740840673446655, "learning_rate": 0.0001403163090046546, "loss": 4.4499, "step": 73280 }, { "epoch": 0.14038575914906576, "grad_norm": 1.142190933227539, "learning_rate": 0.00014033546376876368, "loss": 4.5979, "step": 73290 }, { "epoch": 0.140404913980441, "grad_norm": 1.1567561626434326, "learning_rate": 0.00014035461853287274, "loss": 4.5416, "step": 73300 }, { "epoch": 0.14042406881181624, "grad_norm": 1.1794410943984985, "learning_rate": 0.00014037377329698185, "loss": 4.6197, "step": 73310 }, { "epoch": 0.14044322364319145, "grad_norm": 1.1542795896530151, "learning_rate": 0.00014039292806109093, "loss": 4.5772, "step": 73320 }, { "epoch": 0.1404623784745667, "grad_norm": 1.1263097524642944, "learning_rate": 0.00014041208282519998, "loss": 4.6612, "step": 73330 }, { "epoch": 0.14048153330594193, "grad_norm": 1.134392261505127, "learning_rate": 0.00014043123758930906, "loss": 4.5007, "step": 73340 }, { "epoch": 0.14050068813731714, "grad_norm": 1.1690350770950317, "learning_rate": 0.00014045039235341817, "loss": 4.4344, "step": 73350 }, { "epoch": 0.14051984296869238, "grad_norm": 1.125826120376587, "learning_rate": 0.00014046954711752723, "loss": 4.4933, "step": 73360 }, { "epoch": 0.14053899780006762, "grad_norm": 1.18959641456604, "learning_rate": 0.0001404887018816363, "loss": 4.5632, "step": 73370 }, { "epoch": 0.14055815263144283, 
"grad_norm": 1.223206877708435, "learning_rate": 0.00014050785664574542, "loss": 4.4688, "step": 73380 }, { "epoch": 0.14057730746281807, "grad_norm": 1.1291112899780273, "learning_rate": 0.00014052701140985447, "loss": 4.588, "step": 73390 }, { "epoch": 0.14059646229419331, "grad_norm": 1.1497268676757812, "learning_rate": 0.00014054616617396355, "loss": 4.4888, "step": 73400 }, { "epoch": 0.14061561712556853, "grad_norm": 1.1471539735794067, "learning_rate": 0.00014056532093807263, "loss": 4.5097, "step": 73410 }, { "epoch": 0.14063477195694377, "grad_norm": 1.121126651763916, "learning_rate": 0.00014058447570218171, "loss": 4.5212, "step": 73420 }, { "epoch": 0.140653926788319, "grad_norm": 1.1966129541397095, "learning_rate": 0.0001406036304662908, "loss": 4.4905, "step": 73430 }, { "epoch": 0.14067308161969422, "grad_norm": 1.1957536935806274, "learning_rate": 0.00014062278523039988, "loss": 4.5578, "step": 73440 }, { "epoch": 0.14069223645106946, "grad_norm": 1.1057064533233643, "learning_rate": 0.00014064193999450896, "loss": 4.6524, "step": 73450 }, { "epoch": 0.1407113912824447, "grad_norm": 1.188794493675232, "learning_rate": 0.00014066109475861804, "loss": 4.5762, "step": 73460 }, { "epoch": 0.1407305461138199, "grad_norm": 1.1493027210235596, "learning_rate": 0.00014068024952272712, "loss": 4.6194, "step": 73470 }, { "epoch": 0.14074970094519515, "grad_norm": 1.1541731357574463, "learning_rate": 0.0001406994042868362, "loss": 4.5555, "step": 73480 }, { "epoch": 0.1407688557765704, "grad_norm": 1.1316004991531372, "learning_rate": 0.00014071855905094526, "loss": 4.675, "step": 73490 }, { "epoch": 0.1407880106079456, "grad_norm": 1.2408288717269897, "learning_rate": 0.00014073771381505437, "loss": 4.7139, "step": 73500 }, { "epoch": 0.14080716543932084, "grad_norm": 1.1350816488265991, "learning_rate": 0.00014075686857916345, "loss": 4.6511, "step": 73510 }, { "epoch": 0.14082632027069608, "grad_norm": 1.1465116739273071, "learning_rate": 
0.0001407760233432725, "loss": 4.6474, "step": 73520 }, { "epoch": 0.14084547510207132, "grad_norm": 1.1053341627120972, "learning_rate": 0.00014079517810738158, "loss": 4.5918, "step": 73530 }, { "epoch": 0.14086462993344653, "grad_norm": 1.138480305671692, "learning_rate": 0.0001408143328714907, "loss": 4.5447, "step": 73540 }, { "epoch": 0.14088378476482177, "grad_norm": 1.1335254907608032, "learning_rate": 0.00014083348763559974, "loss": 4.7185, "step": 73550 }, { "epoch": 0.140902939596197, "grad_norm": 1.163961410522461, "learning_rate": 0.00014085264239970883, "loss": 4.6438, "step": 73560 }, { "epoch": 0.14092209442757223, "grad_norm": 1.10311758518219, "learning_rate": 0.00014087179716381793, "loss": 4.6351, "step": 73570 }, { "epoch": 0.14094124925894747, "grad_norm": 1.144998550415039, "learning_rate": 0.000140890951927927, "loss": 4.5791, "step": 73580 }, { "epoch": 0.1409604040903227, "grad_norm": 1.1322201490402222, "learning_rate": 0.00014091010669203607, "loss": 4.666, "step": 73590 }, { "epoch": 0.14097955892169792, "grad_norm": 1.3381630182266235, "learning_rate": 0.00014092926145614515, "loss": 4.5321, "step": 73600 }, { "epoch": 0.14099871375307316, "grad_norm": 1.2431210279464722, "learning_rate": 0.00014094841622025423, "loss": 4.5272, "step": 73610 }, { "epoch": 0.1410178685844484, "grad_norm": 1.092557430267334, "learning_rate": 0.00014096757098436331, "loss": 4.4718, "step": 73620 }, { "epoch": 0.1410370234158236, "grad_norm": 1.1430848836898804, "learning_rate": 0.0001409867257484724, "loss": 4.676, "step": 73630 }, { "epoch": 0.14105617824719885, "grad_norm": 1.1429301500320435, "learning_rate": 0.00014100588051258148, "loss": 4.5469, "step": 73640 }, { "epoch": 0.1410753330785741, "grad_norm": 1.1564275026321411, "learning_rate": 0.00014102503527669056, "loss": 4.6295, "step": 73650 }, { "epoch": 0.1410944879099493, "grad_norm": 1.0881052017211914, "learning_rate": 0.00014104419004079964, "loss": 4.616, "step": 73660 }, { "epoch": 
0.14111364274132454, "grad_norm": 1.124636173248291, "learning_rate": 0.00014106334480490872, "loss": 4.6977, "step": 73670 }, { "epoch": 0.14113279757269978, "grad_norm": 1.142202615737915, "learning_rate": 0.00014108249956901777, "loss": 4.6455, "step": 73680 }, { "epoch": 0.141151952404075, "grad_norm": 1.1700478792190552, "learning_rate": 0.00014110165433312688, "loss": 4.5774, "step": 73690 }, { "epoch": 0.14117110723545023, "grad_norm": 1.1175976991653442, "learning_rate": 0.00014112080909723596, "loss": 4.7734, "step": 73700 }, { "epoch": 0.14119026206682547, "grad_norm": 1.1316083669662476, "learning_rate": 0.00014113996386134502, "loss": 4.418, "step": 73710 }, { "epoch": 0.14120941689820068, "grad_norm": 1.164120078086853, "learning_rate": 0.00014115911862545413, "loss": 4.5563, "step": 73720 }, { "epoch": 0.14122857172957592, "grad_norm": 1.1189138889312744, "learning_rate": 0.0001411782733895632, "loss": 4.6248, "step": 73730 }, { "epoch": 0.14124772656095116, "grad_norm": 1.1375828981399536, "learning_rate": 0.00014119742815367226, "loss": 4.6082, "step": 73740 }, { "epoch": 0.14126688139232638, "grad_norm": 1.1334543228149414, "learning_rate": 0.00014121658291778134, "loss": 4.4831, "step": 73750 }, { "epoch": 0.14128603622370162, "grad_norm": 1.1605685949325562, "learning_rate": 0.00014123573768189045, "loss": 4.567, "step": 73760 }, { "epoch": 0.14130519105507686, "grad_norm": 1.1995149850845337, "learning_rate": 0.0001412548924459995, "loss": 4.516, "step": 73770 }, { "epoch": 0.14132434588645207, "grad_norm": 1.1668403148651123, "learning_rate": 0.0001412740472101086, "loss": 4.5344, "step": 73780 }, { "epoch": 0.1413435007178273, "grad_norm": 1.129211664199829, "learning_rate": 0.00014129320197421767, "loss": 4.6393, "step": 73790 }, { "epoch": 0.14136265554920255, "grad_norm": 1.1421302556991577, "learning_rate": 0.00014131235673832675, "loss": 4.6067, "step": 73800 }, { "epoch": 0.14138181038057776, "grad_norm": 1.1168837547302246, 
"learning_rate": 0.00014133151150243583, "loss": 4.6448, "step": 73810 }, { "epoch": 0.141400965211953, "grad_norm": 1.1428567171096802, "learning_rate": 0.0001413506662665449, "loss": 4.7082, "step": 73820 }, { "epoch": 0.14142012004332824, "grad_norm": 1.151786208152771, "learning_rate": 0.000141369821030654, "loss": 4.588, "step": 73830 }, { "epoch": 0.14143927487470345, "grad_norm": 1.163863182067871, "learning_rate": 0.00014138897579476308, "loss": 4.5376, "step": 73840 }, { "epoch": 0.1414584297060787, "grad_norm": 1.1317088603973389, "learning_rate": 0.00014140813055887216, "loss": 4.6269, "step": 73850 }, { "epoch": 0.14147758453745393, "grad_norm": 1.1450555324554443, "learning_rate": 0.00014142728532298124, "loss": 4.591, "step": 73860 }, { "epoch": 0.14149673936882914, "grad_norm": 1.192700743675232, "learning_rate": 0.00014144644008709032, "loss": 4.59, "step": 73870 }, { "epoch": 0.14151589420020438, "grad_norm": 1.1821937561035156, "learning_rate": 0.0001414655948511994, "loss": 4.5547, "step": 73880 }, { "epoch": 0.14153504903157962, "grad_norm": 1.156268835067749, "learning_rate": 0.00014148474961530848, "loss": 4.525, "step": 73890 }, { "epoch": 0.14155420386295484, "grad_norm": 1.1338915824890137, "learning_rate": 0.00014150390437941754, "loss": 4.7193, "step": 73900 }, { "epoch": 0.14157335869433008, "grad_norm": 1.1201152801513672, "learning_rate": 0.00014152305914352664, "loss": 4.5243, "step": 73910 }, { "epoch": 0.14159251352570532, "grad_norm": 1.120285987854004, "learning_rate": 0.00014154221390763573, "loss": 4.5335, "step": 73920 }, { "epoch": 0.14161166835708053, "grad_norm": 1.1524746417999268, "learning_rate": 0.00014156136867174478, "loss": 4.5486, "step": 73930 }, { "epoch": 0.14163082318845577, "grad_norm": 1.1799002885818481, "learning_rate": 0.0001415805234358539, "loss": 4.4323, "step": 73940 }, { "epoch": 0.141649978019831, "grad_norm": 1.112869143486023, "learning_rate": 0.00014159967819996297, "loss": 4.4601, "step": 73950 }, 
{ "epoch": 0.14166913285120622, "grad_norm": 1.117000699043274, "learning_rate": 0.00014161883296407202, "loss": 4.5656, "step": 73960 }, { "epoch": 0.14168828768258146, "grad_norm": 1.1149094104766846, "learning_rate": 0.0001416379877281811, "loss": 4.5296, "step": 73970 }, { "epoch": 0.1417074425139567, "grad_norm": 1.1303024291992188, "learning_rate": 0.0001416571424922902, "loss": 4.5191, "step": 73980 }, { "epoch": 0.1417265973453319, "grad_norm": 1.145167350769043, "learning_rate": 0.00014167629725639927, "loss": 4.6666, "step": 73990 }, { "epoch": 0.14174575217670715, "grad_norm": 1.1545703411102295, "learning_rate": 0.00014169545202050835, "loss": 4.5988, "step": 74000 }, { "epoch": 0.1417649070080824, "grad_norm": 1.1112890243530273, "learning_rate": 0.00014171460678461743, "loss": 4.5915, "step": 74010 }, { "epoch": 0.1417840618394576, "grad_norm": 1.233486533164978, "learning_rate": 0.0001417337615487265, "loss": 4.4653, "step": 74020 }, { "epoch": 0.14180321667083284, "grad_norm": 1.1334809064865112, "learning_rate": 0.0001417529163128356, "loss": 4.5232, "step": 74030 }, { "epoch": 0.14182237150220808, "grad_norm": 1.1285711526870728, "learning_rate": 0.00014177207107694467, "loss": 4.4931, "step": 74040 }, { "epoch": 0.1418415263335833, "grad_norm": 1.131384015083313, "learning_rate": 0.00014179122584105376, "loss": 4.3459, "step": 74050 }, { "epoch": 0.14186068116495854, "grad_norm": 1.0861378908157349, "learning_rate": 0.00014181038060516284, "loss": 4.6458, "step": 74060 }, { "epoch": 0.14187983599633378, "grad_norm": 1.1588817834854126, "learning_rate": 0.00014182953536927192, "loss": 4.5103, "step": 74070 }, { "epoch": 0.141898990827709, "grad_norm": 1.1617099046707153, "learning_rate": 0.000141848690133381, "loss": 4.5435, "step": 74080 }, { "epoch": 0.14191814565908423, "grad_norm": 1.0951350927352905, "learning_rate": 0.00014186784489749008, "loss": 4.5301, "step": 74090 }, { "epoch": 0.14193730049045947, "grad_norm": 1.176198124885559, 
"learning_rate": 0.00014188699966159916, "loss": 4.5332, "step": 74100 }, { "epoch": 0.14195645532183468, "grad_norm": 1.132425308227539, "learning_rate": 0.00014190615442570824, "loss": 4.4691, "step": 74110 }, { "epoch": 0.14197561015320992, "grad_norm": 1.1335258483886719, "learning_rate": 0.0001419253091898173, "loss": 4.6504, "step": 74120 }, { "epoch": 0.14199476498458516, "grad_norm": 1.1566901206970215, "learning_rate": 0.0001419444639539264, "loss": 4.6924, "step": 74130 }, { "epoch": 0.14201391981596037, "grad_norm": 1.125562310218811, "learning_rate": 0.0001419636187180355, "loss": 4.541, "step": 74140 }, { "epoch": 0.1420330746473356, "grad_norm": 1.1286481618881226, "learning_rate": 0.00014198277348214454, "loss": 4.5966, "step": 74150 }, { "epoch": 0.14205222947871085, "grad_norm": 1.1098082065582275, "learning_rate": 0.00014200192824625365, "loss": 4.512, "step": 74160 }, { "epoch": 0.14207138431008606, "grad_norm": 1.1159223318099976, "learning_rate": 0.0001420210830103627, "loss": 4.7234, "step": 74170 }, { "epoch": 0.1420905391414613, "grad_norm": 1.1970316171646118, "learning_rate": 0.00014204023777447179, "loss": 4.5339, "step": 74180 }, { "epoch": 0.14210969397283654, "grad_norm": 1.141018271446228, "learning_rate": 0.00014205939253858087, "loss": 4.5066, "step": 74190 }, { "epoch": 0.14212884880421175, "grad_norm": 1.15192711353302, "learning_rate": 0.00014207854730268995, "loss": 4.5627, "step": 74200 }, { "epoch": 0.142148003635587, "grad_norm": 1.119920015335083, "learning_rate": 0.00014209770206679903, "loss": 4.6515, "step": 74210 }, { "epoch": 0.14216715846696223, "grad_norm": 1.1034891605377197, "learning_rate": 0.0001421168568309081, "loss": 4.6455, "step": 74220 }, { "epoch": 0.14218631329833745, "grad_norm": 1.1329180002212524, "learning_rate": 0.0001421360115950172, "loss": 4.5127, "step": 74230 }, { "epoch": 0.1422054681297127, "grad_norm": 1.0763201713562012, "learning_rate": 0.00014215516635912627, "loss": 4.4964, "step": 74240 
}, { "epoch": 0.14222462296108793, "grad_norm": 1.118540644645691, "learning_rate": 0.00014217432112323535, "loss": 4.5805, "step": 74250 }, { "epoch": 0.14224377779246314, "grad_norm": 1.132041573524475, "learning_rate": 0.00014219347588734444, "loss": 4.4949, "step": 74260 }, { "epoch": 0.14226293262383838, "grad_norm": 1.1156443357467651, "learning_rate": 0.00014221263065145352, "loss": 4.4381, "step": 74270 }, { "epoch": 0.14228208745521362, "grad_norm": 1.1064190864562988, "learning_rate": 0.0001422317854155626, "loss": 4.563, "step": 74280 }, { "epoch": 0.14230124228658883, "grad_norm": 1.1081209182739258, "learning_rate": 0.00014225094017967168, "loss": 4.6601, "step": 74290 }, { "epoch": 0.14232039711796407, "grad_norm": 1.1914398670196533, "learning_rate": 0.00014227009494378076, "loss": 4.5583, "step": 74300 }, { "epoch": 0.1423395519493393, "grad_norm": 1.180559754371643, "learning_rate": 0.00014228924970788984, "loss": 4.5201, "step": 74310 }, { "epoch": 0.14235870678071452, "grad_norm": 1.0928329229354858, "learning_rate": 0.00014230840447199892, "loss": 4.5529, "step": 74320 }, { "epoch": 0.14237786161208976, "grad_norm": 1.1561565399169922, "learning_rate": 0.000142327559236108, "loss": 4.5097, "step": 74330 }, { "epoch": 0.142397016443465, "grad_norm": 1.1167508363723755, "learning_rate": 0.00014234671400021706, "loss": 4.5752, "step": 74340 }, { "epoch": 0.14241617127484021, "grad_norm": 1.1205333471298218, "learning_rate": 0.00014236586876432617, "loss": 4.5963, "step": 74350 }, { "epoch": 0.14243532610621545, "grad_norm": 1.1398382186889648, "learning_rate": 0.00014238502352843522, "loss": 4.4984, "step": 74360 }, { "epoch": 0.1424544809375907, "grad_norm": 1.2011164426803589, "learning_rate": 0.0001424041782925443, "loss": 4.5894, "step": 74370 }, { "epoch": 0.1424736357689659, "grad_norm": 1.1024012565612793, "learning_rate": 0.0001424233330566534, "loss": 4.6353, "step": 74380 }, { "epoch": 0.14249279060034115, "grad_norm": 1.1215972900390625, 
"learning_rate": 0.00014244248782076247, "loss": 4.4833, "step": 74390 }, { "epoch": 0.14251194543171639, "grad_norm": 1.1781808137893677, "learning_rate": 0.00014246164258487155, "loss": 4.6168, "step": 74400 }, { "epoch": 0.1425311002630916, "grad_norm": 1.112420916557312, "learning_rate": 0.00014248079734898063, "loss": 4.655, "step": 74410 }, { "epoch": 0.14255025509446684, "grad_norm": 1.1475754976272583, "learning_rate": 0.0001424999521130897, "loss": 4.5034, "step": 74420 }, { "epoch": 0.14256940992584208, "grad_norm": 1.1211268901824951, "learning_rate": 0.0001425191068771988, "loss": 4.4977, "step": 74430 }, { "epoch": 0.1425885647572173, "grad_norm": 1.1215242147445679, "learning_rate": 0.00014253826164130787, "loss": 4.6432, "step": 74440 }, { "epoch": 0.14260771958859253, "grad_norm": 1.3034367561340332, "learning_rate": 0.00014255741640541695, "loss": 4.5798, "step": 74450 }, { "epoch": 0.14262687441996777, "grad_norm": 1.1541059017181396, "learning_rate": 0.00014257657116952603, "loss": 4.61, "step": 74460 }, { "epoch": 0.14264602925134298, "grad_norm": 1.1303821802139282, "learning_rate": 0.00014259572593363512, "loss": 4.5421, "step": 74470 }, { "epoch": 0.14266518408271822, "grad_norm": 1.1414512395858765, "learning_rate": 0.0001426148806977442, "loss": 4.5834, "step": 74480 }, { "epoch": 0.14268433891409346, "grad_norm": 1.1152993440628052, "learning_rate": 0.00014263403546185328, "loss": 4.5411, "step": 74490 }, { "epoch": 0.14270349374546867, "grad_norm": 1.1208211183547974, "learning_rate": 0.00014265319022596236, "loss": 4.6179, "step": 74500 }, { "epoch": 0.1427226485768439, "grad_norm": 1.1239855289459229, "learning_rate": 0.00014267234499007144, "loss": 4.5255, "step": 74510 }, { "epoch": 0.14274180340821915, "grad_norm": 1.113118052482605, "learning_rate": 0.00014269149975418052, "loss": 4.5499, "step": 74520 }, { "epoch": 0.14276095823959437, "grad_norm": 1.3143367767333984, "learning_rate": 0.0001427106545182896, "loss": 4.542, "step": 
74530 }, { "epoch": 0.1427801130709696, "grad_norm": 1.1298062801361084, "learning_rate": 0.00014272980928239869, "loss": 4.4563, "step": 74540 }, { "epoch": 0.14279926790234485, "grad_norm": 1.1455163955688477, "learning_rate": 0.00014274896404650774, "loss": 4.6187, "step": 74550 }, { "epoch": 0.14281842273372006, "grad_norm": 1.1588795185089111, "learning_rate": 0.00014276811881061682, "loss": 4.5576, "step": 74560 }, { "epoch": 0.1428375775650953, "grad_norm": 1.15935218334198, "learning_rate": 0.00014278727357472593, "loss": 4.584, "step": 74570 }, { "epoch": 0.14285673239647054, "grad_norm": 1.1744749546051025, "learning_rate": 0.00014280642833883498, "loss": 4.5451, "step": 74580 }, { "epoch": 0.14287588722784575, "grad_norm": 1.1218904256820679, "learning_rate": 0.00014282558310294406, "loss": 4.6106, "step": 74590 }, { "epoch": 0.142895042059221, "grad_norm": 1.07564377784729, "learning_rate": 0.00014284473786705317, "loss": 4.7333, "step": 74600 }, { "epoch": 0.14291419689059623, "grad_norm": 1.1139496564865112, "learning_rate": 0.00014286389263116223, "loss": 4.6849, "step": 74610 }, { "epoch": 0.14293335172197144, "grad_norm": 1.1130974292755127, "learning_rate": 0.0001428830473952713, "loss": 4.5601, "step": 74620 }, { "epoch": 0.14295250655334668, "grad_norm": 1.1095644235610962, "learning_rate": 0.0001429022021593804, "loss": 4.4954, "step": 74630 }, { "epoch": 0.14297166138472192, "grad_norm": 1.1568044424057007, "learning_rate": 0.00014292135692348947, "loss": 4.3892, "step": 74640 }, { "epoch": 0.14299081621609713, "grad_norm": 1.1834664344787598, "learning_rate": 0.00014294051168759855, "loss": 4.513, "step": 74650 }, { "epoch": 0.14300997104747237, "grad_norm": 1.123232126235962, "learning_rate": 0.00014295966645170763, "loss": 4.5262, "step": 74660 }, { "epoch": 0.1430291258788476, "grad_norm": 1.1623163223266602, "learning_rate": 0.00014297882121581672, "loss": 4.4581, "step": 74670 }, { "epoch": 0.14304828071022282, "grad_norm": 
1.132485270500183, "learning_rate": 0.0001429979759799258, "loss": 4.4534, "step": 74680 }, { "epoch": 0.14306743554159806, "grad_norm": 1.1169377565383911, "learning_rate": 0.00014301713074403488, "loss": 4.5523, "step": 74690 }, { "epoch": 0.1430865903729733, "grad_norm": 1.1210277080535889, "learning_rate": 0.00014303628550814396, "loss": 4.6865, "step": 74700 }, { "epoch": 0.14310574520434852, "grad_norm": 1.1008998155593872, "learning_rate": 0.00014305544027225304, "loss": 4.5033, "step": 74710 }, { "epoch": 0.14312490003572376, "grad_norm": 1.1247812509536743, "learning_rate": 0.00014307459503636212, "loss": 4.5321, "step": 74720 }, { "epoch": 0.143144054867099, "grad_norm": 1.125490427017212, "learning_rate": 0.0001430937498004712, "loss": 4.4775, "step": 74730 }, { "epoch": 0.1431632096984742, "grad_norm": 1.0978682041168213, "learning_rate": 0.00014311290456458028, "loss": 4.6353, "step": 74740 }, { "epoch": 0.14318236452984945, "grad_norm": 1.1272326707839966, "learning_rate": 0.00014313205932868937, "loss": 4.5302, "step": 74750 }, { "epoch": 0.1432015193612247, "grad_norm": 1.1275559663772583, "learning_rate": 0.00014315121409279845, "loss": 4.5629, "step": 74760 }, { "epoch": 0.1432206741925999, "grad_norm": 1.1410413980484009, "learning_rate": 0.0001431703688569075, "loss": 4.64, "step": 74770 }, { "epoch": 0.14323982902397514, "grad_norm": 1.1920382976531982, "learning_rate": 0.00014318952362101658, "loss": 4.5581, "step": 74780 }, { "epoch": 0.14325898385535038, "grad_norm": 1.1213600635528564, "learning_rate": 0.0001432086783851257, "loss": 4.5438, "step": 74790 }, { "epoch": 0.1432781386867256, "grad_norm": 1.130592703819275, "learning_rate": 0.00014322783314923475, "loss": 4.5719, "step": 74800 }, { "epoch": 0.14329729351810083, "grad_norm": 1.1264457702636719, "learning_rate": 0.00014324698791334383, "loss": 4.7357, "step": 74810 }, { "epoch": 0.14331644834947607, "grad_norm": 1.136461615562439, "learning_rate": 0.00014326614267745293, "loss": 
4.6265, "step": 74820 }, { "epoch": 0.1433356031808513, "grad_norm": 1.1657602787017822, "learning_rate": 0.000143285297441562, "loss": 4.7124, "step": 74830 }, { "epoch": 0.14335475801222652, "grad_norm": 1.1165716648101807, "learning_rate": 0.00014330445220567107, "loss": 4.4749, "step": 74840 }, { "epoch": 0.14337391284360176, "grad_norm": 1.1323589086532593, "learning_rate": 0.00014332360696978015, "loss": 4.5749, "step": 74850 }, { "epoch": 0.143393067674977, "grad_norm": 1.089384913444519, "learning_rate": 0.00014334276173388923, "loss": 4.5632, "step": 74860 }, { "epoch": 0.14341222250635222, "grad_norm": 1.120459794998169, "learning_rate": 0.00014336191649799831, "loss": 4.4911, "step": 74870 }, { "epoch": 0.14343137733772746, "grad_norm": 1.1162670850753784, "learning_rate": 0.0001433810712621074, "loss": 4.3646, "step": 74880 }, { "epoch": 0.1434505321691027, "grad_norm": 1.1200299263000488, "learning_rate": 0.00014340022602621648, "loss": 4.6103, "step": 74890 }, { "epoch": 0.1434696870004779, "grad_norm": 1.1297311782836914, "learning_rate": 0.00014341938079032556, "loss": 4.7366, "step": 74900 }, { "epoch": 0.14348884183185315, "grad_norm": 1.1252816915512085, "learning_rate": 0.00014343853555443464, "loss": 4.393, "step": 74910 }, { "epoch": 0.1435079966632284, "grad_norm": 1.1344683170318604, "learning_rate": 0.00014345769031854372, "loss": 4.5474, "step": 74920 }, { "epoch": 0.1435271514946036, "grad_norm": 1.1219048500061035, "learning_rate": 0.0001434768450826528, "loss": 4.4581, "step": 74930 }, { "epoch": 0.14354630632597884, "grad_norm": 1.1276737451553345, "learning_rate": 0.00014349599984676188, "loss": 4.4168, "step": 74940 }, { "epoch": 0.14356546115735408, "grad_norm": 1.1482595205307007, "learning_rate": 0.00014351515461087096, "loss": 4.6618, "step": 74950 }, { "epoch": 0.1435846159887293, "grad_norm": 1.1422412395477295, "learning_rate": 0.00014353430937498002, "loss": 4.5491, "step": 74960 }, { "epoch": 0.14360377082010453, 
"grad_norm": 1.1566028594970703, "learning_rate": 0.00014355346413908913, "loss": 4.5173, "step": 74970 }, { "epoch": 0.14362292565147977, "grad_norm": 1.1797282695770264, "learning_rate": 0.0001435726189031982, "loss": 4.6301, "step": 74980 }, { "epoch": 0.14364208048285498, "grad_norm": 1.146773338317871, "learning_rate": 0.00014359177366730726, "loss": 4.6517, "step": 74990 }, { "epoch": 0.14366123531423022, "grad_norm": 1.1911544799804688, "learning_rate": 0.00014361092843141634, "loss": 4.6083, "step": 75000 }, { "epoch": 0.14368039014560546, "grad_norm": 1.124579906463623, "learning_rate": 0.00014363008319552545, "loss": 4.5682, "step": 75010 }, { "epoch": 0.14369954497698068, "grad_norm": 1.1686420440673828, "learning_rate": 0.0001436492379596345, "loss": 4.5454, "step": 75020 }, { "epoch": 0.14371869980835592, "grad_norm": 1.154617190361023, "learning_rate": 0.0001436683927237436, "loss": 4.5466, "step": 75030 }, { "epoch": 0.14373785463973116, "grad_norm": 1.200598955154419, "learning_rate": 0.00014368754748785267, "loss": 4.4716, "step": 75040 }, { "epoch": 0.14375700947110637, "grad_norm": 1.1120007038116455, "learning_rate": 0.00014370670225196175, "loss": 4.49, "step": 75050 }, { "epoch": 0.1437761643024816, "grad_norm": 1.2068138122558594, "learning_rate": 0.00014372585701607083, "loss": 4.578, "step": 75060 }, { "epoch": 0.14379531913385685, "grad_norm": 1.1376850605010986, "learning_rate": 0.0001437450117801799, "loss": 4.5794, "step": 75070 }, { "epoch": 0.14381447396523206, "grad_norm": 1.1117198467254639, "learning_rate": 0.000143764166544289, "loss": 4.4259, "step": 75080 }, { "epoch": 0.1438336287966073, "grad_norm": 1.1083576679229736, "learning_rate": 0.00014378332130839808, "loss": 4.5991, "step": 75090 }, { "epoch": 0.14385278362798254, "grad_norm": 1.160151481628418, "learning_rate": 0.00014380247607250716, "loss": 4.6182, "step": 75100 }, { "epoch": 0.14387193845935775, "grad_norm": 1.1315160989761353, "learning_rate": 
0.00014382163083661624, "loss": 4.5648, "step": 75110 }, { "epoch": 0.143891093290733, "grad_norm": 1.1277718544006348, "learning_rate": 0.00014384078560072532, "loss": 4.4528, "step": 75120 }, { "epoch": 0.14391024812210823, "grad_norm": 1.1234158277511597, "learning_rate": 0.0001438599403648344, "loss": 4.5369, "step": 75130 }, { "epoch": 0.14392940295348344, "grad_norm": 1.0926313400268555, "learning_rate": 0.00014387909512894348, "loss": 4.5445, "step": 75140 }, { "epoch": 0.14394855778485868, "grad_norm": 1.134682059288025, "learning_rate": 0.00014389824989305254, "loss": 4.5988, "step": 75150 }, { "epoch": 0.14396771261623392, "grad_norm": 1.2629576921463013, "learning_rate": 0.00014391740465716165, "loss": 4.537, "step": 75160 }, { "epoch": 0.14398686744760913, "grad_norm": 1.0535508394241333, "learning_rate": 0.00014393655942127073, "loss": 4.6497, "step": 75170 }, { "epoch": 0.14400602227898437, "grad_norm": 1.118035078048706, "learning_rate": 0.00014395571418537978, "loss": 4.6468, "step": 75180 }, { "epoch": 0.14402517711035961, "grad_norm": 1.106245517730713, "learning_rate": 0.00014397486894948886, "loss": 4.5853, "step": 75190 }, { "epoch": 0.14404433194173483, "grad_norm": 1.075585126876831, "learning_rate": 0.00014399402371359797, "loss": 4.6856, "step": 75200 }, { "epoch": 0.14406348677311007, "grad_norm": 1.1095881462097168, "learning_rate": 0.00014401317847770702, "loss": 4.4871, "step": 75210 }, { "epoch": 0.1440826416044853, "grad_norm": 1.1267759799957275, "learning_rate": 0.0001440323332418161, "loss": 4.4907, "step": 75220 }, { "epoch": 0.14410179643586052, "grad_norm": 1.1225348711013794, "learning_rate": 0.0001440514880059252, "loss": 4.6217, "step": 75230 }, { "epoch": 0.14412095126723576, "grad_norm": 1.1027313470840454, "learning_rate": 0.00014407064277003427, "loss": 4.5097, "step": 75240 }, { "epoch": 0.144140106098611, "grad_norm": 1.0970584154129028, "learning_rate": 0.00014408979753414335, "loss": 4.4984, "step": 75250 }, { 
"epoch": 0.1441592609299862, "grad_norm": 1.1743957996368408, "learning_rate": 0.00014410895229825243, "loss": 4.4788, "step": 75260 }, { "epoch": 0.14417841576136145, "grad_norm": 1.123080849647522, "learning_rate": 0.0001441281070623615, "loss": 4.5894, "step": 75270 }, { "epoch": 0.1441975705927367, "grad_norm": 1.1077210903167725, "learning_rate": 0.0001441472618264706, "loss": 4.567, "step": 75280 }, { "epoch": 0.1442167254241119, "grad_norm": 1.1278239488601685, "learning_rate": 0.00014416641659057967, "loss": 4.5145, "step": 75290 }, { "epoch": 0.14423588025548714, "grad_norm": 1.1459579467773438, "learning_rate": 0.00014418557135468876, "loss": 4.3685, "step": 75300 }, { "epoch": 0.14425503508686238, "grad_norm": 1.1503143310546875, "learning_rate": 0.00014420472611879784, "loss": 4.5667, "step": 75310 }, { "epoch": 0.1442741899182376, "grad_norm": 1.1561098098754883, "learning_rate": 0.00014422388088290692, "loss": 4.6213, "step": 75320 }, { "epoch": 0.14429334474961283, "grad_norm": 1.0990471839904785, "learning_rate": 0.000144243035647016, "loss": 4.543, "step": 75330 }, { "epoch": 0.14431249958098807, "grad_norm": 1.2590819597244263, "learning_rate": 0.00014426219041112505, "loss": 4.6838, "step": 75340 }, { "epoch": 0.14433165441236329, "grad_norm": 1.1062142848968506, "learning_rate": 0.00014428134517523416, "loss": 4.5985, "step": 75350 }, { "epoch": 0.14435080924373853, "grad_norm": 1.0933895111083984, "learning_rate": 0.00014430049993934324, "loss": 4.5719, "step": 75360 }, { "epoch": 0.14436996407511377, "grad_norm": 1.1464656591415405, "learning_rate": 0.0001443196547034523, "loss": 4.4749, "step": 75370 }, { "epoch": 0.14438911890648898, "grad_norm": 1.0859709978103638, "learning_rate": 0.0001443388094675614, "loss": 4.6975, "step": 75380 }, { "epoch": 0.14440827373786422, "grad_norm": 1.1018531322479248, "learning_rate": 0.0001443579642316705, "loss": 4.5903, "step": 75390 }, { "epoch": 0.14442742856923946, "grad_norm": 1.1408913135528564, 
"learning_rate": 0.00014437711899577954, "loss": 4.5249, "step": 75400 }, { "epoch": 0.14444658340061467, "grad_norm": 1.1189219951629639, "learning_rate": 0.00014439627375988862, "loss": 4.7126, "step": 75410 }, { "epoch": 0.1444657382319899, "grad_norm": 1.1261645555496216, "learning_rate": 0.0001444154285239977, "loss": 4.5881, "step": 75420 }, { "epoch": 0.14448489306336515, "grad_norm": 1.114256501197815, "learning_rate": 0.00014443458328810679, "loss": 4.5452, "step": 75430 }, { "epoch": 0.14450404789474036, "grad_norm": 1.0887418985366821, "learning_rate": 0.00014445373805221587, "loss": 4.559, "step": 75440 }, { "epoch": 0.1445232027261156, "grad_norm": 1.1475635766983032, "learning_rate": 0.00014447289281632495, "loss": 4.3868, "step": 75450 }, { "epoch": 0.14454235755749084, "grad_norm": 1.1315981149673462, "learning_rate": 0.00014449204758043403, "loss": 4.6201, "step": 75460 }, { "epoch": 0.14456151238886605, "grad_norm": 1.131356954574585, "learning_rate": 0.0001445112023445431, "loss": 4.6066, "step": 75470 }, { "epoch": 0.1445806672202413, "grad_norm": 1.1556633710861206, "learning_rate": 0.0001445303571086522, "loss": 4.3952, "step": 75480 }, { "epoch": 0.14459982205161653, "grad_norm": 1.094438076019287, "learning_rate": 0.00014454951187276127, "loss": 4.5029, "step": 75490 }, { "epoch": 0.14461897688299175, "grad_norm": 1.158180832862854, "learning_rate": 0.00014456866663687036, "loss": 4.5741, "step": 75500 }, { "epoch": 0.14463813171436699, "grad_norm": 1.1552300453186035, "learning_rate": 0.00014458782140097944, "loss": 4.4866, "step": 75510 }, { "epoch": 0.14465728654574223, "grad_norm": 1.135354995727539, "learning_rate": 0.00014460697616508852, "loss": 4.4331, "step": 75520 }, { "epoch": 0.14467644137711744, "grad_norm": 1.0862748622894287, "learning_rate": 0.0001446261309291976, "loss": 4.5333, "step": 75530 }, { "epoch": 0.14469559620849268, "grad_norm": 1.1782475709915161, "learning_rate": 0.00014464528569330668, "loss": 4.6344, "step": 
75540 }, { "epoch": 0.14471475103986792, "grad_norm": 1.1540879011154175, "learning_rate": 0.00014466444045741576, "loss": 4.4446, "step": 75550 }, { "epoch": 0.14473390587124313, "grad_norm": 1.1972711086273193, "learning_rate": 0.00014468359522152482, "loss": 4.4722, "step": 75560 }, { "epoch": 0.14475306070261837, "grad_norm": 1.1953761577606201, "learning_rate": 0.00014470274998563392, "loss": 4.5945, "step": 75570 }, { "epoch": 0.1447722155339936, "grad_norm": 1.1246142387390137, "learning_rate": 0.000144721904749743, "loss": 4.4713, "step": 75580 }, { "epoch": 0.14479137036536882, "grad_norm": 1.1473913192749023, "learning_rate": 0.00014474105951385206, "loss": 4.5241, "step": 75590 }, { "epoch": 0.14481052519674406, "grad_norm": 1.1149643659591675, "learning_rate": 0.00014476021427796117, "loss": 4.5551, "step": 75600 }, { "epoch": 0.1448296800281193, "grad_norm": 1.1526395082473755, "learning_rate": 0.00014477936904207025, "loss": 4.6298, "step": 75610 }, { "epoch": 0.1448488348594945, "grad_norm": 1.1477744579315186, "learning_rate": 0.0001447985238061793, "loss": 4.5764, "step": 75620 }, { "epoch": 0.14486798969086975, "grad_norm": 1.145354151725769, "learning_rate": 0.00014481767857028839, "loss": 4.6221, "step": 75630 }, { "epoch": 0.144887144522245, "grad_norm": 1.2179840803146362, "learning_rate": 0.00014483683333439747, "loss": 4.6299, "step": 75640 }, { "epoch": 0.1449062993536202, "grad_norm": 1.1627031564712524, "learning_rate": 0.00014485598809850655, "loss": 4.5892, "step": 75650 }, { "epoch": 0.14492545418499544, "grad_norm": 1.1006687879562378, "learning_rate": 0.00014487514286261563, "loss": 4.3855, "step": 75660 }, { "epoch": 0.14494460901637068, "grad_norm": 1.2875723838806152, "learning_rate": 0.0001448942976267247, "loss": 4.5894, "step": 75670 }, { "epoch": 0.1449637638477459, "grad_norm": 1.1278553009033203, "learning_rate": 0.0001449134523908338, "loss": 4.4556, "step": 75680 }, { "epoch": 0.14498291867912114, "grad_norm": 
1.114174246788025, "learning_rate": 0.00014493260715494287, "loss": 4.6675, "step": 75690 }, { "epoch": 0.14500207351049638, "grad_norm": 1.0881743431091309, "learning_rate": 0.00014495176191905195, "loss": 4.6473, "step": 75700 }, { "epoch": 0.1450212283418716, "grad_norm": 1.1433669328689575, "learning_rate": 0.00014497091668316104, "loss": 4.587, "step": 75710 }, { "epoch": 0.14504038317324683, "grad_norm": 1.900992751121521, "learning_rate": 0.00014499007144727012, "loss": 4.569, "step": 75720 }, { "epoch": 0.14505953800462207, "grad_norm": 1.0861763954162598, "learning_rate": 0.0001450092262113792, "loss": 4.6696, "step": 75730 }, { "epoch": 0.14507869283599728, "grad_norm": 1.0954328775405884, "learning_rate": 0.00014502838097548828, "loss": 4.5817, "step": 75740 }, { "epoch": 0.14509784766737252, "grad_norm": 1.1285101175308228, "learning_rate": 0.00014504753573959736, "loss": 4.4883, "step": 75750 }, { "epoch": 0.14511700249874776, "grad_norm": 1.1388523578643799, "learning_rate": 0.00014506669050370644, "loss": 4.4672, "step": 75760 }, { "epoch": 0.14513615733012297, "grad_norm": 1.0743703842163086, "learning_rate": 0.00014508584526781552, "loss": 4.4035, "step": 75770 }, { "epoch": 0.1451553121614982, "grad_norm": 1.134974718093872, "learning_rate": 0.00014510500003192458, "loss": 4.5377, "step": 75780 }, { "epoch": 0.14517446699287345, "grad_norm": 1.1406008005142212, "learning_rate": 0.00014512415479603369, "loss": 4.4889, "step": 75790 }, { "epoch": 0.14519362182424866, "grad_norm": 1.1023800373077393, "learning_rate": 0.00014514330956014277, "loss": 4.6559, "step": 75800 }, { "epoch": 0.1452127766556239, "grad_norm": 1.1248536109924316, "learning_rate": 0.00014516246432425182, "loss": 4.6168, "step": 75810 }, { "epoch": 0.14523193148699914, "grad_norm": 1.1414417028427124, "learning_rate": 0.00014518161908836093, "loss": 4.5649, "step": 75820 }, { "epoch": 0.14525108631837436, "grad_norm": 1.1380329132080078, "learning_rate": 0.00014520077385246998, 
"loss": 4.5955, "step": 75830 }, { "epoch": 0.1452702411497496, "grad_norm": 1.1462420225143433, "learning_rate": 0.00014521992861657907, "loss": 4.5625, "step": 75840 }, { "epoch": 0.14528939598112484, "grad_norm": 1.1419881582260132, "learning_rate": 0.00014523908338068815, "loss": 4.603, "step": 75850 }, { "epoch": 0.14530855081250005, "grad_norm": 1.1398403644561768, "learning_rate": 0.00014525823814479723, "loss": 4.5525, "step": 75860 }, { "epoch": 0.1453277056438753, "grad_norm": 1.193971872329712, "learning_rate": 0.0001452773929089063, "loss": 4.6183, "step": 75870 }, { "epoch": 0.14534686047525053, "grad_norm": 1.1302566528320312, "learning_rate": 0.0001452965476730154, "loss": 4.5924, "step": 75880 }, { "epoch": 0.14536601530662574, "grad_norm": 1.1508678197860718, "learning_rate": 0.00014531570243712447, "loss": 4.5292, "step": 75890 }, { "epoch": 0.14538517013800098, "grad_norm": 1.1195775270462036, "learning_rate": 0.00014533485720123355, "loss": 4.5511, "step": 75900 }, { "epoch": 0.14540432496937622, "grad_norm": 1.1351398229599, "learning_rate": 0.00014535401196534263, "loss": 4.5962, "step": 75910 }, { "epoch": 0.14542347980075143, "grad_norm": 1.094386100769043, "learning_rate": 0.00014537316672945172, "loss": 4.4781, "step": 75920 }, { "epoch": 0.14544263463212667, "grad_norm": 1.1675021648406982, "learning_rate": 0.0001453923214935608, "loss": 4.6314, "step": 75930 }, { "epoch": 0.1454617894635019, "grad_norm": 1.116083025932312, "learning_rate": 0.00014541147625766988, "loss": 4.5106, "step": 75940 }, { "epoch": 0.14548094429487712, "grad_norm": 1.2249622344970703, "learning_rate": 0.00014543063102177896, "loss": 4.5312, "step": 75950 }, { "epoch": 0.14550009912625236, "grad_norm": 1.1258898973464966, "learning_rate": 0.00014544978578588804, "loss": 4.5138, "step": 75960 }, { "epoch": 0.1455192539576276, "grad_norm": 1.0891183614730835, "learning_rate": 0.00014546894054999712, "loss": 4.5482, "step": 75970 }, { "epoch": 0.14553840878900282, 
"grad_norm": 1.0998446941375732, "learning_rate": 0.0001454880953141062, "loss": 4.6304, "step": 75980 }, { "epoch": 0.14555756362037806, "grad_norm": 1.1396098136901855, "learning_rate": 0.00014550725007821529, "loss": 4.5157, "step": 75990 }, { "epoch": 0.1455767184517533, "grad_norm": 1.1335875988006592, "learning_rate": 0.00014552640484232434, "loss": 4.5648, "step": 76000 }, { "epoch": 0.1455958732831285, "grad_norm": 1.1661348342895508, "learning_rate": 0.00014554555960643345, "loss": 4.5976, "step": 76010 }, { "epoch": 0.14561502811450375, "grad_norm": 1.0504631996154785, "learning_rate": 0.0001455647143705425, "loss": 4.4968, "step": 76020 }, { "epoch": 0.145634182945879, "grad_norm": 1.1904863119125366, "learning_rate": 0.00014558386913465158, "loss": 4.5625, "step": 76030 }, { "epoch": 0.1456533377772542, "grad_norm": 1.1007243394851685, "learning_rate": 0.0001456030238987607, "loss": 4.533, "step": 76040 }, { "epoch": 0.14567249260862944, "grad_norm": 1.1626023054122925, "learning_rate": 0.00014562217866286975, "loss": 4.4654, "step": 76050 }, { "epoch": 0.14569164744000468, "grad_norm": 1.2299622297286987, "learning_rate": 0.00014564133342697883, "loss": 4.5282, "step": 76060 }, { "epoch": 0.1457108022713799, "grad_norm": 1.1161038875579834, "learning_rate": 0.0001456604881910879, "loss": 4.573, "step": 76070 }, { "epoch": 0.14572995710275513, "grad_norm": 1.0808902978897095, "learning_rate": 0.000145679642955197, "loss": 4.6123, "step": 76080 }, { "epoch": 0.14574911193413037, "grad_norm": 1.0886603593826294, "learning_rate": 0.00014569879771930607, "loss": 4.351, "step": 76090 }, { "epoch": 0.14576826676550558, "grad_norm": 1.1091309785842896, "learning_rate": 0.00014571795248341515, "loss": 4.4903, "step": 76100 }, { "epoch": 0.14578742159688082, "grad_norm": 1.1407593488693237, "learning_rate": 0.00014573710724752423, "loss": 4.5891, "step": 76110 }, { "epoch": 0.14580657642825606, "grad_norm": 1.130986213684082, "learning_rate": 
0.00014575626201163331, "loss": 4.5675, "step": 76120 }, { "epoch": 0.14582573125963127, "grad_norm": 1.1180453300476074, "learning_rate": 0.0001457754167757424, "loss": 4.5207, "step": 76130 }, { "epoch": 0.14584488609100651, "grad_norm": 1.14909827709198, "learning_rate": 0.00014579457153985148, "loss": 4.5332, "step": 76140 }, { "epoch": 0.14586404092238175, "grad_norm": 1.1551610231399536, "learning_rate": 0.00014581372630396056, "loss": 4.3855, "step": 76150 }, { "epoch": 0.145883195753757, "grad_norm": 1.1091058254241943, "learning_rate": 0.00014583288106806964, "loss": 4.4607, "step": 76160 }, { "epoch": 0.1459023505851322, "grad_norm": 1.1459678411483765, "learning_rate": 0.00014585203583217872, "loss": 4.5787, "step": 76170 }, { "epoch": 0.14592150541650745, "grad_norm": 1.0890966653823853, "learning_rate": 0.0001458711905962878, "loss": 4.5893, "step": 76180 }, { "epoch": 0.1459406602478827, "grad_norm": 1.2091877460479736, "learning_rate": 0.00014589034536039688, "loss": 4.5404, "step": 76190 }, { "epoch": 0.1459598150792579, "grad_norm": 1.117963433265686, "learning_rate": 0.00014590950012450597, "loss": 4.5523, "step": 76200 }, { "epoch": 0.14597896991063314, "grad_norm": 1.1494710445404053, "learning_rate": 0.00014592865488861502, "loss": 4.5764, "step": 76210 }, { "epoch": 0.14599812474200838, "grad_norm": 1.1103475093841553, "learning_rate": 0.0001459478096527241, "loss": 4.4923, "step": 76220 }, { "epoch": 0.1460172795733836, "grad_norm": 1.097734808921814, "learning_rate": 0.0001459669644168332, "loss": 4.5683, "step": 76230 }, { "epoch": 0.14603643440475883, "grad_norm": 1.0883182287216187, "learning_rate": 0.00014598611918094226, "loss": 4.5725, "step": 76240 }, { "epoch": 0.14605558923613407, "grad_norm": 1.1291271448135376, "learning_rate": 0.00014600527394505134, "loss": 4.581, "step": 76250 }, { "epoch": 0.14607474406750928, "grad_norm": 1.0771125555038452, "learning_rate": 0.00014602442870916045, "loss": 4.5254, "step": 76260 }, { "epoch": 
0.14609389889888452, "grad_norm": 1.099138855934143, "learning_rate": 0.0001460435834732695, "loss": 4.4989, "step": 76270 }, { "epoch": 0.14611305373025976, "grad_norm": 1.1078848838806152, "learning_rate": 0.0001460627382373786, "loss": 4.5963, "step": 76280 }, { "epoch": 0.14613220856163497, "grad_norm": 1.0878549814224243, "learning_rate": 0.00014608189300148767, "loss": 4.6716, "step": 76290 }, { "epoch": 0.14615136339301021, "grad_norm": 1.1005961894989014, "learning_rate": 0.00014610104776559675, "loss": 4.5791, "step": 76300 }, { "epoch": 0.14617051822438545, "grad_norm": 1.1194709539413452, "learning_rate": 0.00014612020252970583, "loss": 4.5874, "step": 76310 }, { "epoch": 0.14618967305576067, "grad_norm": 1.0850558280944824, "learning_rate": 0.00014613935729381491, "loss": 4.5091, "step": 76320 }, { "epoch": 0.1462088278871359, "grad_norm": 1.1429005861282349, "learning_rate": 0.000146158512057924, "loss": 4.5164, "step": 76330 }, { "epoch": 0.14622798271851115, "grad_norm": 1.1452062129974365, "learning_rate": 0.00014617766682203308, "loss": 4.6019, "step": 76340 }, { "epoch": 0.14624713754988636, "grad_norm": 1.1349011659622192, "learning_rate": 0.00014619682158614216, "loss": 4.4882, "step": 76350 }, { "epoch": 0.1462662923812616, "grad_norm": 1.152794599533081, "learning_rate": 0.00014621597635025124, "loss": 4.5397, "step": 76360 }, { "epoch": 0.14628544721263684, "grad_norm": 1.1180726289749146, "learning_rate": 0.00014623513111436032, "loss": 4.5934, "step": 76370 }, { "epoch": 0.14630460204401205, "grad_norm": 1.1032880544662476, "learning_rate": 0.0001462542858784694, "loss": 4.4234, "step": 76380 }, { "epoch": 0.1463237568753873, "grad_norm": 1.1383651494979858, "learning_rate": 0.00014627344064257848, "loss": 4.5092, "step": 76390 }, { "epoch": 0.14634291170676253, "grad_norm": 1.1143752336502075, "learning_rate": 0.00014629259540668754, "loss": 4.3829, "step": 76400 }, { "epoch": 0.14636206653813774, "grad_norm": 1.097894310951233, 
"learning_rate": 0.00014631175017079665, "loss": 4.5472, "step": 76410 }, { "epoch": 0.14638122136951298, "grad_norm": 1.1166142225265503, "learning_rate": 0.00014633090493490573, "loss": 4.3959, "step": 76420 }, { "epoch": 0.14640037620088822, "grad_norm": 1.125006914138794, "learning_rate": 0.00014635005969901478, "loss": 4.5895, "step": 76430 }, { "epoch": 0.14641953103226343, "grad_norm": 1.0593568086624146, "learning_rate": 0.00014636921446312386, "loss": 4.3845, "step": 76440 }, { "epoch": 0.14643868586363867, "grad_norm": 1.0770567655563354, "learning_rate": 0.00014638836922723297, "loss": 4.5529, "step": 76450 }, { "epoch": 0.1464578406950139, "grad_norm": 1.1075282096862793, "learning_rate": 0.00014640752399134203, "loss": 4.6739, "step": 76460 }, { "epoch": 0.14647699552638913, "grad_norm": 1.149686574935913, "learning_rate": 0.0001464266787554511, "loss": 4.5189, "step": 76470 }, { "epoch": 0.14649615035776437, "grad_norm": 1.1523152589797974, "learning_rate": 0.00014644583351956021, "loss": 4.5845, "step": 76480 }, { "epoch": 0.1465153051891396, "grad_norm": 1.154579758644104, "learning_rate": 0.00014646498828366927, "loss": 4.4324, "step": 76490 }, { "epoch": 0.14653446002051482, "grad_norm": 1.126912236213684, "learning_rate": 0.00014648414304777835, "loss": 4.5523, "step": 76500 }, { "epoch": 0.14655361485189006, "grad_norm": 1.116833209991455, "learning_rate": 0.00014650329781188743, "loss": 4.441, "step": 76510 }, { "epoch": 0.1465727696832653, "grad_norm": 1.1249351501464844, "learning_rate": 0.0001465224525759965, "loss": 4.4478, "step": 76520 }, { "epoch": 0.1465919245146405, "grad_norm": 1.1035882234573364, "learning_rate": 0.0001465416073401056, "loss": 4.5448, "step": 76530 }, { "epoch": 0.14661107934601575, "grad_norm": 1.1099112033843994, "learning_rate": 0.00014656076210421468, "loss": 4.4212, "step": 76540 }, { "epoch": 0.146630234177391, "grad_norm": 1.1050996780395508, "learning_rate": 0.00014657991686832376, "loss": 4.4481, "step": 
76550 }, { "epoch": 0.1466493890087662, "grad_norm": 1.1362226009368896, "learning_rate": 0.00014659907163243284, "loss": 4.5092, "step": 76560 }, { "epoch": 0.14666854384014144, "grad_norm": 1.1604629755020142, "learning_rate": 0.00014661822639654192, "loss": 4.6095, "step": 76570 }, { "epoch": 0.14668769867151668, "grad_norm": 1.1277564764022827, "learning_rate": 0.000146637381160651, "loss": 4.7059, "step": 76580 }, { "epoch": 0.1467068535028919, "grad_norm": 1.147701621055603, "learning_rate": 0.00014665653592476005, "loss": 4.6228, "step": 76590 }, { "epoch": 0.14672600833426713, "grad_norm": 1.1424622535705566, "learning_rate": 0.00014667569068886916, "loss": 4.4397, "step": 76600 }, { "epoch": 0.14674516316564237, "grad_norm": 1.1128692626953125, "learning_rate": 0.00014669484545297824, "loss": 4.5505, "step": 76610 }, { "epoch": 0.14676431799701758, "grad_norm": 1.1102720499038696, "learning_rate": 0.0001467140002170873, "loss": 4.5318, "step": 76620 }, { "epoch": 0.14678347282839282, "grad_norm": 1.0734741687774658, "learning_rate": 0.00014673315498119638, "loss": 4.3881, "step": 76630 }, { "epoch": 0.14680262765976806, "grad_norm": 1.1152561902999878, "learning_rate": 0.0001467523097453055, "loss": 4.5193, "step": 76640 }, { "epoch": 0.14682178249114328, "grad_norm": 1.1500619649887085, "learning_rate": 0.00014677146450941454, "loss": 4.5726, "step": 76650 }, { "epoch": 0.14684093732251852, "grad_norm": 1.1159309148788452, "learning_rate": 0.00014679061927352362, "loss": 4.6444, "step": 76660 }, { "epoch": 0.14686009215389376, "grad_norm": 1.5237361192703247, "learning_rate": 0.00014680977403763273, "loss": 4.5166, "step": 76670 }, { "epoch": 0.14687924698526897, "grad_norm": 1.1206152439117432, "learning_rate": 0.0001468289288017418, "loss": 4.529, "step": 76680 }, { "epoch": 0.1468984018166442, "grad_norm": 1.1124149560928345, "learning_rate": 0.00014684808356585087, "loss": 4.4849, "step": 76690 }, { "epoch": 0.14691755664801945, "grad_norm": 
1.1214762926101685, "learning_rate": 0.00014686723832995995, "loss": 4.585, "step": 76700 }, { "epoch": 0.14693671147939466, "grad_norm": 1.140306830406189, "learning_rate": 0.00014688639309406903, "loss": 4.5586, "step": 76710 }, { "epoch": 0.1469558663107699, "grad_norm": 1.1103202104568481, "learning_rate": 0.0001469055478581781, "loss": 4.5106, "step": 76720 }, { "epoch": 0.14697502114214514, "grad_norm": 1.0998671054840088, "learning_rate": 0.0001469247026222872, "loss": 4.4845, "step": 76730 }, { "epoch": 0.14699417597352035, "grad_norm": 1.074195384979248, "learning_rate": 0.00014694385738639627, "loss": 4.6752, "step": 76740 }, { "epoch": 0.1470133308048956, "grad_norm": 1.1332043409347534, "learning_rate": 0.00014696301215050536, "loss": 4.4052, "step": 76750 }, { "epoch": 0.14703248563627083, "grad_norm": 1.1504607200622559, "learning_rate": 0.00014698216691461444, "loss": 4.6341, "step": 76760 }, { "epoch": 0.14705164046764604, "grad_norm": 1.1321449279785156, "learning_rate": 0.00014700132167872352, "loss": 4.4944, "step": 76770 }, { "epoch": 0.14707079529902128, "grad_norm": 1.1040130853652954, "learning_rate": 0.00014702047644283257, "loss": 4.6233, "step": 76780 }, { "epoch": 0.14708995013039652, "grad_norm": 1.1088117361068726, "learning_rate": 0.00014703963120694168, "loss": 4.4976, "step": 76790 }, { "epoch": 0.14710910496177174, "grad_norm": 1.1373460292816162, "learning_rate": 0.00014705878597105076, "loss": 4.515, "step": 76800 }, { "epoch": 0.14712825979314698, "grad_norm": 1.103987455368042, "learning_rate": 0.00014707794073515982, "loss": 4.5019, "step": 76810 }, { "epoch": 0.14714741462452222, "grad_norm": 1.0961426496505737, "learning_rate": 0.00014709709549926893, "loss": 4.5014, "step": 76820 }, { "epoch": 0.14716656945589743, "grad_norm": 1.1165486574172974, "learning_rate": 0.000147116250263378, "loss": 4.5822, "step": 76830 }, { "epoch": 0.14718572428727267, "grad_norm": 1.0685285329818726, "learning_rate": 0.00014713540502748706, 
"loss": 4.5992, "step": 76840 }, { "epoch": 0.1472048791186479, "grad_norm": 1.1379308700561523, "learning_rate": 0.00014715455979159614, "loss": 4.6197, "step": 76850 }, { "epoch": 0.14722403395002312, "grad_norm": 1.1130534410476685, "learning_rate": 0.00014717371455570525, "loss": 4.5384, "step": 76860 }, { "epoch": 0.14724318878139836, "grad_norm": 1.1363967657089233, "learning_rate": 0.0001471928693198143, "loss": 4.5455, "step": 76870 }, { "epoch": 0.1472623436127736, "grad_norm": 1.1090623140335083, "learning_rate": 0.0001472101086075125, "loss": 4.505, "step": 76880 }, { "epoch": 0.1472814984441488, "grad_norm": 1.6199312210083008, "learning_rate": 0.00014722926337162157, "loss": 4.4584, "step": 76890 }, { "epoch": 0.14730065327552405, "grad_norm": 1.1315947771072388, "learning_rate": 0.00014724841813573065, "loss": 4.5181, "step": 76900 }, { "epoch": 0.1473198081068993, "grad_norm": 1.1340395212173462, "learning_rate": 0.0001472675728998397, "loss": 4.4882, "step": 76910 }, { "epoch": 0.1473389629382745, "grad_norm": 1.2742537260055542, "learning_rate": 0.00014728672766394882, "loss": 4.5595, "step": 76920 }, { "epoch": 0.14735811776964974, "grad_norm": 1.1470922231674194, "learning_rate": 0.0001473058824280579, "loss": 4.6556, "step": 76930 }, { "epoch": 0.14737727260102498, "grad_norm": 1.1123154163360596, "learning_rate": 0.00014732503719216695, "loss": 4.4877, "step": 76940 }, { "epoch": 0.1473964274324002, "grad_norm": 1.1478198766708374, "learning_rate": 0.00014734419195627606, "loss": 4.5315, "step": 76950 }, { "epoch": 0.14741558226377544, "grad_norm": 1.103294014930725, "learning_rate": 0.00014736334672038514, "loss": 4.5029, "step": 76960 }, { "epoch": 0.14743473709515068, "grad_norm": 1.1415822505950928, "learning_rate": 0.0001473825014844942, "loss": 4.4253, "step": 76970 }, { "epoch": 0.1474538919265259, "grad_norm": 1.0938044786453247, "learning_rate": 0.00014740165624860328, "loss": 4.573, "step": 76980 }, { "epoch": 0.14747304675790113, 
"grad_norm": 1.1210588216781616, "learning_rate": 0.00014742081101271236, "loss": 4.4871, "step": 76990 }, { "epoch": 0.14749220158927637, "grad_norm": 1.1096903085708618, "learning_rate": 0.00014743996577682144, "loss": 4.3676, "step": 77000 }, { "epoch": 0.14751135642065158, "grad_norm": 1.098474383354187, "learning_rate": 0.00014745912054093052, "loss": 4.4262, "step": 77010 }, { "epoch": 0.14753051125202682, "grad_norm": 1.0949101448059082, "learning_rate": 0.0001474782753050396, "loss": 4.5575, "step": 77020 }, { "epoch": 0.14754966608340206, "grad_norm": 1.082521677017212, "learning_rate": 0.00014749743006914868, "loss": 4.5187, "step": 77030 }, { "epoch": 0.14756882091477727, "grad_norm": 1.102753758430481, "learning_rate": 0.00014751658483325776, "loss": 4.5483, "step": 77040 }, { "epoch": 0.1475879757461525, "grad_norm": 1.0958722829818726, "learning_rate": 0.00014753573959736685, "loss": 4.5017, "step": 77050 }, { "epoch": 0.14760713057752775, "grad_norm": 1.0838996171951294, "learning_rate": 0.00014755489436147593, "loss": 4.5537, "step": 77060 }, { "epoch": 0.14762628540890296, "grad_norm": 1.0948573350906372, "learning_rate": 0.000147574049125585, "loss": 4.5661, "step": 77070 }, { "epoch": 0.1476454402402782, "grad_norm": 1.0988900661468506, "learning_rate": 0.0001475932038896941, "loss": 4.4517, "step": 77080 }, { "epoch": 0.14766459507165344, "grad_norm": 1.1385504007339478, "learning_rate": 0.00014761235865380317, "loss": 4.6904, "step": 77090 }, { "epoch": 0.14768374990302865, "grad_norm": 1.114197850227356, "learning_rate": 0.00014763151341791225, "loss": 4.5011, "step": 77100 }, { "epoch": 0.1477029047344039, "grad_norm": 1.1042969226837158, "learning_rate": 0.00014765066818202133, "loss": 4.6063, "step": 77110 }, { "epoch": 0.14772205956577913, "grad_norm": 1.1095486879348755, "learning_rate": 0.00014766982294613042, "loss": 4.6139, "step": 77120 }, { "epoch": 0.14774121439715435, "grad_norm": 1.122277855873108, "learning_rate": 
0.00014768897771023947, "loss": 4.435, "step": 77130 }, { "epoch": 0.1477603692285296, "grad_norm": 1.1239473819732666, "learning_rate": 0.00014770813247434858, "loss": 4.5971, "step": 77140 }, { "epoch": 0.14777952405990483, "grad_norm": 1.1415514945983887, "learning_rate": 0.00014772728723845766, "loss": 4.5651, "step": 77150 }, { "epoch": 0.14779867889128004, "grad_norm": 1.1044710874557495, "learning_rate": 0.0001477464420025667, "loss": 4.6137, "step": 77160 }, { "epoch": 0.14781783372265528, "grad_norm": 1.1366288661956787, "learning_rate": 0.00014776559676667582, "loss": 4.4226, "step": 77170 }, { "epoch": 0.14783698855403052, "grad_norm": 1.1535263061523438, "learning_rate": 0.00014778475153078488, "loss": 4.4097, "step": 77180 }, { "epoch": 0.14785614338540573, "grad_norm": 1.1357824802398682, "learning_rate": 0.00014780390629489396, "loss": 4.5917, "step": 77190 }, { "epoch": 0.14787529821678097, "grad_norm": 1.1533209085464478, "learning_rate": 0.00014782306105900304, "loss": 4.6025, "step": 77200 }, { "epoch": 0.1478944530481562, "grad_norm": 1.160830020904541, "learning_rate": 0.00014784221582311212, "loss": 4.5575, "step": 77210 }, { "epoch": 0.14791360787953142, "grad_norm": 1.0985561609268188, "learning_rate": 0.0001478613705872212, "loss": 4.5145, "step": 77220 }, { "epoch": 0.14793276271090666, "grad_norm": 1.074154019355774, "learning_rate": 0.00014788052535133028, "loss": 4.4503, "step": 77230 }, { "epoch": 0.1479519175422819, "grad_norm": 1.1057097911834717, "learning_rate": 0.00014789968011543936, "loss": 4.487, "step": 77240 }, { "epoch": 0.14797107237365711, "grad_norm": 1.1095186471939087, "learning_rate": 0.00014791883487954845, "loss": 4.6736, "step": 77250 }, { "epoch": 0.14799022720503235, "grad_norm": 1.0879594087600708, "learning_rate": 0.00014793798964365753, "loss": 4.4666, "step": 77260 }, { "epoch": 0.1480093820364076, "grad_norm": 1.1138300895690918, "learning_rate": 0.0001479571444077666, "loss": 4.5104, "step": 77270 }, { 
"epoch": 0.1480285368677828, "grad_norm": 1.1241822242736816, "learning_rate": 0.0001479762991718757, "loss": 4.6726, "step": 77280 }, { "epoch": 0.14804769169915805, "grad_norm": 1.0904849767684937, "learning_rate": 0.00014799545393598477, "loss": 4.4499, "step": 77290 }, { "epoch": 0.14806684653053329, "grad_norm": 1.139957070350647, "learning_rate": 0.00014801460870009385, "loss": 4.5867, "step": 77300 }, { "epoch": 0.1480860013619085, "grad_norm": 1.0943650007247925, "learning_rate": 0.00014803376346420293, "loss": 4.5638, "step": 77310 }, { "epoch": 0.14810515619328374, "grad_norm": 1.133984088897705, "learning_rate": 0.000148052918228312, "loss": 4.4799, "step": 77320 }, { "epoch": 0.14812431102465898, "grad_norm": 1.187064528465271, "learning_rate": 0.0001480720729924211, "loss": 4.5531, "step": 77330 }, { "epoch": 0.1481434658560342, "grad_norm": 1.1596362590789795, "learning_rate": 0.00014809122775653018, "loss": 4.4104, "step": 77340 }, { "epoch": 0.14816262068740943, "grad_norm": 1.092321515083313, "learning_rate": 0.00014811038252063923, "loss": 4.6196, "step": 77350 }, { "epoch": 0.14818177551878467, "grad_norm": 1.1414456367492676, "learning_rate": 0.00014812953728474834, "loss": 4.4748, "step": 77360 }, { "epoch": 0.14820093035015988, "grad_norm": 1.1808414459228516, "learning_rate": 0.0001481486920488574, "loss": 4.4691, "step": 77370 }, { "epoch": 0.14822008518153512, "grad_norm": 1.1552114486694336, "learning_rate": 0.00014816784681296648, "loss": 4.4137, "step": 77380 }, { "epoch": 0.14823924001291036, "grad_norm": 1.120270848274231, "learning_rate": 0.00014818700157707558, "loss": 4.5235, "step": 77390 }, { "epoch": 0.14825839484428557, "grad_norm": 1.1527442932128906, "learning_rate": 0.00014820615634118464, "loss": 4.4307, "step": 77400 }, { "epoch": 0.1482775496756608, "grad_norm": 1.1654982566833496, "learning_rate": 0.00014822531110529372, "loss": 4.6166, "step": 77410 }, { "epoch": 0.14829670450703605, "grad_norm": 1.0851210355758667, 
"learning_rate": 0.0001482444658694028, "loss": 4.5676, "step": 77420 }, { "epoch": 0.14831585933841127, "grad_norm": 1.147161602973938, "learning_rate": 0.00014826362063351188, "loss": 4.5424, "step": 77430 }, { "epoch": 0.1483350141697865, "grad_norm": 1.0970485210418701, "learning_rate": 0.00014828277539762096, "loss": 4.4051, "step": 77440 }, { "epoch": 0.14835416900116175, "grad_norm": 1.1822787523269653, "learning_rate": 0.00014830193016173004, "loss": 4.4704, "step": 77450 }, { "epoch": 0.14837332383253696, "grad_norm": 1.0517897605895996, "learning_rate": 0.00014832108492583913, "loss": 4.5583, "step": 77460 }, { "epoch": 0.1483924786639122, "grad_norm": 1.106998324394226, "learning_rate": 0.0001483402396899482, "loss": 4.4916, "step": 77470 }, { "epoch": 0.14841163349528744, "grad_norm": 1.099437952041626, "learning_rate": 0.0001483593944540573, "loss": 4.5858, "step": 77480 }, { "epoch": 0.14843078832666268, "grad_norm": 1.1573864221572876, "learning_rate": 0.00014837854921816637, "loss": 4.5787, "step": 77490 }, { "epoch": 0.1484499431580379, "grad_norm": 1.1061246395111084, "learning_rate": 0.00014839770398227545, "loss": 4.5258, "step": 77500 }, { "epoch": 0.14846909798941313, "grad_norm": 1.098877191543579, "learning_rate": 0.00014841685874638453, "loss": 4.5605, "step": 77510 }, { "epoch": 0.14848825282078837, "grad_norm": 1.237070083618164, "learning_rate": 0.0001484360135104936, "loss": 4.4943, "step": 77520 }, { "epoch": 0.14850740765216358, "grad_norm": 1.1637115478515625, "learning_rate": 0.0001484551682746027, "loss": 4.53, "step": 77530 }, { "epoch": 0.14852656248353882, "grad_norm": 1.0792393684387207, "learning_rate": 0.00014847432303871175, "loss": 4.6604, "step": 77540 }, { "epoch": 0.14854571731491406, "grad_norm": 1.1280412673950195, "learning_rate": 0.00014849347780282086, "loss": 4.4739, "step": 77550 }, { "epoch": 0.14856487214628927, "grad_norm": 1.32707941532135, "learning_rate": 0.0001485126325669299, "loss": 4.7239, "step": 77560 
}, { "epoch": 0.1485840269776645, "grad_norm": 1.119545578956604, "learning_rate": 0.000148531787331039, "loss": 4.454, "step": 77570 }, { "epoch": 0.14860318180903975, "grad_norm": 1.0922132730484009, "learning_rate": 0.0001485509420951481, "loss": 4.5891, "step": 77580 }, { "epoch": 0.14862233664041496, "grad_norm": 1.1140259504318237, "learning_rate": 0.00014857009685925716, "loss": 4.4322, "step": 77590 }, { "epoch": 0.1486414914717902, "grad_norm": 1.086346983909607, "learning_rate": 0.00014858925162336624, "loss": 4.539, "step": 77600 }, { "epoch": 0.14866064630316544, "grad_norm": 1.135643482208252, "learning_rate": 0.00014860840638747535, "loss": 4.3978, "step": 77610 }, { "epoch": 0.14867980113454066, "grad_norm": 1.13152015209198, "learning_rate": 0.0001486275611515844, "loss": 4.4673, "step": 77620 }, { "epoch": 0.1486989559659159, "grad_norm": 1.085366129875183, "learning_rate": 0.00014864671591569348, "loss": 4.5688, "step": 77630 }, { "epoch": 0.14871811079729114, "grad_norm": 1.1245241165161133, "learning_rate": 0.00014866587067980256, "loss": 4.563, "step": 77640 }, { "epoch": 0.14873726562866635, "grad_norm": 1.073910117149353, "learning_rate": 0.00014868502544391164, "loss": 4.6639, "step": 77650 }, { "epoch": 0.1487564204600416, "grad_norm": 1.1336201429367065, "learning_rate": 0.00014870418020802072, "loss": 4.5035, "step": 77660 }, { "epoch": 0.14877557529141683, "grad_norm": 1.0894006490707397, "learning_rate": 0.0001487233349721298, "loss": 4.546, "step": 77670 }, { "epoch": 0.14879473012279204, "grad_norm": 1.1217122077941895, "learning_rate": 0.0001487424897362389, "loss": 4.5154, "step": 77680 }, { "epoch": 0.14881388495416728, "grad_norm": 1.068375587463379, "learning_rate": 0.00014876164450034797, "loss": 4.5815, "step": 77690 }, { "epoch": 0.14883303978554252, "grad_norm": 1.101157784461975, "learning_rate": 0.00014878079926445705, "loss": 4.6529, "step": 77700 }, { "epoch": 0.14885219461691773, "grad_norm": 1.07345712184906, 
"learning_rate": 0.00014879995402856613, "loss": 4.5307, "step": 77710 }, { "epoch": 0.14887134944829297, "grad_norm": 1.0702389478683472, "learning_rate": 0.0001488191087926752, "loss": 4.5815, "step": 77720 }, { "epoch": 0.1488905042796682, "grad_norm": 1.0903549194335938, "learning_rate": 0.0001488382635567843, "loss": 4.4608, "step": 77730 }, { "epoch": 0.14890965911104342, "grad_norm": 1.094347357749939, "learning_rate": 0.00014885741832089338, "loss": 4.3612, "step": 77740 }, { "epoch": 0.14892881394241866, "grad_norm": 1.1210483312606812, "learning_rate": 0.00014887657308500243, "loss": 4.5741, "step": 77750 }, { "epoch": 0.1489479687737939, "grad_norm": 1.0872739553451538, "learning_rate": 0.0001488957278491115, "loss": 4.5941, "step": 77760 }, { "epoch": 0.14896712360516912, "grad_norm": 1.0985007286071777, "learning_rate": 0.00014891488261322062, "loss": 4.4546, "step": 77770 }, { "epoch": 0.14898627843654436, "grad_norm": 1.1052625179290771, "learning_rate": 0.00014893403737732967, "loss": 4.4321, "step": 77780 }, { "epoch": 0.1490054332679196, "grad_norm": 1.091003179550171, "learning_rate": 0.00014895319214143875, "loss": 4.3877, "step": 77790 }, { "epoch": 0.1490245880992948, "grad_norm": 1.109730839729309, "learning_rate": 0.00014897234690554786, "loss": 4.5637, "step": 77800 }, { "epoch": 0.14904374293067005, "grad_norm": 1.1064316034317017, "learning_rate": 0.00014899150166965692, "loss": 4.6013, "step": 77810 }, { "epoch": 0.1490628977620453, "grad_norm": 1.1343519687652588, "learning_rate": 0.000149010656433766, "loss": 4.7027, "step": 77820 }, { "epoch": 0.1490820525934205, "grad_norm": 1.1171973943710327, "learning_rate": 0.00014902981119787508, "loss": 4.5718, "step": 77830 }, { "epoch": 0.14910120742479574, "grad_norm": 1.1550543308258057, "learning_rate": 0.00014904896596198416, "loss": 4.6386, "step": 77840 }, { "epoch": 0.14912036225617098, "grad_norm": 1.1215134859085083, "learning_rate": 0.00014906812072609324, "loss": 4.587, "step": 
77850 }, { "epoch": 0.1491395170875462, "grad_norm": 1.0680416822433472, "learning_rate": 0.00014908727549020232, "loss": 4.6421, "step": 77860 }, { "epoch": 0.14915867191892143, "grad_norm": 1.0896137952804565, "learning_rate": 0.0001491064302543114, "loss": 4.4157, "step": 77870 }, { "epoch": 0.14917782675029667, "grad_norm": 1.1148576736450195, "learning_rate": 0.00014912558501842049, "loss": 4.5144, "step": 77880 }, { "epoch": 0.14919698158167188, "grad_norm": 1.0853708982467651, "learning_rate": 0.00014914473978252957, "loss": 4.4493, "step": 77890 }, { "epoch": 0.14921613641304712, "grad_norm": 1.1462652683258057, "learning_rate": 0.00014916389454663865, "loss": 4.4748, "step": 77900 }, { "epoch": 0.14923529124442236, "grad_norm": 1.1211349964141846, "learning_rate": 0.00014918304931074773, "loss": 4.5128, "step": 77910 }, { "epoch": 0.14925444607579758, "grad_norm": 1.1056839227676392, "learning_rate": 0.0001492022040748568, "loss": 4.5315, "step": 77920 }, { "epoch": 0.14927360090717282, "grad_norm": 1.1031078100204468, "learning_rate": 0.0001492213588389659, "loss": 4.5417, "step": 77930 }, { "epoch": 0.14929275573854806, "grad_norm": 1.0942450761795044, "learning_rate": 0.00014924051360307495, "loss": 4.5994, "step": 77940 }, { "epoch": 0.14931191056992327, "grad_norm": 1.1045805215835571, "learning_rate": 0.00014925966836718406, "loss": 4.4247, "step": 77950 }, { "epoch": 0.1493310654012985, "grad_norm": 1.0714306831359863, "learning_rate": 0.00014927882313129314, "loss": 4.5427, "step": 77960 }, { "epoch": 0.14935022023267375, "grad_norm": 1.0823146104812622, "learning_rate": 0.0001492979778954022, "loss": 4.5708, "step": 77970 }, { "epoch": 0.14936937506404896, "grad_norm": 1.1111453771591187, "learning_rate": 0.00014931713265951127, "loss": 4.4503, "step": 77980 }, { "epoch": 0.1493885298954242, "grad_norm": 1.1412339210510254, "learning_rate": 0.00014933628742362038, "loss": 4.5428, "step": 77990 }, { "epoch": 0.14940768472679944, "grad_norm": 
1.1140497922897339, "learning_rate": 0.00014935544218772943, "loss": 4.5857, "step": 78000 }, { "epoch": 0.14942683955817465, "grad_norm": 1.0901769399642944, "learning_rate": 0.00014937459695183852, "loss": 4.7831, "step": 78010 }, { "epoch": 0.1494459943895499, "grad_norm": 1.0711579322814941, "learning_rate": 0.00014939375171594762, "loss": 4.5442, "step": 78020 }, { "epoch": 0.14946514922092513, "grad_norm": 1.1375120878219604, "learning_rate": 0.00014941290648005668, "loss": 4.4045, "step": 78030 }, { "epoch": 0.14948430405230034, "grad_norm": 1.0560498237609863, "learning_rate": 0.00014943206124416576, "loss": 4.5501, "step": 78040 }, { "epoch": 0.14950345888367558, "grad_norm": 1.0887378454208374, "learning_rate": 0.00014945121600827484, "loss": 4.5593, "step": 78050 }, { "epoch": 0.14952261371505082, "grad_norm": 1.0808556079864502, "learning_rate": 0.00014947037077238392, "loss": 4.5844, "step": 78060 }, { "epoch": 0.14954176854642603, "grad_norm": 1.1036005020141602, "learning_rate": 0.000149489525536493, "loss": 4.576, "step": 78070 }, { "epoch": 0.14956092337780127, "grad_norm": 1.1104114055633545, "learning_rate": 0.00014950868030060209, "loss": 4.4788, "step": 78080 }, { "epoch": 0.14958007820917651, "grad_norm": 1.1221669912338257, "learning_rate": 0.00014952783506471117, "loss": 4.4748, "step": 78090 }, { "epoch": 0.14959923304055173, "grad_norm": 1.0655686855316162, "learning_rate": 0.00014954698982882025, "loss": 4.5242, "step": 78100 }, { "epoch": 0.14961838787192697, "grad_norm": 1.136258840560913, "learning_rate": 0.00014956614459292933, "loss": 4.5039, "step": 78110 }, { "epoch": 0.1496375427033022, "grad_norm": 1.1428344249725342, "learning_rate": 0.0001495852993570384, "loss": 4.4954, "step": 78120 }, { "epoch": 0.14965669753467742, "grad_norm": 1.0979770421981812, "learning_rate": 0.00014960445412114746, "loss": 4.4914, "step": 78130 }, { "epoch": 0.14967585236605266, "grad_norm": 1.142795443534851, "learning_rate": 0.00014962360888525657, 
"loss": 4.5214, "step": 78140 }, { "epoch": 0.1496950071974279, "grad_norm": 1.0729866027832031, "learning_rate": 0.00014964276364936565, "loss": 4.431, "step": 78150 }, { "epoch": 0.1497141620288031, "grad_norm": 1.148792028427124, "learning_rate": 0.0001496619184134747, "loss": 4.5336, "step": 78160 }, { "epoch": 0.14973331686017835, "grad_norm": 1.1596542596817017, "learning_rate": 0.00014968107317758382, "loss": 4.4379, "step": 78170 }, { "epoch": 0.1497524716915536, "grad_norm": 1.1063499450683594, "learning_rate": 0.0001497002279416929, "loss": 4.4848, "step": 78180 }, { "epoch": 0.1497716265229288, "grad_norm": 1.1554596424102783, "learning_rate": 0.00014971938270580195, "loss": 4.4679, "step": 78190 }, { "epoch": 0.14979078135430404, "grad_norm": 1.0967514514923096, "learning_rate": 0.00014973853746991103, "loss": 4.5105, "step": 78200 }, { "epoch": 0.14980993618567928, "grad_norm": 1.1093119382858276, "learning_rate": 0.00014975769223402014, "loss": 4.5489, "step": 78210 }, { "epoch": 0.1498290910170545, "grad_norm": 1.1002286672592163, "learning_rate": 0.0001497768469981292, "loss": 4.4311, "step": 78220 }, { "epoch": 0.14984824584842973, "grad_norm": 1.0831090211868286, "learning_rate": 0.00014979600176223828, "loss": 4.579, "step": 78230 }, { "epoch": 0.14986740067980497, "grad_norm": 1.0808019638061523, "learning_rate": 0.00014981515652634736, "loss": 4.4951, "step": 78240 }, { "epoch": 0.1498865555111802, "grad_norm": 1.105753779411316, "learning_rate": 0.00014983431129045644, "loss": 4.5245, "step": 78250 }, { "epoch": 0.14990571034255543, "grad_norm": 1.0714919567108154, "learning_rate": 0.00014985346605456552, "loss": 4.5897, "step": 78260 }, { "epoch": 0.14992486517393067, "grad_norm": 1.0810463428497314, "learning_rate": 0.0001498726208186746, "loss": 4.5525, "step": 78270 }, { "epoch": 0.14994402000530588, "grad_norm": 1.1224193572998047, "learning_rate": 0.00014989177558278368, "loss": 4.6302, "step": 78280 }, { "epoch": 0.14996317483668112, 
"grad_norm": 1.16624116897583, "learning_rate": 0.00014991093034689277, "loss": 4.495, "step": 78290 }, { "epoch": 0.14998232966805636, "grad_norm": 1.0475364923477173, "learning_rate": 0.00014993008511100185, "loss": 4.5338, "step": 78300 }, { "epoch": 0.15000148449943157, "grad_norm": 1.1296789646148682, "learning_rate": 0.00014994923987511093, "loss": 4.5815, "step": 78310 }, { "epoch": 0.1500206393308068, "grad_norm": 1.1073490381240845, "learning_rate": 0.00014996839463922, "loss": 4.474, "step": 78320 }, { "epoch": 0.15003979416218205, "grad_norm": 1.070219874382019, "learning_rate": 0.0001499875494033291, "loss": 4.4833, "step": 78330 }, { "epoch": 0.15005894899355726, "grad_norm": 1.1603655815124512, "learning_rate": 0.00015000670416743814, "loss": 4.4828, "step": 78340 }, { "epoch": 0.1500781038249325, "grad_norm": 1.0949398279190063, "learning_rate": 0.00015002585893154723, "loss": 4.4905, "step": 78350 }, { "epoch": 0.15009725865630774, "grad_norm": 1.1219544410705566, "learning_rate": 0.00015004501369565633, "loss": 4.4859, "step": 78360 }, { "epoch": 0.15011641348768295, "grad_norm": 1.1130670309066772, "learning_rate": 0.00015006416845976542, "loss": 4.5045, "step": 78370 }, { "epoch": 0.1501355683190582, "grad_norm": 1.108711838722229, "learning_rate": 0.0001500833232238745, "loss": 4.5936, "step": 78380 }, { "epoch": 0.15015472315043343, "grad_norm": 1.1054555177688599, "learning_rate": 0.00015010247798798355, "loss": 4.5822, "step": 78390 }, { "epoch": 0.15017387798180865, "grad_norm": 1.0992461442947388, "learning_rate": 0.00015012163275209263, "loss": 4.5179, "step": 78400 }, { "epoch": 0.15019303281318389, "grad_norm": 1.0928071737289429, "learning_rate": 0.00015014078751620171, "loss": 4.4617, "step": 78410 }, { "epoch": 0.15021218764455913, "grad_norm": 1.1101239919662476, "learning_rate": 0.0001501599422803108, "loss": 4.4765, "step": 78420 }, { "epoch": 0.15023134247593434, "grad_norm": 1.073195457458496, "learning_rate": 
0.0001501790970444199, "loss": 4.4606, "step": 78430 }, { "epoch": 0.15025049730730958, "grad_norm": 1.0920002460479736, "learning_rate": 0.00015019825180852899, "loss": 4.6086, "step": 78440 }, { "epoch": 0.15026965213868482, "grad_norm": 1.0944843292236328, "learning_rate": 0.00015021740657263804, "loss": 4.5716, "step": 78450 }, { "epoch": 0.15028880697006003, "grad_norm": 1.1047616004943848, "learning_rate": 0.00015023656133674712, "loss": 4.5092, "step": 78460 }, { "epoch": 0.15030796180143527, "grad_norm": 1.0716928243637085, "learning_rate": 0.0001502557161008562, "loss": 4.5252, "step": 78470 }, { "epoch": 0.1503271166328105, "grad_norm": 1.1099581718444824, "learning_rate": 0.00015027487086496528, "loss": 4.4266, "step": 78480 }, { "epoch": 0.15034627146418572, "grad_norm": 1.1779369115829468, "learning_rate": 0.00015029402562907436, "loss": 4.3306, "step": 78490 }, { "epoch": 0.15036542629556096, "grad_norm": 1.103400468826294, "learning_rate": 0.00015031318039318342, "loss": 4.5862, "step": 78500 }, { "epoch": 0.1503845811269362, "grad_norm": 1.180682897567749, "learning_rate": 0.00015033233515729253, "loss": 4.6309, "step": 78510 }, { "epoch": 0.1504037359583114, "grad_norm": 1.099174976348877, "learning_rate": 0.0001503514899214016, "loss": 4.5256, "step": 78520 }, { "epoch": 0.15042289078968665, "grad_norm": 1.0778149366378784, "learning_rate": 0.0001503706446855107, "loss": 4.5551, "step": 78530 }, { "epoch": 0.1504420456210619, "grad_norm": 1.1040318012237549, "learning_rate": 0.00015038979944961977, "loss": 4.4373, "step": 78540 }, { "epoch": 0.1504612004524371, "grad_norm": 1.0748834609985352, "learning_rate": 0.00015040895421372885, "loss": 4.6604, "step": 78550 }, { "epoch": 0.15048035528381234, "grad_norm": 1.0711485147476196, "learning_rate": 0.0001504281089778379, "loss": 4.4787, "step": 78560 }, { "epoch": 0.15049951011518758, "grad_norm": 1.1014292240142822, "learning_rate": 0.000150447263741947, "loss": 4.5825, "step": 78570 }, { "epoch": 
0.1505186649465628, "grad_norm": 1.1270990371704102, "learning_rate": 0.0001504664185060561, "loss": 4.4824, "step": 78580 }, { "epoch": 0.15053781977793804, "grad_norm": 1.1139270067214966, "learning_rate": 0.00015048557327016518, "loss": 4.5186, "step": 78590 }, { "epoch": 0.15055697460931328, "grad_norm": 1.0905845165252686, "learning_rate": 0.00015050472803427426, "loss": 4.6645, "step": 78600 }, { "epoch": 0.1505761294406885, "grad_norm": 1.084673523902893, "learning_rate": 0.0001505238827983833, "loss": 4.5349, "step": 78610 }, { "epoch": 0.15059528427206373, "grad_norm": 1.0821737051010132, "learning_rate": 0.0001505430375624924, "loss": 4.5198, "step": 78620 }, { "epoch": 0.15061443910343897, "grad_norm": 1.0961759090423584, "learning_rate": 0.00015056219232660148, "loss": 4.6463, "step": 78630 }, { "epoch": 0.15063359393481418, "grad_norm": 1.0765080451965332, "learning_rate": 0.00015058134709071056, "loss": 4.467, "step": 78640 }, { "epoch": 0.15065274876618942, "grad_norm": 1.065605640411377, "learning_rate": 0.00015060050185481967, "loss": 4.5431, "step": 78650 }, { "epoch": 0.15067190359756466, "grad_norm": 1.079506516456604, "learning_rate": 0.00015061965661892875, "loss": 4.513, "step": 78660 }, { "epoch": 0.15069105842893987, "grad_norm": 1.0624580383300781, "learning_rate": 0.0001506388113830378, "loss": 4.5034, "step": 78670 }, { "epoch": 0.1507102132603151, "grad_norm": 1.122762680053711, "learning_rate": 0.00015065796614714688, "loss": 4.4393, "step": 78680 }, { "epoch": 0.15072936809169035, "grad_norm": 1.0840407609939575, "learning_rate": 0.00015067712091125596, "loss": 4.5209, "step": 78690 }, { "epoch": 0.15074852292306556, "grad_norm": 1.073352336883545, "learning_rate": 0.00015069627567536504, "loss": 4.6055, "step": 78700 }, { "epoch": 0.1507676777544408, "grad_norm": 1.10361909866333, "learning_rate": 0.00015071543043947413, "loss": 4.4948, "step": 78710 }, { "epoch": 0.15078683258581604, "grad_norm": 1.1216846704483032, "learning_rate": 
0.00015073458520358318, "loss": 4.5034, "step": 78720 }, { "epoch": 0.15080598741719126, "grad_norm": 1.0517021417617798, "learning_rate": 0.0001507537399676923, "loss": 4.508, "step": 78730 }, { "epoch": 0.1508251422485665, "grad_norm": 1.095808744430542, "learning_rate": 0.00015077289473180137, "loss": 4.5247, "step": 78740 }, { "epoch": 0.15084429707994174, "grad_norm": 1.1073616743087769, "learning_rate": 0.00015079204949591045, "loss": 4.5859, "step": 78750 }, { "epoch": 0.15086345191131695, "grad_norm": 1.0978063344955444, "learning_rate": 0.00015081120426001953, "loss": 4.5446, "step": 78760 }, { "epoch": 0.1508826067426922, "grad_norm": 1.058241605758667, "learning_rate": 0.0001508303590241286, "loss": 4.6046, "step": 78770 }, { "epoch": 0.15090176157406743, "grad_norm": 1.0537681579589844, "learning_rate": 0.00015084951378823767, "loss": 4.4969, "step": 78780 }, { "epoch": 0.15092091640544264, "grad_norm": 1.1759012937545776, "learning_rate": 0.00015086866855234675, "loss": 4.6076, "step": 78790 }, { "epoch": 0.15094007123681788, "grad_norm": 1.1565755605697632, "learning_rate": 0.00015088782331645586, "loss": 4.5205, "step": 78800 }, { "epoch": 0.15095922606819312, "grad_norm": 1.0849971771240234, "learning_rate": 0.00015090697808056494, "loss": 4.4793, "step": 78810 }, { "epoch": 0.15097838089956836, "grad_norm": 1.097162127494812, "learning_rate": 0.00015092613284467402, "loss": 4.441, "step": 78820 }, { "epoch": 0.15099753573094357, "grad_norm": 1.0961275100708008, "learning_rate": 0.00015094528760878307, "loss": 4.6487, "step": 78830 }, { "epoch": 0.1510166905623188, "grad_norm": 1.1064817905426025, "learning_rate": 0.00015096444237289216, "loss": 4.4294, "step": 78840 }, { "epoch": 0.15103584539369405, "grad_norm": 1.1458921432495117, "learning_rate": 0.00015098359713700124, "loss": 4.5749, "step": 78850 }, { "epoch": 0.15105500022506926, "grad_norm": 1.1032798290252686, "learning_rate": 0.00015100275190111032, "loss": 4.3578, "step": 78860 }, { 
"epoch": 0.1510741550564445, "grad_norm": 1.108984112739563, "learning_rate": 0.00015102190666521943, "loss": 4.5541, "step": 78870 }, { "epoch": 0.15109330988781974, "grad_norm": 1.156877040863037, "learning_rate": 0.00015104106142932848, "loss": 4.4787, "step": 78880 }, { "epoch": 0.15111246471919496, "grad_norm": 1.112462043762207, "learning_rate": 0.00015106021619343756, "loss": 4.5209, "step": 78890 }, { "epoch": 0.1511316195505702, "grad_norm": 1.113706350326538, "learning_rate": 0.00015107937095754664, "loss": 4.4586, "step": 78900 }, { "epoch": 0.15115077438194544, "grad_norm": 1.12366783618927, "learning_rate": 0.00015109852572165573, "loss": 4.5684, "step": 78910 }, { "epoch": 0.15116992921332065, "grad_norm": 1.0678907632827759, "learning_rate": 0.0001511176804857648, "loss": 4.5727, "step": 78920 }, { "epoch": 0.1511890840446959, "grad_norm": 1.1099224090576172, "learning_rate": 0.0001511368352498739, "loss": 4.5097, "step": 78930 }, { "epoch": 0.15120823887607113, "grad_norm": 1.100738286972046, "learning_rate": 0.00015115599001398294, "loss": 4.5691, "step": 78940 }, { "epoch": 0.15122739370744634, "grad_norm": 1.116135597229004, "learning_rate": 0.00015117514477809205, "loss": 4.5976, "step": 78950 }, { "epoch": 0.15124654853882158, "grad_norm": 1.1119741201400757, "learning_rate": 0.00015119429954220113, "loss": 4.6832, "step": 78960 }, { "epoch": 0.15126570337019682, "grad_norm": 1.152361273765564, "learning_rate": 0.0001512134543063102, "loss": 4.4212, "step": 78970 }, { "epoch": 0.15128485820157203, "grad_norm": 1.0920965671539307, "learning_rate": 0.0001512326090704193, "loss": 4.5331, "step": 78980 }, { "epoch": 0.15130401303294727, "grad_norm": 1.0570013523101807, "learning_rate": 0.00015125176383452835, "loss": 4.5211, "step": 78990 }, { "epoch": 0.1513231678643225, "grad_norm": 1.0668684244155884, "learning_rate": 0.00015127091859863743, "loss": 4.6691, "step": 79000 }, { "epoch": 0.15134232269569772, "grad_norm": 1.1742082834243774, 
"learning_rate": 0.0001512900733627465, "loss": 4.3492, "step": 79010 }, { "epoch": 0.15136147752707296, "grad_norm": 1.2015101909637451, "learning_rate": 0.00015130922812685562, "loss": 4.4822, "step": 79020 }, { "epoch": 0.1513806323584482, "grad_norm": 1.08887779712677, "learning_rate": 0.0001513283828909647, "loss": 4.4535, "step": 79030 }, { "epoch": 0.15139978718982341, "grad_norm": 1.1397521495819092, "learning_rate": 0.00015134753765507378, "loss": 4.5308, "step": 79040 }, { "epoch": 0.15141894202119865, "grad_norm": 1.1097781658172607, "learning_rate": 0.00015136669241918284, "loss": 4.52, "step": 79050 }, { "epoch": 0.1514380968525739, "grad_norm": 1.0868737697601318, "learning_rate": 0.00015138584718329192, "loss": 4.4587, "step": 79060 }, { "epoch": 0.1514572516839491, "grad_norm": 1.071823000907898, "learning_rate": 0.000151405001947401, "loss": 4.4092, "step": 79070 }, { "epoch": 0.15147640651532435, "grad_norm": 1.0626975297927856, "learning_rate": 0.00015142415671151008, "loss": 4.5243, "step": 79080 }, { "epoch": 0.1514955613466996, "grad_norm": 1.0660536289215088, "learning_rate": 0.0001514433114756192, "loss": 4.4723, "step": 79090 }, { "epoch": 0.1515147161780748, "grad_norm": 1.121912956237793, "learning_rate": 0.00015146246623972824, "loss": 4.5454, "step": 79100 }, { "epoch": 0.15153387100945004, "grad_norm": 1.123205304145813, "learning_rate": 0.00015148162100383732, "loss": 4.4265, "step": 79110 }, { "epoch": 0.15155302584082528, "grad_norm": 1.1405270099639893, "learning_rate": 0.0001515007757679464, "loss": 4.4984, "step": 79120 }, { "epoch": 0.1515721806722005, "grad_norm": 1.1029629707336426, "learning_rate": 0.0001515199305320555, "loss": 4.507, "step": 79130 }, { "epoch": 0.15159133550357573, "grad_norm": 1.1448032855987549, "learning_rate": 0.00015153716981975365, "loss": 4.6898, "step": 79140 }, { "epoch": 0.15161049033495097, "grad_norm": 1.0930851697921753, "learning_rate": 0.00015155632458386275, "loss": 4.5182, "step": 79150 }, 
{ "epoch": 0.15162964516632618, "grad_norm": 1.107916235923767, "learning_rate": 0.00015157547934797184, "loss": 4.3897, "step": 79160 }, { "epoch": 0.15164879999770142, "grad_norm": 1.1032578945159912, "learning_rate": 0.0001515946341120809, "loss": 4.5576, "step": 79170 }, { "epoch": 0.15166795482907666, "grad_norm": 1.089524507522583, "learning_rate": 0.00015161378887618997, "loss": 4.5461, "step": 79180 }, { "epoch": 0.15168710966045187, "grad_norm": 1.0818110704421997, "learning_rate": 0.00015163294364029905, "loss": 4.4594, "step": 79190 }, { "epoch": 0.15170626449182711, "grad_norm": 1.0917549133300781, "learning_rate": 0.00015165209840440813, "loss": 4.592, "step": 79200 }, { "epoch": 0.15172541932320235, "grad_norm": 1.1155351400375366, "learning_rate": 0.00015167125316851722, "loss": 4.4682, "step": 79210 }, { "epoch": 0.15174457415457757, "grad_norm": 1.1114988327026367, "learning_rate": 0.00015169040793262632, "loss": 4.5884, "step": 79220 }, { "epoch": 0.1517637289859528, "grad_norm": 1.1042978763580322, "learning_rate": 0.00015170956269673538, "loss": 4.6025, "step": 79230 }, { "epoch": 0.15178288381732805, "grad_norm": 1.0457614660263062, "learning_rate": 0.00015172871746084446, "loss": 4.5547, "step": 79240 }, { "epoch": 0.15180203864870326, "grad_norm": 1.0643147230148315, "learning_rate": 0.00015174787222495354, "loss": 4.5265, "step": 79250 }, { "epoch": 0.1518211934800785, "grad_norm": 1.10662043094635, "learning_rate": 0.00015176702698906262, "loss": 4.5605, "step": 79260 }, { "epoch": 0.15184034831145374, "grad_norm": 1.0871505737304688, "learning_rate": 0.0001517861817531717, "loss": 4.5045, "step": 79270 }, { "epoch": 0.15185950314282895, "grad_norm": 1.122096300125122, "learning_rate": 0.00015180533651728076, "loss": 4.6116, "step": 79280 }, { "epoch": 0.1518786579742042, "grad_norm": 1.0757595300674438, "learning_rate": 0.00015182449128138984, "loss": 4.379, "step": 79290 }, { "epoch": 0.15189781280557943, "grad_norm": 1.1042075157165527, 
"learning_rate": 0.00015184364604549895, "loss": 4.4936, "step": 79300 }, { "epoch": 0.15191696763695464, "grad_norm": 1.091296672821045, "learning_rate": 0.00015186280080960803, "loss": 4.6192, "step": 79310 }, { "epoch": 0.15193612246832988, "grad_norm": 1.0723620653152466, "learning_rate": 0.0001518819555737171, "loss": 4.597, "step": 79320 }, { "epoch": 0.15195527729970512, "grad_norm": 1.0595967769622803, "learning_rate": 0.0001519011103378262, "loss": 4.5359, "step": 79330 }, { "epoch": 0.15197443213108033, "grad_norm": 1.1390295028686523, "learning_rate": 0.00015192026510193525, "loss": 4.5367, "step": 79340 }, { "epoch": 0.15199358696245557, "grad_norm": 1.0975412130355835, "learning_rate": 0.00015193941986604433, "loss": 4.6787, "step": 79350 }, { "epoch": 0.1520127417938308, "grad_norm": 1.0678907632827759, "learning_rate": 0.0001519585746301534, "loss": 4.3108, "step": 79360 }, { "epoch": 0.15203189662520603, "grad_norm": 1.0721309185028076, "learning_rate": 0.00015197772939426252, "loss": 4.5436, "step": 79370 }, { "epoch": 0.15205105145658127, "grad_norm": 1.0869406461715698, "learning_rate": 0.0001519968841583716, "loss": 4.5449, "step": 79380 }, { "epoch": 0.1520702062879565, "grad_norm": 1.092373251914978, "learning_rate": 0.00015201603892248065, "loss": 4.5837, "step": 79390 }, { "epoch": 0.15208936111933172, "grad_norm": 1.090726375579834, "learning_rate": 0.00015203519368658973, "loss": 4.4566, "step": 79400 }, { "epoch": 0.15210851595070696, "grad_norm": 1.0460268259048462, "learning_rate": 0.00015205434845069881, "loss": 4.5972, "step": 79410 }, { "epoch": 0.1521276707820822, "grad_norm": 1.0875400304794312, "learning_rate": 0.0001520735032148079, "loss": 4.6802, "step": 79420 }, { "epoch": 0.1521468256134574, "grad_norm": 1.091448426246643, "learning_rate": 0.00015209265797891698, "loss": 4.6036, "step": 79430 }, { "epoch": 0.15216598044483265, "grad_norm": 1.1031650304794312, "learning_rate": 0.00015211181274302603, "loss": 4.5409, "step": 
79440 }, { "epoch": 0.1521851352762079, "grad_norm": 1.1062335968017578, "learning_rate": 0.00015213096750713514, "loss": 4.4617, "step": 79450 }, { "epoch": 0.1522042901075831, "grad_norm": 1.070254921913147, "learning_rate": 0.00015215012227124422, "loss": 4.5404, "step": 79460 }, { "epoch": 0.15222344493895834, "grad_norm": 1.097822904586792, "learning_rate": 0.0001521692770353533, "loss": 4.5749, "step": 79470 }, { "epoch": 0.15224259977033358, "grad_norm": 1.1039037704467773, "learning_rate": 0.00015218843179946238, "loss": 4.4917, "step": 79480 }, { "epoch": 0.1522617546017088, "grad_norm": 1.0992484092712402, "learning_rate": 0.00015220758656357147, "loss": 4.4278, "step": 79490 }, { "epoch": 0.15228090943308403, "grad_norm": 1.0350456237792969, "learning_rate": 0.00015222674132768052, "loss": 4.5939, "step": 79500 }, { "epoch": 0.15230006426445927, "grad_norm": 1.1165268421173096, "learning_rate": 0.0001522458960917896, "loss": 4.5314, "step": 79510 }, { "epoch": 0.15231921909583448, "grad_norm": 1.0596269369125366, "learning_rate": 0.0001522650508558987, "loss": 4.5289, "step": 79520 }, { "epoch": 0.15233837392720972, "grad_norm": 1.2057162523269653, "learning_rate": 0.0001522842056200078, "loss": 4.6474, "step": 79530 }, { "epoch": 0.15235752875858496, "grad_norm": 1.0740461349487305, "learning_rate": 0.00015230336038411687, "loss": 4.4983, "step": 79540 }, { "epoch": 0.15237668358996018, "grad_norm": 1.05857515335083, "learning_rate": 0.00015232251514822593, "loss": 4.6365, "step": 79550 }, { "epoch": 0.15239583842133542, "grad_norm": 1.0823718309402466, "learning_rate": 0.000152341669912335, "loss": 4.5037, "step": 79560 }, { "epoch": 0.15241499325271066, "grad_norm": 1.1055713891983032, "learning_rate": 0.0001523608246764441, "loss": 4.5843, "step": 79570 }, { "epoch": 0.15243414808408587, "grad_norm": 1.1182150840759277, "learning_rate": 0.00015237997944055317, "loss": 4.5948, "step": 79580 }, { "epoch": 0.1524533029154611, "grad_norm": 
1.1093778610229492, "learning_rate": 0.00015239913420466228, "loss": 4.6385, "step": 79590 }, { "epoch": 0.15247245774683635, "grad_norm": 1.1138803958892822, "learning_rate": 0.00015241828896877136, "loss": 4.5969, "step": 79600 }, { "epoch": 0.15249161257821156, "grad_norm": 1.0896269083023071, "learning_rate": 0.00015243744373288041, "loss": 4.3753, "step": 79610 }, { "epoch": 0.1525107674095868, "grad_norm": 1.0451502799987793, "learning_rate": 0.0001524565984969895, "loss": 4.5907, "step": 79620 }, { "epoch": 0.15252992224096204, "grad_norm": 1.1195346117019653, "learning_rate": 0.00015247575326109858, "loss": 4.4539, "step": 79630 }, { "epoch": 0.15254907707233725, "grad_norm": 1.0739716291427612, "learning_rate": 0.00015249490802520766, "loss": 4.63, "step": 79640 }, { "epoch": 0.1525682319037125, "grad_norm": 1.0760940313339233, "learning_rate": 0.00015251406278931674, "loss": 4.3997, "step": 79650 }, { "epoch": 0.15258738673508773, "grad_norm": 1.1161667108535767, "learning_rate": 0.0001525332175534258, "loss": 4.6008, "step": 79660 }, { "epoch": 0.15260654156646294, "grad_norm": 1.0830975770950317, "learning_rate": 0.00015255237231753487, "loss": 4.5631, "step": 79670 }, { "epoch": 0.15262569639783818, "grad_norm": 1.1331911087036133, "learning_rate": 0.00015257152708164398, "loss": 4.4573, "step": 79680 }, { "epoch": 0.15264485122921342, "grad_norm": 1.0793460607528687, "learning_rate": 0.00015259068184575306, "loss": 4.4401, "step": 79690 }, { "epoch": 0.15266400606058864, "grad_norm": 1.1247732639312744, "learning_rate": 0.00015260983660986215, "loss": 4.4142, "step": 79700 }, { "epoch": 0.15268316089196388, "grad_norm": 1.0889748334884644, "learning_rate": 0.00015262899137397123, "loss": 4.5536, "step": 79710 }, { "epoch": 0.15270231572333912, "grad_norm": 2.6279406547546387, "learning_rate": 0.00015264814613808028, "loss": 4.5955, "step": 79720 }, { "epoch": 0.15272147055471433, "grad_norm": 1.0863913297653198, "learning_rate": 
0.00015266730090218936, "loss": 4.5693, "step": 79730 }, { "epoch": 0.15274062538608957, "grad_norm": 1.0790420770645142, "learning_rate": 0.00015268645566629847, "loss": 4.5016, "step": 79740 }, { "epoch": 0.1527597802174648, "grad_norm": 1.0550556182861328, "learning_rate": 0.00015270561043040755, "loss": 4.4584, "step": 79750 }, { "epoch": 0.15277893504884002, "grad_norm": 1.1330831050872803, "learning_rate": 0.00015272476519451663, "loss": 4.3976, "step": 79760 }, { "epoch": 0.15279808988021526, "grad_norm": 1.0707871913909912, "learning_rate": 0.0001527439199586257, "loss": 4.5241, "step": 79770 }, { "epoch": 0.1528172447115905, "grad_norm": 1.195998191833496, "learning_rate": 0.00015276307472273477, "loss": 4.4908, "step": 79780 }, { "epoch": 0.1528363995429657, "grad_norm": 1.0740629434585571, "learning_rate": 0.00015278222948684385, "loss": 4.5626, "step": 79790 }, { "epoch": 0.15285555437434095, "grad_norm": 1.093548059463501, "learning_rate": 0.00015280138425095293, "loss": 4.3457, "step": 79800 }, { "epoch": 0.1528747092057162, "grad_norm": 1.4712032079696655, "learning_rate": 0.00015282053901506204, "loss": 4.5186, "step": 79810 }, { "epoch": 0.1528938640370914, "grad_norm": 1.0792897939682007, "learning_rate": 0.00015283969377917107, "loss": 4.4862, "step": 79820 }, { "epoch": 0.15291301886846664, "grad_norm": 1.1113790273666382, "learning_rate": 0.00015285884854328018, "loss": 4.6187, "step": 79830 }, { "epoch": 0.15293217369984188, "grad_norm": 1.0433858633041382, "learning_rate": 0.00015287800330738926, "loss": 4.4678, "step": 79840 }, { "epoch": 0.1529513285312171, "grad_norm": 1.129220724105835, "learning_rate": 0.00015289715807149834, "loss": 4.4697, "step": 79850 }, { "epoch": 0.15297048336259234, "grad_norm": 1.0788991451263428, "learning_rate": 0.00015291631283560742, "loss": 4.4774, "step": 79860 }, { "epoch": 0.15298963819396758, "grad_norm": 1.0763849020004272, "learning_rate": 0.0001529354675997165, "loss": 4.5513, "step": 79870 }, { 
"epoch": 0.1530087930253428, "grad_norm": 1.063939094543457, "learning_rate": 0.00015295462236382555, "loss": 4.5452, "step": 79880 }, { "epoch": 0.15302794785671803, "grad_norm": 1.1192866563796997, "learning_rate": 0.00015297377712793464, "loss": 4.403, "step": 79890 }, { "epoch": 0.15304710268809327, "grad_norm": 1.108920931816101, "learning_rate": 0.00015299293189204374, "loss": 4.3997, "step": 79900 }, { "epoch": 0.15306625751946848, "grad_norm": 1.1375558376312256, "learning_rate": 0.0001530101711797419, "loss": 4.4342, "step": 79910 }, { "epoch": 0.15308541235084372, "grad_norm": 1.1691763401031494, "learning_rate": 0.00015302932594385099, "loss": 4.4342, "step": 79920 }, { "epoch": 0.15310456718221896, "grad_norm": 1.0908195972442627, "learning_rate": 0.00015304848070796007, "loss": 4.5638, "step": 79930 }, { "epoch": 0.15312372201359417, "grad_norm": 1.1724604368209839, "learning_rate": 0.00015306763547206915, "loss": 4.6915, "step": 79940 }, { "epoch": 0.1531428768449694, "grad_norm": 1.0775229930877686, "learning_rate": 0.0001530867902361782, "loss": 4.4333, "step": 79950 }, { "epoch": 0.15316203167634465, "grad_norm": 1.073761224746704, "learning_rate": 0.0001531059450002873, "loss": 4.6205, "step": 79960 }, { "epoch": 0.15318118650771986, "grad_norm": 1.0820541381835938, "learning_rate": 0.0001531250997643964, "loss": 4.5677, "step": 79970 }, { "epoch": 0.1532003413390951, "grad_norm": 1.0609198808670044, "learning_rate": 0.00015314425452850547, "loss": 4.5173, "step": 79980 }, { "epoch": 0.15321949617047034, "grad_norm": 1.1003646850585938, "learning_rate": 0.00015316340929261455, "loss": 4.6025, "step": 79990 }, { "epoch": 0.15323865100184555, "grad_norm": 1.102329969406128, "learning_rate": 0.00015318256405672364, "loss": 4.5585, "step": 80000 }, { "epoch": 0.1532578058332208, "grad_norm": 1.076065182685852, "learning_rate": 0.0001532017188208327, "loss": 4.6244, "step": 80010 }, { "epoch": 0.15327696066459603, "grad_norm": 1.0447765588760376, 
"learning_rate": 0.00015322087358494177, "loss": 4.4665, "step": 80020 }, { "epoch": 0.15329611549597125, "grad_norm": 1.0397297143936157, "learning_rate": 0.00015324002834905088, "loss": 4.5195, "step": 80030 }, { "epoch": 0.1533152703273465, "grad_norm": 1.0975793600082397, "learning_rate": 0.00015325918311315996, "loss": 4.4734, "step": 80040 }, { "epoch": 0.15333442515872173, "grad_norm": 1.069075345993042, "learning_rate": 0.00015327833787726904, "loss": 4.5545, "step": 80050 }, { "epoch": 0.15335357999009694, "grad_norm": 1.102468729019165, "learning_rate": 0.0001532974926413781, "loss": 4.5051, "step": 80060 }, { "epoch": 0.15337273482147218, "grad_norm": 1.1066480875015259, "learning_rate": 0.00015331664740548718, "loss": 4.6607, "step": 80070 }, { "epoch": 0.15339188965284742, "grad_norm": 1.0947296619415283, "learning_rate": 0.00015333580216959626, "loss": 4.4805, "step": 80080 }, { "epoch": 0.15341104448422263, "grad_norm": 1.1942800283432007, "learning_rate": 0.00015335495693370534, "loss": 4.3933, "step": 80090 }, { "epoch": 0.15343019931559787, "grad_norm": 1.1423187255859375, "learning_rate": 0.00015337411169781445, "loss": 4.5994, "step": 80100 }, { "epoch": 0.1534493541469731, "grad_norm": 1.0851682424545288, "learning_rate": 0.0001533932664619235, "loss": 4.4131, "step": 80110 }, { "epoch": 0.15346850897834832, "grad_norm": 1.1156392097473145, "learning_rate": 0.00015341242122603258, "loss": 4.5282, "step": 80120 }, { "epoch": 0.15348766380972356, "grad_norm": 1.0965944528579712, "learning_rate": 0.00015343157599014167, "loss": 4.5105, "step": 80130 }, { "epoch": 0.1535068186410988, "grad_norm": 1.091364860534668, "learning_rate": 0.00015345073075425075, "loss": 4.5158, "step": 80140 }, { "epoch": 0.15352597347247404, "grad_norm": 1.0665220022201538, "learning_rate": 0.00015346988551835983, "loss": 4.5838, "step": 80150 }, { "epoch": 0.15354512830384925, "grad_norm": 1.0528572797775269, "learning_rate": 0.0001534890402824689, "loss": 4.6691, 
"step": 80160 }, { "epoch": 0.1535642831352245, "grad_norm": 1.134663462638855, "learning_rate": 0.00015350819504657796, "loss": 4.4954, "step": 80170 }, { "epoch": 0.15358343796659973, "grad_norm": 1.0644447803497314, "learning_rate": 0.00015352734981068707, "loss": 4.5963, "step": 80180 }, { "epoch": 0.15360259279797495, "grad_norm": 1.1058775186538696, "learning_rate": 0.00015354650457479615, "loss": 4.5762, "step": 80190 }, { "epoch": 0.15362174762935019, "grad_norm": 1.01175057888031, "learning_rate": 0.00015356565933890523, "loss": 4.4321, "step": 80200 }, { "epoch": 0.15364090246072543, "grad_norm": 1.139565348625183, "learning_rate": 0.00015358481410301432, "loss": 4.6947, "step": 80210 }, { "epoch": 0.15366005729210064, "grad_norm": 1.0913678407669067, "learning_rate": 0.00015360396886712337, "loss": 4.5651, "step": 80220 }, { "epoch": 0.15367921212347588, "grad_norm": 1.081654667854309, "learning_rate": 0.00015362312363123245, "loss": 4.4145, "step": 80230 }, { "epoch": 0.15369836695485112, "grad_norm": 1.1244395971298218, "learning_rate": 0.00015364227839534153, "loss": 4.4694, "step": 80240 }, { "epoch": 0.15371752178622633, "grad_norm": 1.0912203788757324, "learning_rate": 0.00015366143315945064, "loss": 4.5392, "step": 80250 }, { "epoch": 0.15373667661760157, "grad_norm": 1.047484278678894, "learning_rate": 0.00015368058792355972, "loss": 4.5561, "step": 80260 }, { "epoch": 0.1537558314489768, "grad_norm": 1.0905355215072632, "learning_rate": 0.0001536997426876688, "loss": 4.3613, "step": 80270 }, { "epoch": 0.15377498628035202, "grad_norm": 1.111810326576233, "learning_rate": 0.00015371889745177786, "loss": 4.603, "step": 80280 }, { "epoch": 0.15379414111172726, "grad_norm": 1.0552018880844116, "learning_rate": 0.00015373805221588694, "loss": 4.6434, "step": 80290 }, { "epoch": 0.1538132959431025, "grad_norm": 1.0511705875396729, "learning_rate": 0.00015375720697999602, "loss": 4.5447, "step": 80300 }, { "epoch": 0.1538324507744777, "grad_norm": 
1.0944864749908447, "learning_rate": 0.0001537763617441051, "loss": 4.5021, "step": 80310 }, { "epoch": 0.15385160560585295, "grad_norm": 1.0573389530181885, "learning_rate": 0.0001537955165082142, "loss": 4.6158, "step": 80320 }, { "epoch": 0.1538707604372282, "grad_norm": 1.0423434972763062, "learning_rate": 0.00015381467127232326, "loss": 4.5084, "step": 80330 }, { "epoch": 0.1538899152686034, "grad_norm": 1.09873366355896, "learning_rate": 0.00015383382603643235, "loss": 4.4636, "step": 80340 }, { "epoch": 0.15390907009997865, "grad_norm": 1.0511786937713623, "learning_rate": 0.00015385298080054143, "loss": 4.5198, "step": 80350 }, { "epoch": 0.15392822493135389, "grad_norm": 1.1167625188827515, "learning_rate": 0.0001538721355646505, "loss": 4.5009, "step": 80360 }, { "epoch": 0.1539473797627291, "grad_norm": 1.0731120109558105, "learning_rate": 0.0001538912903287596, "loss": 4.4082, "step": 80370 }, { "epoch": 0.15396653459410434, "grad_norm": 1.1000821590423584, "learning_rate": 0.00015391044509286867, "loss": 4.4736, "step": 80380 }, { "epoch": 0.15398568942547958, "grad_norm": 1.0798404216766357, "learning_rate": 0.00015392959985697773, "loss": 4.587, "step": 80390 }, { "epoch": 0.1540048442568548, "grad_norm": 1.085182547569275, "learning_rate": 0.00015394875462108683, "loss": 4.4521, "step": 80400 }, { "epoch": 0.15402399908823003, "grad_norm": 1.0907124280929565, "learning_rate": 0.00015396790938519592, "loss": 4.4, "step": 80410 }, { "epoch": 0.15404315391960527, "grad_norm": 1.1976584196090698, "learning_rate": 0.000153987064149305, "loss": 4.5641, "step": 80420 }, { "epoch": 0.15406230875098048, "grad_norm": 1.0699903964996338, "learning_rate": 0.00015400621891341408, "loss": 4.6686, "step": 80430 }, { "epoch": 0.15408146358235572, "grad_norm": 1.094206690788269, "learning_rate": 0.00015402537367752313, "loss": 4.5428, "step": 80440 }, { "epoch": 0.15410061841373096, "grad_norm": 1.0462948083877563, "learning_rate": 0.0001540445284416322, "loss": 
4.4734, "step": 80450 }, { "epoch": 0.15411977324510617, "grad_norm": 1.0612354278564453, "learning_rate": 0.0001540636832057413, "loss": 4.5135, "step": 80460 }, { "epoch": 0.1541389280764814, "grad_norm": 1.101129412651062, "learning_rate": 0.0001540828379698504, "loss": 4.5089, "step": 80470 }, { "epoch": 0.15415808290785665, "grad_norm": 1.0806244611740112, "learning_rate": 0.00015410199273395948, "loss": 4.5579, "step": 80480 }, { "epoch": 0.15417723773923186, "grad_norm": 1.1071261167526245, "learning_rate": 0.00015412114749806854, "loss": 4.4835, "step": 80490 }, { "epoch": 0.1541963925706071, "grad_norm": 1.0444631576538086, "learning_rate": 0.00015414030226217762, "loss": 4.6496, "step": 80500 }, { "epoch": 0.15421554740198234, "grad_norm": 1.0848875045776367, "learning_rate": 0.0001541594570262867, "loss": 4.4447, "step": 80510 }, { "epoch": 0.15423470223335756, "grad_norm": 1.0751527547836304, "learning_rate": 0.00015417861179039578, "loss": 4.5786, "step": 80520 }, { "epoch": 0.1542538570647328, "grad_norm": 1.3485996723175049, "learning_rate": 0.00015419776655450486, "loss": 4.6366, "step": 80530 }, { "epoch": 0.15427301189610804, "grad_norm": 1.0781123638153076, "learning_rate": 0.00015421692131861397, "loss": 4.5108, "step": 80540 }, { "epoch": 0.15429216672748325, "grad_norm": 1.0622988939285278, "learning_rate": 0.00015423607608272303, "loss": 4.5034, "step": 80550 }, { "epoch": 0.1543113215588585, "grad_norm": 1.1184245347976685, "learning_rate": 0.0001542552308468321, "loss": 4.6306, "step": 80560 }, { "epoch": 0.15433047639023373, "grad_norm": 1.1264570951461792, "learning_rate": 0.0001542743856109412, "loss": 4.4952, "step": 80570 }, { "epoch": 0.15434963122160894, "grad_norm": 1.084669589996338, "learning_rate": 0.00015429354037505027, "loss": 4.633, "step": 80580 }, { "epoch": 0.15436878605298418, "grad_norm": 1.0873581171035767, "learning_rate": 0.00015431269513915935, "loss": 4.7079, "step": 80590 }, { "epoch": 0.15438794088435942, 
"grad_norm": 1.08564293384552, "learning_rate": 0.0001543318499032684, "loss": 4.5682, "step": 80600 }, { "epoch": 0.15440709571573463, "grad_norm": 1.0932697057724, "learning_rate": 0.0001543510046673775, "loss": 4.561, "step": 80610 }, { "epoch": 0.15442625054710987, "grad_norm": 1.0988050699234009, "learning_rate": 0.0001543701594314866, "loss": 4.5836, "step": 80620 }, { "epoch": 0.1544454053784851, "grad_norm": 1.086812973022461, "learning_rate": 0.00015438931419559568, "loss": 4.5783, "step": 80630 }, { "epoch": 0.15446456020986032, "grad_norm": 1.0598233938217163, "learning_rate": 0.00015440846895970476, "loss": 4.5338, "step": 80640 }, { "epoch": 0.15448371504123556, "grad_norm": 1.1373769044876099, "learning_rate": 0.00015442762372381384, "loss": 4.3991, "step": 80650 }, { "epoch": 0.1545028698726108, "grad_norm": 1.0799810886383057, "learning_rate": 0.0001544467784879229, "loss": 4.5941, "step": 80660 }, { "epoch": 0.15452202470398602, "grad_norm": 1.1019177436828613, "learning_rate": 0.00015446593325203197, "loss": 4.5978, "step": 80670 }, { "epoch": 0.15454117953536126, "grad_norm": 1.0886955261230469, "learning_rate": 0.00015448508801614106, "loss": 4.5162, "step": 80680 }, { "epoch": 0.1545603343667365, "grad_norm": 1.0742034912109375, "learning_rate": 0.00015450424278025016, "loss": 4.5836, "step": 80690 }, { "epoch": 0.1545794891981117, "grad_norm": 1.077518343925476, "learning_rate": 0.00015452339754435925, "loss": 4.5376, "step": 80700 }, { "epoch": 0.15459864402948695, "grad_norm": 1.080964207649231, "learning_rate": 0.0001545425523084683, "loss": 4.5973, "step": 80710 }, { "epoch": 0.1546177988608622, "grad_norm": 1.0633994340896606, "learning_rate": 0.00015456170707257738, "loss": 4.6073, "step": 80720 }, { "epoch": 0.1546369536922374, "grad_norm": 1.0994175672531128, "learning_rate": 0.00015458086183668646, "loss": 4.5283, "step": 80730 }, { "epoch": 0.15465610852361264, "grad_norm": 1.0888367891311646, "learning_rate": 0.00015460001660079554, 
"loss": 4.5812, "step": 80740 }, { "epoch": 0.15467526335498788, "grad_norm": 1.1092529296875, "learning_rate": 0.00015461917136490463, "loss": 4.5054, "step": 80750 }, { "epoch": 0.1546944181863631, "grad_norm": 1.073338270187378, "learning_rate": 0.00015463832612901373, "loss": 4.5334, "step": 80760 }, { "epoch": 0.15471357301773833, "grad_norm": 1.0554156303405762, "learning_rate": 0.0001546574808931228, "loss": 4.6111, "step": 80770 }, { "epoch": 0.15473272784911357, "grad_norm": 1.0731474161148071, "learning_rate": 0.00015467663565723187, "loss": 4.4646, "step": 80780 }, { "epoch": 0.15475188268048878, "grad_norm": 1.0853737592697144, "learning_rate": 0.00015469579042134095, "loss": 4.4689, "step": 80790 }, { "epoch": 0.15477103751186402, "grad_norm": 1.098435878753662, "learning_rate": 0.00015471494518545003, "loss": 4.54, "step": 80800 }, { "epoch": 0.15479019234323926, "grad_norm": 1.077480673789978, "learning_rate": 0.0001547340999495591, "loss": 4.6745, "step": 80810 }, { "epoch": 0.15480934717461448, "grad_norm": 1.0694397687911987, "learning_rate": 0.00015475325471366817, "loss": 4.573, "step": 80820 }, { "epoch": 0.15482850200598972, "grad_norm": 1.0764060020446777, "learning_rate": 0.00015477240947777725, "loss": 4.4721, "step": 80830 }, { "epoch": 0.15484765683736496, "grad_norm": 1.0878390073776245, "learning_rate": 0.00015479156424188636, "loss": 4.4915, "step": 80840 }, { "epoch": 0.15486681166874017, "grad_norm": 1.0414856672286987, "learning_rate": 0.00015481071900599544, "loss": 4.637, "step": 80850 }, { "epoch": 0.1548859665001154, "grad_norm": 1.0956932306289673, "learning_rate": 0.00015482987377010452, "loss": 4.5429, "step": 80860 }, { "epoch": 0.15490512133149065, "grad_norm": 1.08456289768219, "learning_rate": 0.0001548490285342136, "loss": 4.5979, "step": 80870 }, { "epoch": 0.15492427616286586, "grad_norm": 1.0756102800369263, "learning_rate": 0.00015486818329832266, "loss": 4.5127, "step": 80880 }, { "epoch": 0.1549434309942411, 
"grad_norm": 1.0682015419006348, "learning_rate": 0.00015488733806243174, "loss": 4.5392, "step": 80890 }, { "epoch": 0.15496258582561634, "grad_norm": 1.0839149951934814, "learning_rate": 0.00015490649282654082, "loss": 4.6321, "step": 80900 }, { "epoch": 0.15498174065699155, "grad_norm": 1.0628198385238647, "learning_rate": 0.00015492564759064993, "loss": 4.4544, "step": 80910 }, { "epoch": 0.1550008954883668, "grad_norm": 1.015285849571228, "learning_rate": 0.000154944802354759, "loss": 4.4774, "step": 80920 }, { "epoch": 0.15502005031974203, "grad_norm": 1.0751724243164062, "learning_rate": 0.00015496395711886806, "loss": 4.5359, "step": 80930 }, { "epoch": 0.15503920515111724, "grad_norm": 1.1014238595962524, "learning_rate": 0.00015498311188297714, "loss": 4.4713, "step": 80940 }, { "epoch": 0.15505835998249248, "grad_norm": 1.071463942527771, "learning_rate": 0.00015500226664708622, "loss": 4.6451, "step": 80950 }, { "epoch": 0.15507751481386772, "grad_norm": 1.095589518547058, "learning_rate": 0.0001550214214111953, "loss": 4.2858, "step": 80960 }, { "epoch": 0.15509666964524293, "grad_norm": 1.0946037769317627, "learning_rate": 0.0001550405761753044, "loss": 4.4967, "step": 80970 }, { "epoch": 0.15511582447661817, "grad_norm": 1.072573184967041, "learning_rate": 0.00015505973093941344, "loss": 4.4137, "step": 80980 }, { "epoch": 0.15513497930799341, "grad_norm": 1.079248070716858, "learning_rate": 0.00015507888570352255, "loss": 4.5998, "step": 80990 }, { "epoch": 0.15515413413936863, "grad_norm": 1.0505656003952026, "learning_rate": 0.00015509804046763163, "loss": 4.5522, "step": 81000 }, { "epoch": 0.15517328897074387, "grad_norm": 1.176308274269104, "learning_rate": 0.0001551171952317407, "loss": 4.5029, "step": 81010 }, { "epoch": 0.1551924438021191, "grad_norm": 1.1364927291870117, "learning_rate": 0.0001551363499958498, "loss": 4.4318, "step": 81020 }, { "epoch": 0.15521159863349432, "grad_norm": 1.16996431350708, "learning_rate": 
0.00015515550475995887, "loss": 4.4873, "step": 81030 }, { "epoch": 0.15523075346486956, "grad_norm": 1.0720937252044678, "learning_rate": 0.00015517465952406793, "loss": 4.5587, "step": 81040 }, { "epoch": 0.1552499082962448, "grad_norm": 1.0926265716552734, "learning_rate": 0.000155193814288177, "loss": 4.5526, "step": 81050 }, { "epoch": 0.15526906312762, "grad_norm": 1.132064938545227, "learning_rate": 0.00015521296905228612, "loss": 4.5071, "step": 81060 }, { "epoch": 0.15528821795899525, "grad_norm": 1.0800082683563232, "learning_rate": 0.0001552321238163952, "loss": 4.5183, "step": 81070 }, { "epoch": 0.1553073727903705, "grad_norm": 1.0620301961898804, "learning_rate": 0.00015525127858050428, "loss": 4.4328, "step": 81080 }, { "epoch": 0.1553265276217457, "grad_norm": 1.0832879543304443, "learning_rate": 0.00015527043334461334, "loss": 4.4575, "step": 81090 }, { "epoch": 0.15534568245312094, "grad_norm": 1.0533721446990967, "learning_rate": 0.00015528958810872242, "loss": 4.5321, "step": 81100 }, { "epoch": 0.15536483728449618, "grad_norm": 1.056815505027771, "learning_rate": 0.0001553087428728315, "loss": 4.438, "step": 81110 }, { "epoch": 0.1553839921158714, "grad_norm": 1.0687150955200195, "learning_rate": 0.00015532789763694058, "loss": 4.7629, "step": 81120 }, { "epoch": 0.15540314694724663, "grad_norm": 1.1793328523635864, "learning_rate": 0.0001553470524010497, "loss": 4.4989, "step": 81130 }, { "epoch": 0.15542230177862187, "grad_norm": 1.0712629556655884, "learning_rate": 0.00015536620716515877, "loss": 4.3791, "step": 81140 }, { "epoch": 0.1554414566099971, "grad_norm": 1.0902345180511475, "learning_rate": 0.00015538536192926782, "loss": 4.506, "step": 81150 }, { "epoch": 0.15546061144137233, "grad_norm": 1.0395478010177612, "learning_rate": 0.0001554045166933769, "loss": 4.4495, "step": 81160 }, { "epoch": 0.15547976627274757, "grad_norm": 1.0809030532836914, "learning_rate": 0.00015542367145748599, "loss": 4.5452, "step": 81170 }, { "epoch": 
0.15549892110412278, "grad_norm": 1.1942105293273926, "learning_rate": 0.00015544282622159507, "loss": 4.5299, "step": 81180 }, { "epoch": 0.15551807593549802, "grad_norm": 1.0943679809570312, "learning_rate": 0.00015546198098570415, "loss": 4.642, "step": 81190 }, { "epoch": 0.15553723076687326, "grad_norm": 1.0779110193252563, "learning_rate": 0.0001554811357498132, "loss": 4.6192, "step": 81200 }, { "epoch": 0.15555638559824847, "grad_norm": 1.0971722602844238, "learning_rate": 0.0001555002905139223, "loss": 4.5444, "step": 81210 }, { "epoch": 0.1555755404296237, "grad_norm": 1.043191909790039, "learning_rate": 0.0001555194452780314, "loss": 4.6559, "step": 81220 }, { "epoch": 0.15559469526099895, "grad_norm": 1.0779353380203247, "learning_rate": 0.00015553860004214047, "loss": 4.6891, "step": 81230 }, { "epoch": 0.15561385009237416, "grad_norm": 1.0841492414474487, "learning_rate": 0.00015555775480624956, "loss": 4.6761, "step": 81240 }, { "epoch": 0.1556330049237494, "grad_norm": 1.031819462776184, "learning_rate": 0.00015557690957035864, "loss": 4.4472, "step": 81250 }, { "epoch": 0.15565215975512464, "grad_norm": 1.0528818368911743, "learning_rate": 0.0001555960643344677, "loss": 4.5067, "step": 81260 }, { "epoch": 0.15567131458649985, "grad_norm": 1.124150037765503, "learning_rate": 0.00015561521909857677, "loss": 4.6497, "step": 81270 }, { "epoch": 0.1556904694178751, "grad_norm": 1.0558708906173706, "learning_rate": 0.00015563437386268588, "loss": 4.4799, "step": 81280 }, { "epoch": 0.15570962424925033, "grad_norm": 1.0726896524429321, "learning_rate": 0.00015565352862679496, "loss": 4.6293, "step": 81290 }, { "epoch": 0.15572877908062555, "grad_norm": 1.0657448768615723, "learning_rate": 0.00015567268339090404, "loss": 4.5522, "step": 81300 }, { "epoch": 0.15574793391200079, "grad_norm": 1.0535873174667358, "learning_rate": 0.0001556918381550131, "loss": 4.5955, "step": 81310 }, { "epoch": 0.15576708874337603, "grad_norm": 1.1845076084136963, 
"learning_rate": 0.00015571099291912218, "loss": 4.544, "step": 81320 }, { "epoch": 0.15578624357475124, "grad_norm": 1.050013542175293, "learning_rate": 0.00015573014768323126, "loss": 4.5353, "step": 81330 }, { "epoch": 0.15580539840612648, "grad_norm": 1.0787323713302612, "learning_rate": 0.00015574930244734034, "loss": 4.6431, "step": 81340 }, { "epoch": 0.15582455323750172, "grad_norm": 1.073774814605713, "learning_rate": 0.00015576845721144945, "loss": 4.4887, "step": 81350 }, { "epoch": 0.15584370806887693, "grad_norm": 1.013668417930603, "learning_rate": 0.00015578761197555853, "loss": 4.3849, "step": 81360 }, { "epoch": 0.15586286290025217, "grad_norm": 1.0670312643051147, "learning_rate": 0.00015580676673966758, "loss": 4.6418, "step": 81370 }, { "epoch": 0.1558820177316274, "grad_norm": 1.107409119606018, "learning_rate": 0.00015582592150377667, "loss": 4.5124, "step": 81380 }, { "epoch": 0.15590117256300262, "grad_norm": 1.0376057624816895, "learning_rate": 0.00015584507626788575, "loss": 4.4343, "step": 81390 }, { "epoch": 0.15592032739437786, "grad_norm": 1.071905493736267, "learning_rate": 0.00015586423103199483, "loss": 4.6506, "step": 81400 }, { "epoch": 0.1559394822257531, "grad_norm": 1.2549012899398804, "learning_rate": 0.0001558833857961039, "loss": 4.5475, "step": 81410 }, { "epoch": 0.1559586370571283, "grad_norm": 1.0984736680984497, "learning_rate": 0.00015590254056021296, "loss": 4.4949, "step": 81420 }, { "epoch": 0.15597779188850355, "grad_norm": 1.0530555248260498, "learning_rate": 0.00015592169532432207, "loss": 4.5237, "step": 81430 }, { "epoch": 0.1559969467198788, "grad_norm": 1.137625813484192, "learning_rate": 0.00015594085008843115, "loss": 4.5142, "step": 81440 }, { "epoch": 0.15601610155125403, "grad_norm": 1.0515594482421875, "learning_rate": 0.00015596000485254024, "loss": 4.5233, "step": 81450 }, { "epoch": 0.15603525638262924, "grad_norm": 1.0657761096954346, "learning_rate": 0.00015597915961664932, "loss": 4.5006, "step": 
81460 }, { "epoch": 0.15605441121400448, "grad_norm": 1.0665103197097778, "learning_rate": 0.00015599831438075837, "loss": 4.7361, "step": 81470 }, { "epoch": 0.15607356604537972, "grad_norm": 1.1041829586029053, "learning_rate": 0.00015601746914486745, "loss": 4.4798, "step": 81480 }, { "epoch": 0.15609272087675494, "grad_norm": 1.1140620708465576, "learning_rate": 0.00015603662390897653, "loss": 4.6393, "step": 81490 }, { "epoch": 0.15611187570813018, "grad_norm": 1.1070533990859985, "learning_rate": 0.00015605577867308564, "loss": 4.5432, "step": 81500 }, { "epoch": 0.15613103053950542, "grad_norm": 1.1510896682739258, "learning_rate": 0.00015607493343719472, "loss": 4.5014, "step": 81510 }, { "epoch": 0.15615018537088063, "grad_norm": 1.0695644617080688, "learning_rate": 0.0001560940882013038, "loss": 4.601, "step": 81520 }, { "epoch": 0.15616934020225587, "grad_norm": 1.0943361520767212, "learning_rate": 0.00015611324296541286, "loss": 4.6245, "step": 81530 }, { "epoch": 0.1561884950336311, "grad_norm": 1.069622278213501, "learning_rate": 0.00015613239772952194, "loss": 4.5985, "step": 81540 }, { "epoch": 0.15620764986500632, "grad_norm": 1.1108096837997437, "learning_rate": 0.00015615155249363102, "loss": 4.7952, "step": 81550 }, { "epoch": 0.15622680469638156, "grad_norm": 1.046888828277588, "learning_rate": 0.0001561707072577401, "loss": 4.5223, "step": 81560 }, { "epoch": 0.1562459595277568, "grad_norm": 1.0446040630340576, "learning_rate": 0.0001561898620218492, "loss": 4.4136, "step": 81570 }, { "epoch": 0.156265114359132, "grad_norm": 1.0424890518188477, "learning_rate": 0.00015620901678595827, "loss": 4.4701, "step": 81580 }, { "epoch": 0.15628426919050725, "grad_norm": 1.0502989292144775, "learning_rate": 0.00015622817155006735, "loss": 4.3957, "step": 81590 }, { "epoch": 0.1563034240218825, "grad_norm": 1.0961973667144775, "learning_rate": 0.00015624732631417643, "loss": 4.5755, "step": 81600 }, { "epoch": 0.1563225788532577, "grad_norm": 
1.1141200065612793, "learning_rate": 0.0001562664810782855, "loss": 4.5499, "step": 81610 }, { "epoch": 0.15634173368463294, "grad_norm": 1.0280671119689941, "learning_rate": 0.0001562856358423946, "loss": 4.4761, "step": 81620 }, { "epoch": 0.15636088851600818, "grad_norm": 1.1063921451568604, "learning_rate": 0.00015630479060650367, "loss": 4.5486, "step": 81630 }, { "epoch": 0.1563800433473834, "grad_norm": 1.103340983390808, "learning_rate": 0.00015632394537061273, "loss": 4.4981, "step": 81640 }, { "epoch": 0.15639919817875864, "grad_norm": 1.0605448484420776, "learning_rate": 0.00015634310013472183, "loss": 4.6497, "step": 81650 }, { "epoch": 0.15641835301013388, "grad_norm": 1.0942336320877075, "learning_rate": 0.00015636225489883092, "loss": 4.6564, "step": 81660 }, { "epoch": 0.1564375078415091, "grad_norm": 1.1445074081420898, "learning_rate": 0.00015638140966294, "loss": 4.5602, "step": 81670 }, { "epoch": 0.15645666267288433, "grad_norm": 1.0467591285705566, "learning_rate": 0.00015640056442704908, "loss": 4.5858, "step": 81680 }, { "epoch": 0.15647581750425957, "grad_norm": 1.1087530851364136, "learning_rate": 0.00015641971919115813, "loss": 4.5495, "step": 81690 }, { "epoch": 0.15649497233563478, "grad_norm": 1.0793098211288452, "learning_rate": 0.00015643887395526721, "loss": 4.4958, "step": 81700 }, { "epoch": 0.15651412716701002, "grad_norm": 1.1301028728485107, "learning_rate": 0.0001564580287193763, "loss": 4.472, "step": 81710 }, { "epoch": 0.15653328199838526, "grad_norm": 1.085623860359192, "learning_rate": 0.0001564771834834854, "loss": 4.5743, "step": 81720 }, { "epoch": 0.15655243682976047, "grad_norm": 1.0725997686386108, "learning_rate": 0.00015649633824759448, "loss": 4.4467, "step": 81730 }, { "epoch": 0.1565715916611357, "grad_norm": 1.069661021232605, "learning_rate": 0.00015651549301170357, "loss": 4.4755, "step": 81740 }, { "epoch": 0.15659074649251095, "grad_norm": 1.044553518295288, "learning_rate": 0.00015653464777581262, "loss": 
4.5185, "step": 81750 }, { "epoch": 0.15660990132388616, "grad_norm": 1.0863168239593506, "learning_rate": 0.0001565538025399217, "loss": 4.5675, "step": 81760 }, { "epoch": 0.1566290561552614, "grad_norm": 1.0646584033966064, "learning_rate": 0.00015657295730403078, "loss": 4.637, "step": 81770 }, { "epoch": 0.15664821098663664, "grad_norm": 1.0822356939315796, "learning_rate": 0.00015659211206813986, "loss": 4.6336, "step": 81780 }, { "epoch": 0.15666736581801186, "grad_norm": 1.0704078674316406, "learning_rate": 0.00015661126683224897, "loss": 4.5345, "step": 81790 }, { "epoch": 0.1566865206493871, "grad_norm": 1.106968641281128, "learning_rate": 0.00015663042159635803, "loss": 4.5752, "step": 81800 }, { "epoch": 0.15670567548076234, "grad_norm": 1.2736554145812988, "learning_rate": 0.0001566495763604671, "loss": 4.4653, "step": 81810 }, { "epoch": 0.15672483031213755, "grad_norm": 1.0939157009124756, "learning_rate": 0.0001566687311245762, "loss": 4.4578, "step": 81820 }, { "epoch": 0.1567439851435128, "grad_norm": 1.1087621450424194, "learning_rate": 0.00015668788588868527, "loss": 4.5186, "step": 81830 }, { "epoch": 0.15676313997488803, "grad_norm": 1.1001240015029907, "learning_rate": 0.00015670704065279435, "loss": 4.536, "step": 81840 }, { "epoch": 0.15678229480626324, "grad_norm": 1.0822641849517822, "learning_rate": 0.0001567261954169034, "loss": 4.5442, "step": 81850 }, { "epoch": 0.15680144963763848, "grad_norm": 1.0909587144851685, "learning_rate": 0.0001567453501810125, "loss": 4.4471, "step": 81860 }, { "epoch": 0.15682060446901372, "grad_norm": 1.0549265146255493, "learning_rate": 0.0001567645049451216, "loss": 4.4856, "step": 81870 }, { "epoch": 0.15683975930038893, "grad_norm": 1.079992651939392, "learning_rate": 0.00015678365970923068, "loss": 4.6083, "step": 81880 }, { "epoch": 0.15685891413176417, "grad_norm": 1.1386483907699585, "learning_rate": 0.00015680281447333976, "loss": 4.595, "step": 81890 }, { "epoch": 0.1568780689631394, 
"grad_norm": 1.1004142761230469, "learning_rate": 0.00015682196923744884, "loss": 4.4954, "step": 81900 }, { "epoch": 0.15689722379451462, "grad_norm": 1.1398186683654785, "learning_rate": 0.0001568411240015579, "loss": 4.479, "step": 81910 }, { "epoch": 0.15691637862588986, "grad_norm": 1.0925841331481934, "learning_rate": 0.00015686027876566698, "loss": 4.5339, "step": 81920 }, { "epoch": 0.1569355334572651, "grad_norm": 1.0813740491867065, "learning_rate": 0.00015687943352977606, "loss": 4.5948, "step": 81930 }, { "epoch": 0.15695468828864031, "grad_norm": 1.0895342826843262, "learning_rate": 0.00015689858829388517, "loss": 4.709, "step": 81940 }, { "epoch": 0.15697384312001555, "grad_norm": 1.0795798301696777, "learning_rate": 0.00015691774305799425, "loss": 4.489, "step": 81950 }, { "epoch": 0.1569929979513908, "grad_norm": 1.0557607412338257, "learning_rate": 0.0001569368978221033, "loss": 4.5997, "step": 81960 }, { "epoch": 0.157012152782766, "grad_norm": 1.0718806982040405, "learning_rate": 0.00015695605258621238, "loss": 4.6092, "step": 81970 }, { "epoch": 0.15703130761414125, "grad_norm": 1.0700914859771729, "learning_rate": 0.00015697520735032146, "loss": 4.4564, "step": 81980 }, { "epoch": 0.1570504624455165, "grad_norm": 1.0950952768325806, "learning_rate": 0.00015699436211443054, "loss": 4.5826, "step": 81990 }, { "epoch": 0.1570696172768917, "grad_norm": 1.0885987281799316, "learning_rate": 0.00015701351687853963, "loss": 4.6642, "step": 82000 }, { "epoch": 0.15708877210826694, "grad_norm": 1.0739104747772217, "learning_rate": 0.00015703267164264873, "loss": 4.585, "step": 82010 }, { "epoch": 0.15710792693964218, "grad_norm": 1.0442593097686768, "learning_rate": 0.00015705182640675776, "loss": 4.5512, "step": 82020 }, { "epoch": 0.1571270817710174, "grad_norm": 1.1040072441101074, "learning_rate": 0.00015707098117086687, "loss": 4.4736, "step": 82030 }, { "epoch": 0.15714623660239263, "grad_norm": 1.1047462224960327, "learning_rate": 
0.00015709013593497595, "loss": 4.5104, "step": 82040 }, { "epoch": 0.15716539143376787, "grad_norm": 1.0171122550964355, "learning_rate": 0.00015710929069908503, "loss": 4.5648, "step": 82050 }, { "epoch": 0.15718454626514308, "grad_norm": 1.0423792600631714, "learning_rate": 0.00015712844546319411, "loss": 4.7013, "step": 82060 }, { "epoch": 0.15720370109651832, "grad_norm": 1.076218605041504, "learning_rate": 0.00015714760022730317, "loss": 4.5587, "step": 82070 }, { "epoch": 0.15722285592789356, "grad_norm": 1.064265251159668, "learning_rate": 0.00015716675499141225, "loss": 4.541, "step": 82080 }, { "epoch": 0.15724201075926877, "grad_norm": 1.0543761253356934, "learning_rate": 0.00015718590975552136, "loss": 4.5724, "step": 82090 }, { "epoch": 0.15726116559064401, "grad_norm": 1.0757737159729004, "learning_rate": 0.00015720506451963044, "loss": 4.4408, "step": 82100 }, { "epoch": 0.15728032042201925, "grad_norm": 1.1220331192016602, "learning_rate": 0.00015722421928373952, "loss": 4.5476, "step": 82110 }, { "epoch": 0.15729947525339447, "grad_norm": 1.0722966194152832, "learning_rate": 0.0001572433740478486, "loss": 4.5431, "step": 82120 }, { "epoch": 0.1573186300847697, "grad_norm": 1.0917619466781616, "learning_rate": 0.00015726252881195766, "loss": 4.4954, "step": 82130 }, { "epoch": 0.15733778491614495, "grad_norm": 1.0736217498779297, "learning_rate": 0.00015728168357606674, "loss": 4.5213, "step": 82140 }, { "epoch": 0.15735693974752016, "grad_norm": 1.086923360824585, "learning_rate": 0.00015730083834017582, "loss": 4.397, "step": 82150 }, { "epoch": 0.1573760945788954, "grad_norm": 1.0978788137435913, "learning_rate": 0.00015731999310428493, "loss": 4.5588, "step": 82160 }, { "epoch": 0.15739524941027064, "grad_norm": 1.0760247707366943, "learning_rate": 0.000157339147868394, "loss": 4.4978, "step": 82170 }, { "epoch": 0.15741440424164585, "grad_norm": 1.092084527015686, "learning_rate": 0.00015735830263250306, "loss": 4.5204, "step": 82180 }, { 
"epoch": 0.1574335590730211, "grad_norm": 1.0645610094070435, "learning_rate": 0.00015737745739661214, "loss": 4.5889, "step": 82190 }, { "epoch": 0.15745271390439633, "grad_norm": 1.059127926826477, "learning_rate": 0.00015739661216072122, "loss": 4.7193, "step": 82200 }, { "epoch": 0.15747186873577154, "grad_norm": 1.0528626441955566, "learning_rate": 0.0001574157669248303, "loss": 4.515, "step": 82210 }, { "epoch": 0.15749102356714678, "grad_norm": 1.0618081092834473, "learning_rate": 0.0001574349216889394, "loss": 4.5224, "step": 82220 }, { "epoch": 0.15751017839852202, "grad_norm": 1.064578652381897, "learning_rate": 0.0001574540764530485, "loss": 4.5674, "step": 82230 }, { "epoch": 0.15752933322989723, "grad_norm": 1.0958547592163086, "learning_rate": 0.00015747323121715752, "loss": 4.4512, "step": 82240 }, { "epoch": 0.15754848806127247, "grad_norm": 1.0578397512435913, "learning_rate": 0.00015749238598126663, "loss": 4.5787, "step": 82250 }, { "epoch": 0.1575676428926477, "grad_norm": 1.0328099727630615, "learning_rate": 0.0001575115407453757, "loss": 4.4929, "step": 82260 }, { "epoch": 0.15758679772402293, "grad_norm": 1.0402251482009888, "learning_rate": 0.0001575306955094848, "loss": 4.4368, "step": 82270 }, { "epoch": 0.15760595255539817, "grad_norm": 1.0460034608840942, "learning_rate": 0.00015754985027359388, "loss": 4.452, "step": 82280 }, { "epoch": 0.1576251073867734, "grad_norm": 1.0918569564819336, "learning_rate": 0.00015756900503770293, "loss": 4.6735, "step": 82290 }, { "epoch": 0.15764426221814862, "grad_norm": 1.087265133857727, "learning_rate": 0.000157588159801812, "loss": 4.4907, "step": 82300 }, { "epoch": 0.15766341704952386, "grad_norm": 1.1032793521881104, "learning_rate": 0.0001576073145659211, "loss": 4.4908, "step": 82310 }, { "epoch": 0.1576825718808991, "grad_norm": 1.0775035619735718, "learning_rate": 0.0001576264693300302, "loss": 4.5945, "step": 82320 }, { "epoch": 0.1577017267122743, "grad_norm": 1.0757606029510498, 
"learning_rate": 0.00015764562409413928, "loss": 4.5968, "step": 82330 }, { "epoch": 0.15772088154364955, "grad_norm": 1.102752447128296, "learning_rate": 0.00015766477885824834, "loss": 4.4571, "step": 82340 }, { "epoch": 0.1577400363750248, "grad_norm": 1.137650728225708, "learning_rate": 0.00015768393362235742, "loss": 4.5206, "step": 82350 }, { "epoch": 0.1577591912064, "grad_norm": 1.0404863357543945, "learning_rate": 0.0001577030883864665, "loss": 4.4799, "step": 82360 }, { "epoch": 0.15777834603777524, "grad_norm": 1.0941427946090698, "learning_rate": 0.00015772224315057558, "loss": 4.6059, "step": 82370 }, { "epoch": 0.15779750086915048, "grad_norm": 1.0523767471313477, "learning_rate": 0.0001577413979146847, "loss": 4.6501, "step": 82380 }, { "epoch": 0.1578166557005257, "grad_norm": 1.0209896564483643, "learning_rate": 0.00015776055267879377, "loss": 4.3924, "step": 82390 }, { "epoch": 0.15783581053190093, "grad_norm": 1.095768690109253, "learning_rate": 0.00015777970744290282, "loss": 4.5963, "step": 82400 }, { "epoch": 0.15785496536327617, "grad_norm": 1.0768976211547852, "learning_rate": 0.0001577988622070119, "loss": 4.4657, "step": 82410 }, { "epoch": 0.15787412019465139, "grad_norm": 1.0721728801727295, "learning_rate": 0.000157818016971121, "loss": 4.4439, "step": 82420 }, { "epoch": 0.15789327502602662, "grad_norm": 1.0791699886322021, "learning_rate": 0.00015783717173523007, "loss": 4.4835, "step": 82430 }, { "epoch": 0.15791242985740186, "grad_norm": 1.112326741218567, "learning_rate": 0.00015785632649933915, "loss": 4.5558, "step": 82440 }, { "epoch": 0.15793158468877708, "grad_norm": 1.0759419202804565, "learning_rate": 0.0001578754812634482, "loss": 4.5146, "step": 82450 }, { "epoch": 0.15795073952015232, "grad_norm": 1.224359393119812, "learning_rate": 0.00015789463602755728, "loss": 4.39, "step": 82460 }, { "epoch": 0.15796989435152756, "grad_norm": 1.0498769283294678, "learning_rate": 0.0001579137907916664, "loss": 4.5611, "step": 82470 }, 
{ "epoch": 0.15798904918290277, "grad_norm": 1.040482997894287, "learning_rate": 0.00015793294555577547, "loss": 4.4448, "step": 82480 }, { "epoch": 0.158008204014278, "grad_norm": 1.1573890447616577, "learning_rate": 0.00015795210031988456, "loss": 4.5505, "step": 82490 }, { "epoch": 0.15802735884565325, "grad_norm": 1.0773043632507324, "learning_rate": 0.00015797125508399364, "loss": 4.6547, "step": 82500 }, { "epoch": 0.15804651367702846, "grad_norm": 1.0501412153244019, "learning_rate": 0.0001579904098481027, "loss": 4.5112, "step": 82510 }, { "epoch": 0.1580656685084037, "grad_norm": 1.0595335960388184, "learning_rate": 0.00015800956461221177, "loss": 4.3949, "step": 82520 }, { "epoch": 0.15808482333977894, "grad_norm": 1.0409603118896484, "learning_rate": 0.00015802871937632085, "loss": 4.5438, "step": 82530 }, { "epoch": 0.15810397817115415, "grad_norm": 1.05857253074646, "learning_rate": 0.00015804787414042996, "loss": 4.3472, "step": 82540 }, { "epoch": 0.1581231330025294, "grad_norm": 1.044842004776001, "learning_rate": 0.00015806702890453904, "loss": 4.6372, "step": 82550 }, { "epoch": 0.15814228783390463, "grad_norm": 1.488146424293518, "learning_rate": 0.0001580861836686481, "loss": 4.5345, "step": 82560 }, { "epoch": 0.15816144266527984, "grad_norm": 1.071763038635254, "learning_rate": 0.00015810533843275718, "loss": 4.498, "step": 82570 }, { "epoch": 0.15818059749665508, "grad_norm": 1.0613138675689697, "learning_rate": 0.00015812449319686626, "loss": 4.515, "step": 82580 }, { "epoch": 0.15819975232803032, "grad_norm": 1.0908273458480835, "learning_rate": 0.00015814364796097534, "loss": 4.5873, "step": 82590 }, { "epoch": 0.15821890715940554, "grad_norm": 1.0654112100601196, "learning_rate": 0.00015816280272508445, "loss": 4.5935, "step": 82600 }, { "epoch": 0.15823806199078078, "grad_norm": 1.0490233898162842, "learning_rate": 0.00015818195748919353, "loss": 4.5253, "step": 82610 }, { "epoch": 0.15825721682215602, "grad_norm": 1.061755657196045, 
"learning_rate": 0.00015820111225330259, "loss": 4.5978, "step": 82620 }, { "epoch": 0.15827637165353123, "grad_norm": 1.0693899393081665, "learning_rate": 0.00015822026701741167, "loss": 4.681, "step": 82630 }, { "epoch": 0.15829552648490647, "grad_norm": 1.0709244012832642, "learning_rate": 0.00015823942178152075, "loss": 4.5962, "step": 82640 }, { "epoch": 0.1583146813162817, "grad_norm": 1.0700541734695435, "learning_rate": 0.00015825857654562983, "loss": 4.5698, "step": 82650 }, { "epoch": 0.15833383614765692, "grad_norm": 1.0541247129440308, "learning_rate": 0.0001582777313097389, "loss": 4.4559, "step": 82660 }, { "epoch": 0.15835299097903216, "grad_norm": 1.0912320613861084, "learning_rate": 0.00015829688607384797, "loss": 4.4366, "step": 82670 }, { "epoch": 0.1583721458104074, "grad_norm": 1.0367720127105713, "learning_rate": 0.00015831604083795705, "loss": 4.59, "step": 82680 }, { "epoch": 0.1583913006417826, "grad_norm": 1.0697927474975586, "learning_rate": 0.00015833519560206615, "loss": 4.5437, "step": 82690 }, { "epoch": 0.15841045547315785, "grad_norm": 1.0767741203308105, "learning_rate": 0.00015835435036617524, "loss": 4.6219, "step": 82700 }, { "epoch": 0.1584296103045331, "grad_norm": 1.026092290878296, "learning_rate": 0.00015837350513028432, "loss": 4.494, "step": 82710 }, { "epoch": 0.1584487651359083, "grad_norm": 1.0436447858810425, "learning_rate": 0.0001583926598943934, "loss": 4.4538, "step": 82720 }, { "epoch": 0.15846791996728354, "grad_norm": 1.029249906539917, "learning_rate": 0.00015841181465850245, "loss": 4.5653, "step": 82730 }, { "epoch": 0.15848707479865878, "grad_norm": 1.064592957496643, "learning_rate": 0.00015843096942261153, "loss": 4.4821, "step": 82740 }, { "epoch": 0.158506229630034, "grad_norm": 1.0219790935516357, "learning_rate": 0.00015845012418672062, "loss": 4.5871, "step": 82750 }, { "epoch": 0.15852538446140924, "grad_norm": 1.0802959203720093, "learning_rate": 0.00015846927895082972, "loss": 4.4402, "step": 
82760 }, { "epoch": 0.15854453929278448, "grad_norm": 1.0897395610809326, "learning_rate": 0.0001584884337149388, "loss": 4.5205, "step": 82770 }, { "epoch": 0.15856369412415972, "grad_norm": 1.1056803464889526, "learning_rate": 0.00015850758847904786, "loss": 4.5155, "step": 82780 }, { "epoch": 0.15858284895553493, "grad_norm": 1.017547845840454, "learning_rate": 0.00015852674324315694, "loss": 4.6757, "step": 82790 }, { "epoch": 0.15860200378691017, "grad_norm": null, "learning_rate": 0.00015854589800726602, "loss": 4.5074, "step": 82800 }, { "epoch": 0.1586211586182854, "grad_norm": 1.0344862937927246, "learning_rate": 0.00015856313729496418, "loss": 4.4763, "step": 82810 }, { "epoch": 0.15864031344966062, "grad_norm": 1.1247683763504028, "learning_rate": 0.0001585822920590733, "loss": 4.6653, "step": 82820 }, { "epoch": 0.15865946828103586, "grad_norm": 1.0630440711975098, "learning_rate": 0.00015860144682318237, "loss": 4.4501, "step": 82830 }, { "epoch": 0.1586786231124111, "grad_norm": 1.0546040534973145, "learning_rate": 0.00015862060158729145, "loss": 4.3915, "step": 82840 }, { "epoch": 0.1586977779437863, "grad_norm": 1.052538514137268, "learning_rate": 0.0001586397563514005, "loss": 4.4078, "step": 82850 }, { "epoch": 0.15871693277516155, "grad_norm": 1.0526546239852905, "learning_rate": 0.0001586589111155096, "loss": 4.4295, "step": 82860 }, { "epoch": 0.1587360876065368, "grad_norm": 1.0748597383499146, "learning_rate": 0.00015867806587961867, "loss": 4.4325, "step": 82870 }, { "epoch": 0.158755242437912, "grad_norm": 1.0747774839401245, "learning_rate": 0.00015869722064372775, "loss": 4.514, "step": 82880 }, { "epoch": 0.15877439726928724, "grad_norm": 1.1571093797683716, "learning_rate": 0.00015871637540783686, "loss": 4.5255, "step": 82890 }, { "epoch": 0.15879355210066248, "grad_norm": 1.026840329170227, "learning_rate": 0.00015873553017194594, "loss": 4.4994, "step": 82900 }, { "epoch": 0.1588127069320377, "grad_norm": 1.0495599508285522,
"learning_rate": 0.000158754684936055, "loss": 4.5046, "step": 82910 }, { "epoch": 0.15883186176341293, "grad_norm": 1.0488454103469849, "learning_rate": 0.00015877383970016408, "loss": 4.5386, "step": 82920 }, { "epoch": 0.15885101659478817, "grad_norm": 1.0440860986709595, "learning_rate": 0.00015879299446427316, "loss": 4.4275, "step": 82930 }, { "epoch": 0.1588701714261634, "grad_norm": 1.0993164777755737, "learning_rate": 0.00015881214922838224, "loss": 4.5438, "step": 82940 }, { "epoch": 0.15888932625753863, "grad_norm": 1.0815585851669312, "learning_rate": 0.00015883130399249132, "loss": 4.7177, "step": 82950 }, { "epoch": 0.15890848108891387, "grad_norm": 1.0806634426116943, "learning_rate": 0.00015885045875660037, "loss": 4.5213, "step": 82960 }, { "epoch": 0.15892763592028908, "grad_norm": 1.0565229654312134, "learning_rate": 0.00015886961352070948, "loss": 4.5504, "step": 82970 }, { "epoch": 0.15894679075166432, "grad_norm": 1.08681321144104, "learning_rate": 0.00015888876828481856, "loss": 4.5485, "step": 82980 }, { "epoch": 0.15896594558303956, "grad_norm": 1.082107663154602, "learning_rate": 0.00015890792304892765, "loss": 4.4722, "step": 82990 }, { "epoch": 0.15898510041441477, "grad_norm": 1.1322572231292725, "learning_rate": 0.00015892707781303673, "loss": 4.4093, "step": 83000 }, { "epoch": 0.15900425524579, "grad_norm": 1.0889050960540771, "learning_rate": 0.00015894623257714578, "loss": 4.5353, "step": 83010 }, { "epoch": 0.15902341007716525, "grad_norm": 1.0615242719650269, "learning_rate": 0.00015896538734125486, "loss": 4.4699, "step": 83020 }, { "epoch": 0.15904256490854046, "grad_norm": 1.0205928087234497, "learning_rate": 0.00015898454210536394, "loss": 4.4064, "step": 83030 }, { "epoch": 0.1590617197399157, "grad_norm": 1.0369962453842163, "learning_rate": 0.00015900369686947305, "loss": 4.5278, "step": 83040 }, { "epoch": 0.15908087457129094, "grad_norm": 1.0616352558135986, "learning_rate": 0.00015902285163358213, "loss": 4.4669, 
"step": 83050 }, { "epoch": 0.15910002940266615, "grad_norm": 1.3664193153381348, "learning_rate": 0.00015904200639769121, "loss": 4.571, "step": 83060 }, { "epoch": 0.1591191842340414, "grad_norm": 1.0572340488433838, "learning_rate": 0.00015906116116180027, "loss": 4.4617, "step": 83070 }, { "epoch": 0.15913833906541663, "grad_norm": 1.0488545894622803, "learning_rate": 0.00015908031592590935, "loss": 4.4988, "step": 83080 }, { "epoch": 0.15915749389679185, "grad_norm": 1.1240770816802979, "learning_rate": 0.00015909947069001843, "loss": 4.5333, "step": 83090 }, { "epoch": 0.15917664872816709, "grad_norm": 1.1133769750595093, "learning_rate": 0.0001591186254541275, "loss": 4.5173, "step": 83100 }, { "epoch": 0.15919580355954233, "grad_norm": 1.1194369792938232, "learning_rate": 0.00015913778021823662, "loss": 4.4768, "step": 83110 }, { "epoch": 0.15921495839091754, "grad_norm": 1.0426353216171265, "learning_rate": 0.00015915693498234567, "loss": 4.5791, "step": 83120 }, { "epoch": 0.15923411322229278, "grad_norm": 1.0530861616134644, "learning_rate": 0.00015917608974645476, "loss": 4.5774, "step": 83130 }, { "epoch": 0.15925326805366802, "grad_norm": 1.1125766038894653, "learning_rate": 0.00015919524451056384, "loss": 4.5312, "step": 83140 }, { "epoch": 0.15927242288504323, "grad_norm": 1.0927886962890625, "learning_rate": 0.00015921439927467292, "loss": 4.5161, "step": 83150 }, { "epoch": 0.15929157771641847, "grad_norm": 1.1054354906082153, "learning_rate": 0.000159233554038782, "loss": 4.5355, "step": 83160 }, { "epoch": 0.1593107325477937, "grad_norm": 1.0774377584457397, "learning_rate": 0.00015925270880289108, "loss": 4.335, "step": 83170 }, { "epoch": 0.15932988737916892, "grad_norm": 1.047946572303772, "learning_rate": 0.00015927186356700014, "loss": 4.4295, "step": 83180 }, { "epoch": 0.15934904221054416, "grad_norm": 1.0581796169281006, "learning_rate": 0.00015929101833110924, "loss": 4.5099, "step": 83190 }, { "epoch": 0.1593681970419194, "grad_norm": 
1.081023097038269, "learning_rate": 0.00015931017309521833, "loss": 4.6764, "step": 83200 }, { "epoch": 0.1593873518732946, "grad_norm": 1.0562227964401245, "learning_rate": 0.0001593293278593274, "loss": 4.5752, "step": 83210 }, { "epoch": 0.15940650670466985, "grad_norm": 1.0962384939193726, "learning_rate": 0.0001593484826234365, "loss": 4.5314, "step": 83220 }, { "epoch": 0.1594256615360451, "grad_norm": 1.1081762313842773, "learning_rate": 0.00015936763738754554, "loss": 4.597, "step": 83230 }, { "epoch": 0.1594448163674203, "grad_norm": 1.0886591672897339, "learning_rate": 0.00015938679215165462, "loss": 4.6275, "step": 83240 }, { "epoch": 0.15946397119879555, "grad_norm": 1.0512391328811646, "learning_rate": 0.0001594059469157637, "loss": 4.5037, "step": 83250 }, { "epoch": 0.15948312603017079, "grad_norm": 1.0551872253417969, "learning_rate": 0.0001594251016798728, "loss": 4.6111, "step": 83260 }, { "epoch": 0.159502280861546, "grad_norm": 1.0748522281646729, "learning_rate": 0.0001594442564439819, "loss": 4.4739, "step": 83270 }, { "epoch": 0.15952143569292124, "grad_norm": 1.0196588039398193, "learning_rate": 0.00015946341120809098, "loss": 4.4453, "step": 83280 }, { "epoch": 0.15954059052429648, "grad_norm": 1.059836506843567, "learning_rate": 0.00015948256597220003, "loss": 4.427, "step": 83290 }, { "epoch": 0.1595597453556717, "grad_norm": 1.064349889755249, "learning_rate": 0.0001595017207363091, "loss": 4.4633, "step": 83300 }, { "epoch": 0.15957890018704693, "grad_norm": 1.0628594160079956, "learning_rate": 0.0001595208755004182, "loss": 4.5423, "step": 83310 }, { "epoch": 0.15959805501842217, "grad_norm": 1.1181870698928833, "learning_rate": 0.00015954003026452727, "loss": 4.4785, "step": 83320 }, { "epoch": 0.15961720984979738, "grad_norm": 1.0838923454284668, "learning_rate": 0.00015955918502863638, "loss": 4.5647, "step": 83330 }, { "epoch": 0.15963636468117262, "grad_norm": 1.1251581907272339, "learning_rate": 0.00015957833979274544, "loss": 
4.4764, "step": 83340 }, { "epoch": 0.15965551951254786, "grad_norm": 1.0943269729614258, "learning_rate": 0.00015959749455685452, "loss": 4.5309, "step": 83350 }, { "epoch": 0.15967467434392307, "grad_norm": 1.0870319604873657, "learning_rate": 0.0001596166493209636, "loss": 4.6202, "step": 83360 }, { "epoch": 0.1596938291752983, "grad_norm": 1.0368582010269165, "learning_rate": 0.00015963580408507268, "loss": 4.695, "step": 83370 }, { "epoch": 0.15971298400667355, "grad_norm": 1.05988609790802, "learning_rate": 0.00015965495884918176, "loss": 4.4445, "step": 83380 }, { "epoch": 0.15973213883804877, "grad_norm": 1.0245304107666016, "learning_rate": 0.00015967411361329084, "loss": 4.5602, "step": 83390 }, { "epoch": 0.159751293669424, "grad_norm": 1.0256164073944092, "learning_rate": 0.0001596932683773999, "loss": 4.5522, "step": 83400 }, { "epoch": 0.15977044850079924, "grad_norm": 1.1466929912567139, "learning_rate": 0.000159712423141509, "loss": 4.5005, "step": 83410 }, { "epoch": 0.15978960333217446, "grad_norm": 1.104240894317627, "learning_rate": 0.0001597315779056181, "loss": 4.6262, "step": 83420 }, { "epoch": 0.1598087581635497, "grad_norm": 1.071961760520935, "learning_rate": 0.00015975073266972717, "loss": 4.6062, "step": 83430 }, { "epoch": 0.15982791299492494, "grad_norm": 1.1888824701309204, "learning_rate": 0.00015976988743383625, "loss": 4.4997, "step": 83440 }, { "epoch": 0.15984706782630015, "grad_norm": 1.0651661157608032, "learning_rate": 0.0001597890421979453, "loss": 4.6314, "step": 83450 }, { "epoch": 0.1598662226576754, "grad_norm": 1.046913743019104, "learning_rate": 0.00015980819696205439, "loss": 4.6178, "step": 83460 }, { "epoch": 0.15988537748905063, "grad_norm": 1.089306354522705, "learning_rate": 0.00015982735172616347, "loss": 4.4608, "step": 83470 }, { "epoch": 0.15990453232042584, "grad_norm": 1.0560200214385986, "learning_rate": 0.00015984650649027257, "loss": 4.6182, "step": 83480 }, { "epoch": 0.15992368715180108, "grad_norm": 
1.1007734537124634, "learning_rate": 0.00015986566125438166, "loss": 4.4716, "step": 83490 }, { "epoch": 0.15994284198317632, "grad_norm": 1.068240761756897, "learning_rate": 0.0001598848160184907, "loss": 4.5245, "step": 83500 }, { "epoch": 0.15996199681455153, "grad_norm": 1.0176788568496704, "learning_rate": 0.0001599039707825998, "loss": 4.5223, "step": 83510 }, { "epoch": 0.15998115164592677, "grad_norm": 1.0903223752975464, "learning_rate": 0.00015992312554670887, "loss": 4.4852, "step": 83520 }, { "epoch": 0.160000306477302, "grad_norm": 1.0244290828704834, "learning_rate": 0.00015994228031081795, "loss": 4.6129, "step": 83530 }, { "epoch": 0.16001946130867722, "grad_norm": 1.1067003011703491, "learning_rate": 0.00015996143507492704, "loss": 4.5519, "step": 83540 }, { "epoch": 0.16003861614005246, "grad_norm": 1.1005891561508179, "learning_rate": 0.00015998058983903614, "loss": 4.544, "step": 83550 }, { "epoch": 0.1600577709714277, "grad_norm": 1.0143263339996338, "learning_rate": 0.0001599997446031452, "loss": 4.4947, "step": 83560 }, { "epoch": 0.16007692580280292, "grad_norm": 1.0419188737869263, "learning_rate": 0.00016001889936725428, "loss": 4.5577, "step": 83570 }, { "epoch": 0.16009608063417816, "grad_norm": 1.084842562675476, "learning_rate": 0.00016003805413136336, "loss": 4.3649, "step": 83580 }, { "epoch": 0.1601152354655534, "grad_norm": 1.0915488004684448, "learning_rate": 0.00016005720889547244, "loss": 4.3993, "step": 83590 }, { "epoch": 0.1601343902969286, "grad_norm": 1.056720495223999, "learning_rate": 0.00016007636365958152, "loss": 4.5094, "step": 83600 }, { "epoch": 0.16015354512830385, "grad_norm": 1.0281696319580078, "learning_rate": 0.00016009551842369058, "loss": 4.4216, "step": 83610 }, { "epoch": 0.1601726999596791, "grad_norm": 1.059289574623108, "learning_rate": 0.00016011467318779966, "loss": 4.5681, "step": 83620 }, { "epoch": 0.1601918547910543, "grad_norm": 1.0736980438232422, "learning_rate": 0.00016013382795190877, "loss": 
4.4526, "step": 83630 }, { "epoch": 0.16021100962242954, "grad_norm": 1.0564168691635132, "learning_rate": 0.00016015298271601785, "loss": 4.5894, "step": 83640 }, { "epoch": 0.16023016445380478, "grad_norm": 1.0775609016418457, "learning_rate": 0.00016017213748012693, "loss": 4.5532, "step": 83650 }, { "epoch": 0.16024931928518, "grad_norm": 1.045743465423584, "learning_rate": 0.000160191292244236, "loss": 4.5019, "step": 83660 }, { "epoch": 0.16026847411655523, "grad_norm": 1.0843515396118164, "learning_rate": 0.00016021044700834507, "loss": 4.514, "step": 83670 }, { "epoch": 0.16028762894793047, "grad_norm": 1.0493510961532593, "learning_rate": 0.00016022960177245415, "loss": 4.4752, "step": 83680 }, { "epoch": 0.16030678377930568, "grad_norm": 1.0466781854629517, "learning_rate": 0.00016024875653656323, "loss": 4.4852, "step": 83690 }, { "epoch": 0.16032593861068092, "grad_norm": 1.018572211265564, "learning_rate": 0.00016026791130067234, "loss": 4.4571, "step": 83700 }, { "epoch": 0.16034509344205616, "grad_norm": 1.0519911050796509, "learning_rate": 0.00016028706606478142, "loss": 4.5331, "step": 83710 }, { "epoch": 0.16036424827343138, "grad_norm": 1.0742534399032593, "learning_rate": 0.00016030622082889047, "loss": 4.5366, "step": 83720 }, { "epoch": 0.16038340310480662, "grad_norm": 1.0793415307998657, "learning_rate": 0.00016032537559299955, "loss": 4.5354, "step": 83730 }, { "epoch": 0.16040255793618186, "grad_norm": 1.036773920059204, "learning_rate": 0.00016034453035710863, "loss": 4.5655, "step": 83740 }, { "epoch": 0.16042171276755707, "grad_norm": 1.0315884351730347, "learning_rate": 0.00016036368512121772, "loss": 4.5092, "step": 83750 }, { "epoch": 0.1604408675989323, "grad_norm": 1.044027328491211, "learning_rate": 0.0001603828398853268, "loss": 4.6228, "step": 83760 }, { "epoch": 0.16046002243030755, "grad_norm": 1.0639342069625854, "learning_rate": 0.0001604019946494359, "loss": 4.4479, "step": 83770 }, { "epoch": 0.16047917726168276, 
"grad_norm": 1.034560203552246, "learning_rate": 0.00016042114941354496, "loss": 4.4842, "step": 83780 }, { "epoch": 0.160498332093058, "grad_norm": 1.0053168535232544, "learning_rate": 0.00016044030417765404, "loss": 4.4175, "step": 83790 }, { "epoch": 0.16051748692443324, "grad_norm": 1.1082189083099365, "learning_rate": 0.00016045945894176312, "loss": 4.5077, "step": 83800 }, { "epoch": 0.16053664175580845, "grad_norm": 1.0610723495483398, "learning_rate": 0.0001604786137058722, "loss": 4.5551, "step": 83810 }, { "epoch": 0.1605557965871837, "grad_norm": 1.0378614664077759, "learning_rate": 0.00016049776846998129, "loss": 4.6076, "step": 83820 }, { "epoch": 0.16057495141855893, "grad_norm": 1.0502897500991821, "learning_rate": 0.00016051692323409034, "loss": 4.5332, "step": 83830 }, { "epoch": 0.16059410624993414, "grad_norm": 1.0961638689041138, "learning_rate": 0.00016053607799819942, "loss": 4.4331, "step": 83840 }, { "epoch": 0.16061326108130938, "grad_norm": 1.1134364604949951, "learning_rate": 0.00016055523276230853, "loss": 4.4484, "step": 83850 }, { "epoch": 0.16063241591268462, "grad_norm": 1.0406384468078613, "learning_rate": 0.0001605743875264176, "loss": 4.4613, "step": 83860 }, { "epoch": 0.16065157074405984, "grad_norm": 1.04743492603302, "learning_rate": 0.0001605935422905267, "loss": 4.566, "step": 83870 }, { "epoch": 0.16067072557543507, "grad_norm": 1.0682997703552246, "learning_rate": 0.00016061269705463575, "loss": 4.549, "step": 83880 }, { "epoch": 0.16068988040681031, "grad_norm": 1.103238582611084, "learning_rate": 0.00016063185181874483, "loss": 4.5184, "step": 83890 }, { "epoch": 0.16070903523818553, "grad_norm": 1.08319890499115, "learning_rate": 0.0001606510065828539, "loss": 4.4932, "step": 83900 }, { "epoch": 0.16072819006956077, "grad_norm": 1.0636708736419678, "learning_rate": 0.000160670161346963, "loss": 4.4311, "step": 83910 }, { "epoch": 0.160747344900936, "grad_norm": 1.102260947227478, "learning_rate": 0.0001606893161110721, 
"loss": 4.5208, "step": 83920 }, { "epoch": 0.16076649973231122, "grad_norm": 1.0762786865234375, "learning_rate": 0.00016070847087518118, "loss": 4.4474, "step": 83930 }, { "epoch": 0.16078565456368646, "grad_norm": 1.0428991317749023, "learning_rate": 0.00016072762563929023, "loss": 4.4137, "step": 83940 }, { "epoch": 0.1608048093950617, "grad_norm": 1.0846284627914429, "learning_rate": 0.00016074678040339931, "loss": 4.4759, "step": 83950 }, { "epoch": 0.1608239642264369, "grad_norm": 1.1400247812271118, "learning_rate": 0.0001607659351675084, "loss": 4.6045, "step": 83960 }, { "epoch": 0.16084311905781215, "grad_norm": 1.058487057685852, "learning_rate": 0.00016078508993161748, "loss": 4.6027, "step": 83970 }, { "epoch": 0.1608622738891874, "grad_norm": 1.0445384979248047, "learning_rate": 0.00016080424469572656, "loss": 4.4962, "step": 83980 }, { "epoch": 0.1608814287205626, "grad_norm": 1.083311676979065, "learning_rate": 0.0001608233994598356, "loss": 4.5406, "step": 83990 }, { "epoch": 0.16090058355193784, "grad_norm": 1.0417921543121338, "learning_rate": 0.00016084255422394472, "loss": 4.4414, "step": 84000 } ], "logging_steps": 10, "max_steps": 1566186, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.49114353057792e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }