| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.1201792286765881, | |
| "eval_steps": 7000, | |
| "global_step": 14000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 9.306169509887695, | |
| "eval_runtime": 10.9126, | |
| "eval_samples_per_second": 3.665, | |
| "eval_steps_per_second": 0.458, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0008001280204832773, | |
| "grad_norm": 8.51533031463623, | |
| "learning_rate": 3.5000000000000004e-06, | |
| "loss": 8.786, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0016002560409665546, | |
| "grad_norm": 10.90935230255127, | |
| "learning_rate": 8.500000000000002e-06, | |
| "loss": 8.3433, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002400384061449832, | |
| "grad_norm": 7.269016265869141, | |
| "learning_rate": 1.3500000000000001e-05, | |
| "loss": 7.549, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.003200512081933109, | |
| "grad_norm": 8.790578842163086, | |
| "learning_rate": 1.85e-05, | |
| "loss": 7.2574, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004000640102416387, | |
| "grad_norm": 6.52068567276001, | |
| "learning_rate": 2.35e-05, | |
| "loss": 7.0024, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.004800768122899664, | |
| "grad_norm": 6.902959823608398, | |
| "learning_rate": 2.8499999999999998e-05, | |
| "loss": 6.9074, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005600896143382941, | |
| "grad_norm": 5.350945949554443, | |
| "learning_rate": 3.35e-05, | |
| "loss": 6.8765, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006401024163866218, | |
| "grad_norm": 5.928489685058594, | |
| "learning_rate": 3.85e-05, | |
| "loss": 6.5663, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.007201152184349496, | |
| "grad_norm": 9.222543716430664, | |
| "learning_rate": 4.35e-05, | |
| "loss": 6.6131, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008001280204832774, | |
| "grad_norm": 6.57027006149292, | |
| "learning_rate": 4.85e-05, | |
| "loss": 6.5829, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00880140822531605, | |
| "grad_norm": 5.280848503112793, | |
| "learning_rate": 4.999064020965931e-05, | |
| "loss": 6.5996, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009601536245799328, | |
| "grad_norm": 5.950971603393555, | |
| "learning_rate": 4.997726908060117e-05, | |
| "loss": 6.6075, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.010401664266282605, | |
| "grad_norm": 4.300549507141113, | |
| "learning_rate": 4.996389795154303e-05, | |
| "loss": 6.5074, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011201792286765882, | |
| "grad_norm": 4.824333190917969, | |
| "learning_rate": 4.9950526822484896e-05, | |
| "loss": 6.6072, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01200192030724916, | |
| "grad_norm": 5.4324116706848145, | |
| "learning_rate": 4.993715569342676e-05, | |
| "loss": 6.6183, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.012802048327732437, | |
| "grad_norm": 4.087579250335693, | |
| "learning_rate": 4.992378456436862e-05, | |
| "loss": 6.4806, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.013602176348215714, | |
| "grad_norm": 7.260207653045654, | |
| "learning_rate": 4.9910413435310484e-05, | |
| "loss": 6.3709, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.014402304368698993, | |
| "grad_norm": 4.145061016082764, | |
| "learning_rate": 4.9897042306252346e-05, | |
| "loss": 6.2951, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01520243238918227, | |
| "grad_norm": 3.2026450634002686, | |
| "learning_rate": 4.98836711771942e-05, | |
| "loss": 6.3255, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.016002560409665547, | |
| "grad_norm": 3.443145751953125, | |
| "learning_rate": 4.9870300048136065e-05, | |
| "loss": 6.4894, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.016802688430148822, | |
| "grad_norm": 5.324231147766113, | |
| "learning_rate": 4.985692891907793e-05, | |
| "loss": 6.4312, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0176028164506321, | |
| "grad_norm": 3.2833452224731445, | |
| "learning_rate": 4.984355779001979e-05, | |
| "loss": 6.513, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.018402944471115377, | |
| "grad_norm": 3.8984358310699463, | |
| "learning_rate": 4.983018666096165e-05, | |
| "loss": 6.1683, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.019203072491598656, | |
| "grad_norm": 4.183676719665527, | |
| "learning_rate": 4.9816815531903516e-05, | |
| "loss": 6.329, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.020003200512081935, | |
| "grad_norm": 3.136693239212036, | |
| "learning_rate": 4.980344440284538e-05, | |
| "loss": 6.466, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02080332853256521, | |
| "grad_norm": 4.185967445373535, | |
| "learning_rate": 4.979007327378724e-05, | |
| "loss": 6.4613, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02160345655304849, | |
| "grad_norm": 3.105653762817383, | |
| "learning_rate": 4.9776702144729104e-05, | |
| "loss": 6.3596, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.022403584573531764, | |
| "grad_norm": 3.927561044692993, | |
| "learning_rate": 4.9763331015670967e-05, | |
| "loss": 6.2604, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.023203712594015043, | |
| "grad_norm": 3.513439178466797, | |
| "learning_rate": 4.974995988661283e-05, | |
| "loss": 6.2747, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.02400384061449832, | |
| "grad_norm": 3.07377290725708, | |
| "learning_rate": 4.973658875755469e-05, | |
| "loss": 6.202, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.024803968634981598, | |
| "grad_norm": 3.045619249343872, | |
| "learning_rate": 4.9723217628496555e-05, | |
| "loss": 6.1022, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.025604096655464873, | |
| "grad_norm": 3.330648183822632, | |
| "learning_rate": 4.970984649943842e-05, | |
| "loss": 6.1544, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.026404224675948152, | |
| "grad_norm": 3.0299668312072754, | |
| "learning_rate": 4.969647537038028e-05, | |
| "loss": 6.3119, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.027204352696431428, | |
| "grad_norm": 3.687938928604126, | |
| "learning_rate": 4.9683104241322136e-05, | |
| "loss": 6.333, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.028004480716914706, | |
| "grad_norm": 4.0919413566589355, | |
| "learning_rate": 4.9669733112264e-05, | |
| "loss": 6.1711, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.028804608737397985, | |
| "grad_norm": 3.1327242851257324, | |
| "learning_rate": 4.965636198320586e-05, | |
| "loss": 6.3365, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02960473675788126, | |
| "grad_norm": 4.531859874725342, | |
| "learning_rate": 4.9642990854147724e-05, | |
| "loss": 6.2121, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03040486477836454, | |
| "grad_norm": 2.522672414779663, | |
| "learning_rate": 4.962961972508959e-05, | |
| "loss": 6.2388, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.031204992798847815, | |
| "grad_norm": 5.62153959274292, | |
| "learning_rate": 4.961624859603145e-05, | |
| "loss": 6.168, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.032005120819331094, | |
| "grad_norm": 3.522804021835327, | |
| "learning_rate": 4.960287746697331e-05, | |
| "loss": 6.1207, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03280524883981437, | |
| "grad_norm": 7.260324478149414, | |
| "learning_rate": 4.9589506337915175e-05, | |
| "loss": 6.31, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.033605376860297645, | |
| "grad_norm": 4.309441566467285, | |
| "learning_rate": 4.957613520885704e-05, | |
| "loss": 6.1107, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.034405504880780924, | |
| "grad_norm": 3.2409913539886475, | |
| "learning_rate": 4.95627640797989e-05, | |
| "loss": 6.2082, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.0352056329012642, | |
| "grad_norm": 3.9414610862731934, | |
| "learning_rate": 4.954939295074076e-05, | |
| "loss": 6.2102, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.03600576092174748, | |
| "grad_norm": 2.441235303878784, | |
| "learning_rate": 4.9536021821682626e-05, | |
| "loss": 6.1023, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.036805888942230754, | |
| "grad_norm": 2.997591972351074, | |
| "learning_rate": 4.952265069262449e-05, | |
| "loss": 6.1147, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03760601696271403, | |
| "grad_norm": 3.950436592102051, | |
| "learning_rate": 4.950927956356635e-05, | |
| "loss": 6.0725, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03840614498319731, | |
| "grad_norm": 3.4340896606445312, | |
| "learning_rate": 4.9495908434508214e-05, | |
| "loss": 6.1336, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03920627300368059, | |
| "grad_norm": 3.28839373588562, | |
| "learning_rate": 4.948253730545007e-05, | |
| "loss": 6.1709, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.04000640102416387, | |
| "grad_norm": 2.976365566253662, | |
| "learning_rate": 4.946916617639193e-05, | |
| "loss": 6.2074, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04080652904464714, | |
| "grad_norm": 4.156027793884277, | |
| "learning_rate": 4.9455795047333795e-05, | |
| "loss": 6.1694, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.04160665706513042, | |
| "grad_norm": 3.4855797290802, | |
| "learning_rate": 4.944242391827566e-05, | |
| "loss": 6.1218, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.0424067850856137, | |
| "grad_norm": 4.489185333251953, | |
| "learning_rate": 4.942905278921752e-05, | |
| "loss": 6.1507, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04320691310609698, | |
| "grad_norm": 3.2751166820526123, | |
| "learning_rate": 4.941568166015938e-05, | |
| "loss": 6.1055, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.04400704112658025, | |
| "grad_norm": 2.4234585762023926, | |
| "learning_rate": 4.9402310531101246e-05, | |
| "loss": 6.1755, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04480716914706353, | |
| "grad_norm": 3.4436991214752197, | |
| "learning_rate": 4.938893940204311e-05, | |
| "loss": 6.1882, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04560729716754681, | |
| "grad_norm": 3.3731908798217773, | |
| "learning_rate": 4.937556827298497e-05, | |
| "loss": 6.0648, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.04640742518803009, | |
| "grad_norm": 3.8733670711517334, | |
| "learning_rate": 4.9362197143926834e-05, | |
| "loss": 6.0621, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04720755320851336, | |
| "grad_norm": 4.126636505126953, | |
| "learning_rate": 4.9348826014868696e-05, | |
| "loss": 6.122, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.04800768122899664, | |
| "grad_norm": 3.8605775833129883, | |
| "learning_rate": 4.933545488581056e-05, | |
| "loss": 5.9788, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.048807809249479917, | |
| "grad_norm": 2.9509966373443604, | |
| "learning_rate": 4.932208375675242e-05, | |
| "loss": 6.2045, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.049607937269963195, | |
| "grad_norm": 4.4266510009765625, | |
| "learning_rate": 4.9308712627694285e-05, | |
| "loss": 5.9981, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.050408065290446474, | |
| "grad_norm": 2.79042649269104, | |
| "learning_rate": 4.929534149863615e-05, | |
| "loss": 6.1882, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.051208193310929746, | |
| "grad_norm": 2.8986568450927734, | |
| "learning_rate": 4.928197036957801e-05, | |
| "loss": 6.1739, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.052008321331413025, | |
| "grad_norm": 4.294217586517334, | |
| "learning_rate": 4.926859924051987e-05, | |
| "loss": 6.0566, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.052808449351896304, | |
| "grad_norm": 8.848836898803711, | |
| "learning_rate": 4.9255228111461735e-05, | |
| "loss": 6.2994, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.05360857737237958, | |
| "grad_norm": 3.2204337120056152, | |
| "learning_rate": 4.92418569824036e-05, | |
| "loss": 6.0573, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.054408705392862855, | |
| "grad_norm": 4.775251865386963, | |
| "learning_rate": 4.922848585334546e-05, | |
| "loss": 5.9764, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.055208833413346134, | |
| "grad_norm": 3.5426905155181885, | |
| "learning_rate": 4.921511472428732e-05, | |
| "loss": 6.0402, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.05600896143382941, | |
| "grad_norm": 10.72481632232666, | |
| "learning_rate": 4.9201743595229186e-05, | |
| "loss": 6.0024, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05680908945431269, | |
| "grad_norm": 2.441681385040283, | |
| "learning_rate": 4.918837246617105e-05, | |
| "loss": 6.1122, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.05760921747479597, | |
| "grad_norm": 3.375319480895996, | |
| "learning_rate": 4.917500133711291e-05, | |
| "loss": 6.058, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.05840934549527924, | |
| "grad_norm": 2.821507453918457, | |
| "learning_rate": 4.9161630208054774e-05, | |
| "loss": 6.0586, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.05920947351576252, | |
| "grad_norm": 2.8658957481384277, | |
| "learning_rate": 4.914825907899664e-05, | |
| "loss": 6.0115, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.0600096015362458, | |
| "grad_norm": 2.239774227142334, | |
| "learning_rate": 4.91348879499385e-05, | |
| "loss": 6.0669, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06080972955672908, | |
| "grad_norm": 3.5249900817871094, | |
| "learning_rate": 4.912151682088036e-05, | |
| "loss": 6.1013, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06160985757721235, | |
| "grad_norm": 2.790356159210205, | |
| "learning_rate": 4.9108145691822225e-05, | |
| "loss": 6.0099, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06240998559769563, | |
| "grad_norm": 3.0729963779449463, | |
| "learning_rate": 4.909477456276409e-05, | |
| "loss": 6.1376, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.06321011361817891, | |
| "grad_norm": 2.9490275382995605, | |
| "learning_rate": 4.908140343370595e-05, | |
| "loss": 6.1457, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.06401024163866219, | |
| "grad_norm": 2.7475438117980957, | |
| "learning_rate": 4.9068032304647806e-05, | |
| "loss": 6.0041, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06481036965914547, | |
| "grad_norm": 2.755703926086426, | |
| "learning_rate": 4.905466117558967e-05, | |
| "loss": 6.0242, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.06561049767962875, | |
| "grad_norm": 2.724515676498413, | |
| "learning_rate": 4.904129004653153e-05, | |
| "loss": 6.1827, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06641062570011202, | |
| "grad_norm": 4.498260974884033, | |
| "learning_rate": 4.9027918917473394e-05, | |
| "loss": 6.0892, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.06721075372059529, | |
| "grad_norm": 2.4399070739746094, | |
| "learning_rate": 4.901454778841526e-05, | |
| "loss": 6.0197, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.06801088174107857, | |
| "grad_norm": 2.7584304809570312, | |
| "learning_rate": 4.900117665935712e-05, | |
| "loss": 5.9056, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.06881100976156185, | |
| "grad_norm": 2.8177144527435303, | |
| "learning_rate": 4.898780553029898e-05, | |
| "loss": 6.1484, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.06961113778204513, | |
| "grad_norm": 4.181133270263672, | |
| "learning_rate": 4.8974434401240845e-05, | |
| "loss": 5.9376, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.0704112658025284, | |
| "grad_norm": 3.677849769592285, | |
| "learning_rate": 4.896106327218271e-05, | |
| "loss": 6.0403, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07121139382301168, | |
| "grad_norm": 3.1553192138671875, | |
| "learning_rate": 4.894769214312457e-05, | |
| "loss": 6.0488, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.07201152184349496, | |
| "grad_norm": 3.2580947875976562, | |
| "learning_rate": 4.893432101406643e-05, | |
| "loss": 6.1002, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.07281164986397824, | |
| "grad_norm": 6.328150749206543, | |
| "learning_rate": 4.8920949885008296e-05, | |
| "loss": 6.0225, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.07361177788446151, | |
| "grad_norm": 2.7467615604400635, | |
| "learning_rate": 4.890757875595016e-05, | |
| "loss": 5.9622, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.07441190590494479, | |
| "grad_norm": 2.86570405960083, | |
| "learning_rate": 4.889420762689202e-05, | |
| "loss": 5.9718, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.07521203392542807, | |
| "grad_norm": 2.544917106628418, | |
| "learning_rate": 4.8880836497833884e-05, | |
| "loss": 5.8697, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.07601216194591134, | |
| "grad_norm": 2.5245840549468994, | |
| "learning_rate": 4.8867465368775746e-05, | |
| "loss": 5.9973, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.07681228996639462, | |
| "grad_norm": 3.6830902099609375, | |
| "learning_rate": 4.88540942397176e-05, | |
| "loss": 5.943, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.0776124179868779, | |
| "grad_norm": 2.6643354892730713, | |
| "learning_rate": 4.8840723110659465e-05, | |
| "loss": 5.8958, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.07841254600736118, | |
| "grad_norm": 6.4623565673828125, | |
| "learning_rate": 4.882735198160133e-05, | |
| "loss": 6.0236, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.07921267402784446, | |
| "grad_norm": 2.186974048614502, | |
| "learning_rate": 4.881398085254319e-05, | |
| "loss": 6.0481, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08001280204832774, | |
| "grad_norm": 2.4983859062194824, | |
| "learning_rate": 4.880060972348505e-05, | |
| "loss": 6.075, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.080812930068811, | |
| "grad_norm": 2.778280258178711, | |
| "learning_rate": 4.8787238594426916e-05, | |
| "loss": 6.0757, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.08161305808929428, | |
| "grad_norm": 2.706965923309326, | |
| "learning_rate": 4.877386746536878e-05, | |
| "loss": 6.1504, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.08241318610977756, | |
| "grad_norm": 3.4069600105285645, | |
| "learning_rate": 4.876049633631064e-05, | |
| "loss": 6.0889, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.08321331413026084, | |
| "grad_norm": 3.179551124572754, | |
| "learning_rate": 4.8747125207252504e-05, | |
| "loss": 6.0057, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.08401344215074412, | |
| "grad_norm": 2.924018383026123, | |
| "learning_rate": 4.873375407819437e-05, | |
| "loss": 5.8406, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0848135701712274, | |
| "grad_norm": 3.103912115097046, | |
| "learning_rate": 4.872038294913623e-05, | |
| "loss": 6.0351, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.08561369819171068, | |
| "grad_norm": 2.8037219047546387, | |
| "learning_rate": 4.870701182007809e-05, | |
| "loss": 6.0272, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.08641382621219396, | |
| "grad_norm": 2.477062940597534, | |
| "learning_rate": 4.8693640691019955e-05, | |
| "loss": 5.9269, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.08721395423267723, | |
| "grad_norm": 2.748488187789917, | |
| "learning_rate": 4.868026956196182e-05, | |
| "loss": 5.943, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.0880140822531605, | |
| "grad_norm": 3.3991920948028564, | |
| "learning_rate": 4.866689843290368e-05, | |
| "loss": 6.1455, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.08881421027364378, | |
| "grad_norm": 3.208509683609009, | |
| "learning_rate": 4.8653527303845536e-05, | |
| "loss": 5.9746, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.08961433829412706, | |
| "grad_norm": 3.3378469944000244, | |
| "learning_rate": 4.86401561747874e-05, | |
| "loss": 5.9185, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.09041446631461034, | |
| "grad_norm": 2.269606113433838, | |
| "learning_rate": 4.862678504572926e-05, | |
| "loss": 5.9369, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.09121459433509362, | |
| "grad_norm": 2.749335765838623, | |
| "learning_rate": 4.8613413916671124e-05, | |
| "loss": 6.0648, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.0920147223555769, | |
| "grad_norm": 2.821913480758667, | |
| "learning_rate": 4.860004278761299e-05, | |
| "loss": 5.952, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.09281485037606017, | |
| "grad_norm": 2.640990734100342, | |
| "learning_rate": 4.858667165855485e-05, | |
| "loss": 6.0537, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.09361497839654345, | |
| "grad_norm": 3.570896625518799, | |
| "learning_rate": 4.857330052949671e-05, | |
| "loss": 5.7721, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.09441510641702672, | |
| "grad_norm": 3.245318651199341, | |
| "learning_rate": 4.8559929400438575e-05, | |
| "loss": 5.7305, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.09521523443751, | |
| "grad_norm": 4.075076580047607, | |
| "learning_rate": 4.854655827138044e-05, | |
| "loss": 5.974, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.09601536245799328, | |
| "grad_norm": 2.429893732070923, | |
| "learning_rate": 4.85331871423223e-05, | |
| "loss": 5.7828, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.09681549047847655, | |
| "grad_norm": 2.7077040672302246, | |
| "learning_rate": 4.851981601326416e-05, | |
| "loss": 5.9143, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.09761561849895983, | |
| "grad_norm": 2.767918586730957, | |
| "learning_rate": 4.8506444884206026e-05, | |
| "loss": 5.9449, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.09841574651944311, | |
| "grad_norm": 2.4544034004211426, | |
| "learning_rate": 4.849307375514789e-05, | |
| "loss": 6.0034, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.09921587453992639, | |
| "grad_norm": 5.215607643127441, | |
| "learning_rate": 4.847970262608975e-05, | |
| "loss": 5.867, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.10001600256040967, | |
| "grad_norm": 2.7856080532073975, | |
| "learning_rate": 4.8466331497031614e-05, | |
| "loss": 6.0213, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.10081613058089295, | |
| "grad_norm": 2.5528719425201416, | |
| "learning_rate": 4.8452960367973476e-05, | |
| "loss": 5.9634, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.10161625860137621, | |
| "grad_norm": 2.4917409420013428, | |
| "learning_rate": 4.843958923891533e-05, | |
| "loss": 5.887, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.10241638662185949, | |
| "grad_norm": 6.125699520111084, | |
| "learning_rate": 4.8426218109857195e-05, | |
| "loss": 6.1189, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.10321651464234277, | |
| "grad_norm": 2.783156156539917, | |
| "learning_rate": 4.841284698079906e-05, | |
| "loss": 5.9064, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.10401664266282605, | |
| "grad_norm": 3.611070156097412, | |
| "learning_rate": 4.839947585174092e-05, | |
| "loss": 5.9405, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.10481677068330933, | |
| "grad_norm": 4.296909809112549, | |
| "learning_rate": 4.838610472268278e-05, | |
| "loss": 5.9067, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.10561689870379261, | |
| "grad_norm": 2.4273040294647217, | |
| "learning_rate": 4.8372733593624646e-05, | |
| "loss": 5.888, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.10641702672427589, | |
| "grad_norm": 2.6499924659729004, | |
| "learning_rate": 4.835936246456651e-05, | |
| "loss": 5.9683, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.10721715474475917, | |
| "grad_norm": 3.1474297046661377, | |
| "learning_rate": 4.834599133550837e-05, | |
| "loss": 5.8946, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.10801728276524244, | |
| "grad_norm": 3.5050199031829834, | |
| "learning_rate": 4.8332620206450234e-05, | |
| "loss": 5.9179, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.10881741078572571, | |
| "grad_norm": 2.693700075149536, | |
| "learning_rate": 4.8319249077392096e-05, | |
| "loss": 5.7965, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10961753880620899, | |
| "grad_norm": 2.8202953338623047, | |
| "learning_rate": 4.830587794833396e-05, | |
| "loss": 5.9526, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.11041766682669227, | |
| "grad_norm": 2.514862060546875, | |
| "learning_rate": 4.829250681927582e-05, | |
| "loss": 5.936, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.11121779484717555, | |
| "grad_norm": 3.18804931640625, | |
| "learning_rate": 4.8279135690217685e-05, | |
| "loss": 5.9246, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.11201792286765883, | |
| "grad_norm": 2.77697491645813, | |
| "learning_rate": 4.826576456115955e-05, | |
| "loss": 5.9576, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.1128180508881421, | |
| "grad_norm": 2.762524127960205, | |
| "learning_rate": 4.825239343210141e-05, | |
| "loss": 5.9085, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.11361817890862538, | |
| "grad_norm": 2.4407670497894287, | |
| "learning_rate": 4.8239022303043266e-05, | |
| "loss": 5.9518, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.11441830692910866, | |
| "grad_norm": 3.1036713123321533, | |
| "learning_rate": 4.822565117398513e-05, | |
| "loss": 5.8412, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.11521843494959194, | |
| "grad_norm": 3.319058418273926, | |
| "learning_rate": 4.821228004492699e-05, | |
| "loss": 5.9733, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.1160185629700752, | |
| "grad_norm": 2.13468599319458, | |
| "learning_rate": 4.8198908915868854e-05, | |
| "loss": 5.9193, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.11681869099055849, | |
| "grad_norm": 2.6057028770446777, | |
| "learning_rate": 4.8185537786810717e-05, | |
| "loss": 5.9807, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.11761881901104176, | |
| "grad_norm": 2.7509753704071045, | |
| "learning_rate": 4.817216665775258e-05, | |
| "loss": 5.9534, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.11841894703152504, | |
| "grad_norm": 2.111055850982666, | |
| "learning_rate": 4.815879552869444e-05, | |
| "loss": 5.9207, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.11921907505200832, | |
| "grad_norm": 2.5271990299224854, | |
| "learning_rate": 4.8145424399636305e-05, | |
| "loss": 5.7148, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.1200192030724916, | |
| "grad_norm": 2.814138174057007, | |
| "learning_rate": 4.813205327057817e-05, | |
| "loss": 5.9498, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.12081933109297488, | |
| "grad_norm": 3.449355363845825, | |
| "learning_rate": 4.811868214152003e-05, | |
| "loss": 5.7814, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.12161945911345816, | |
| "grad_norm": 2.813746213912964, | |
| "learning_rate": 4.810531101246189e-05, | |
| "loss": 5.9517, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.12241958713394142, | |
| "grad_norm": 2.529242753982544, | |
| "learning_rate": 4.8091939883403755e-05, | |
| "loss": 5.8227, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.1232197151544247, | |
| "grad_norm": 2.2425034046173096, | |
| "learning_rate": 4.807856875434562e-05, | |
| "loss": 6.1064, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.12401984317490798, | |
| "grad_norm": 2.7732784748077393, | |
| "learning_rate": 4.806519762528748e-05, | |
| "loss": 5.8888, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.12481997119539126, | |
| "grad_norm": 2.5558009147644043, | |
| "learning_rate": 4.8051826496229343e-05, | |
| "loss": 5.8185, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.12562009921587455, | |
| "grad_norm": 2.884411096572876, | |
| "learning_rate": 4.8038455367171206e-05, | |
| "loss": 6.0534, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.12642022723635782, | |
| "grad_norm": 2.5747668743133545, | |
| "learning_rate": 4.802508423811307e-05, | |
| "loss": 5.8186, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.12722035525684108, | |
| "grad_norm": 2.324767827987671, | |
| "learning_rate": 4.801171310905493e-05, | |
| "loss": 5.8642, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.12802048327732438, | |
| "grad_norm": 2.2255160808563232, | |
| "learning_rate": 4.7998341979996794e-05, | |
| "loss": 5.8559, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.12882061129780764, | |
| "grad_norm": 2.97525954246521, | |
| "learning_rate": 4.798497085093866e-05, | |
| "loss": 5.8744, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.12962073931829093, | |
| "grad_norm": 2.23962664604187, | |
| "learning_rate": 4.797159972188052e-05, | |
| "loss": 5.7545, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.1304208673387742, | |
| "grad_norm": 3.6182124614715576, | |
| "learning_rate": 4.795822859282238e-05, | |
| "loss": 5.8872, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.1312209953592575, | |
| "grad_norm": 4.068545341491699, | |
| "learning_rate": 4.7944857463764245e-05, | |
| "loss": 5.9008, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.13202112337974076, | |
| "grad_norm": 3.627082109451294, | |
| "learning_rate": 4.793148633470611e-05, | |
| "loss": 5.8215, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.13282125140022405, | |
| "grad_norm": 3.0080721378326416, | |
| "learning_rate": 4.791811520564797e-05, | |
| "loss": 5.9086, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.13362137942070731, | |
| "grad_norm": 2.5463860034942627, | |
| "learning_rate": 4.790474407658983e-05, | |
| "loss": 5.776, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.13442150744119058, | |
| "grad_norm": 2.212488889694214, | |
| "learning_rate": 4.7891372947531696e-05, | |
| "loss": 6.006, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.13522163546167387, | |
| "grad_norm": 4.147563934326172, | |
| "learning_rate": 4.787800181847356e-05, | |
| "loss": 5.886, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.13602176348215714, | |
| "grad_norm": 2.6021018028259277, | |
| "learning_rate": 4.786463068941542e-05, | |
| "loss": 5.9182, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.13682189150264043, | |
| "grad_norm": 2.3109893798828125, | |
| "learning_rate": 4.7851259560357284e-05, | |
| "loss": 5.8084, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.1376220195231237, | |
| "grad_norm": 2.8678529262542725, | |
| "learning_rate": 4.7837888431299147e-05, | |
| "loss": 6.0363, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.138422147543607, | |
| "grad_norm": 2.1921958923339844, | |
| "learning_rate": 4.7824517302241e-05, | |
| "loss": 5.7667, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.13922227556409025, | |
| "grad_norm": 2.6883316040039062, | |
| "learning_rate": 4.7811146173182865e-05, | |
| "loss": 5.7906, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.14002240358457352, | |
| "grad_norm": 2.4079957008361816, | |
| "learning_rate": 4.779777504412473e-05, | |
| "loss": 5.7698, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.1408225316050568, | |
| "grad_norm": 4.29390287399292, | |
| "learning_rate": 4.778440391506659e-05, | |
| "loss": 5.9639, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.14162265962554008, | |
| "grad_norm": 4.133132457733154, | |
| "learning_rate": 4.777103278600845e-05, | |
| "loss": 6.0901, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.14242278764602337, | |
| "grad_norm": 3.871561288833618, | |
| "learning_rate": 4.7757661656950316e-05, | |
| "loss": 5.7455, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.14322291566650663, | |
| "grad_norm": 4.266111850738525, | |
| "learning_rate": 4.774429052789218e-05, | |
| "loss": 5.9971, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.14402304368698993, | |
| "grad_norm": 2.9000513553619385, | |
| "learning_rate": 4.773091939883404e-05, | |
| "loss": 5.9025, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1448231717074732, | |
| "grad_norm": 2.549964189529419, | |
| "learning_rate": 4.7717548269775904e-05, | |
| "loss": 5.768, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.14562329972795648, | |
| "grad_norm": 2.2882704734802246, | |
| "learning_rate": 4.770417714071777e-05, | |
| "loss": 6.022, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.14642342774843975, | |
| "grad_norm": 2.6501784324645996, | |
| "learning_rate": 4.769080601165963e-05, | |
| "loss": 5.8539, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.14722355576892301, | |
| "grad_norm": 2.3417108058929443, | |
| "learning_rate": 4.767743488260149e-05, | |
| "loss": 5.7734, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.1480236837894063, | |
| "grad_norm": 2.2151668071746826, | |
| "learning_rate": 4.7664063753543355e-05, | |
| "loss": 5.84, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.14882381180988957, | |
| "grad_norm": 3.114260196685791, | |
| "learning_rate": 4.765069262448522e-05, | |
| "loss": 5.9409, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.14962393983037287, | |
| "grad_norm": 2.4931910037994385, | |
| "learning_rate": 4.763732149542708e-05, | |
| "loss": 5.9396, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.15042406785085613, | |
| "grad_norm": 3.736487865447998, | |
| "learning_rate": 4.7623950366368936e-05, | |
| "loss": 5.7427, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.15122419587133942, | |
| "grad_norm": 4.730785846710205, | |
| "learning_rate": 4.76105792373108e-05, | |
| "loss": 5.9181, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.1520243238918227, | |
| "grad_norm": 2.9264132976531982, | |
| "learning_rate": 4.759720810825266e-05, | |
| "loss": 5.8967, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.15282445191230598, | |
| "grad_norm": 3.2538132667541504, | |
| "learning_rate": 4.7583836979194524e-05, | |
| "loss": 5.8459, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.15362457993278925, | |
| "grad_norm": 2.7208549976348877, | |
| "learning_rate": 4.757046585013639e-05, | |
| "loss": 5.7038, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.1544247079532725, | |
| "grad_norm": 2.7510788440704346, | |
| "learning_rate": 4.755709472107825e-05, | |
| "loss": 5.8524, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.1552248359737558, | |
| "grad_norm": 2.6565892696380615, | |
| "learning_rate": 4.754372359202011e-05, | |
| "loss": 5.6324, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.15602496399423907, | |
| "grad_norm": 2.954798936843872, | |
| "learning_rate": 4.7530352462961975e-05, | |
| "loss": 5.8388, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.15682509201472236, | |
| "grad_norm": 2.291714668273926, | |
| "learning_rate": 4.751698133390384e-05, | |
| "loss": 5.7504, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.15762522003520563, | |
| "grad_norm": 2.1387598514556885, | |
| "learning_rate": 4.75036102048457e-05, | |
| "loss": 5.7556, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.15842534805568892, | |
| "grad_norm": 2.290407180786133, | |
| "learning_rate": 4.749023907578756e-05, | |
| "loss": 5.7089, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.15922547607617218, | |
| "grad_norm": 2.852696657180786, | |
| "learning_rate": 4.7476867946729426e-05, | |
| "loss": 5.8656, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.16002560409665548, | |
| "grad_norm": 2.8190526962280273, | |
| "learning_rate": 4.746349681767129e-05, | |
| "loss": 6.0134, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16082573211713874, | |
| "grad_norm": 2.705008029937744, | |
| "learning_rate": 4.745012568861315e-05, | |
| "loss": 5.8713, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.161625860137622, | |
| "grad_norm": 3.571394205093384, | |
| "learning_rate": 4.7436754559555014e-05, | |
| "loss": 5.8329, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.1624259881581053, | |
| "grad_norm": 2.687455177307129, | |
| "learning_rate": 4.7423383430496876e-05, | |
| "loss": 5.8355, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.16322611617858857, | |
| "grad_norm": 2.6158690452575684, | |
| "learning_rate": 4.741001230143873e-05, | |
| "loss": 5.6938, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.16402624419907186, | |
| "grad_norm": 2.9657154083251953, | |
| "learning_rate": 4.7396641172380595e-05, | |
| "loss": 5.7514, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.16482637221955512, | |
| "grad_norm": 2.310607433319092, | |
| "learning_rate": 4.738327004332246e-05, | |
| "loss": 5.7397, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.16562650024003842, | |
| "grad_norm": 2.855271339416504, | |
| "learning_rate": 4.736989891426432e-05, | |
| "loss": 5.7645, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.16642662826052168, | |
| "grad_norm": 2.778768301010132, | |
| "learning_rate": 4.735652778520618e-05, | |
| "loss": 5.9582, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.16722675628100497, | |
| "grad_norm": 3.069973945617676, | |
| "learning_rate": 4.7343156656148046e-05, | |
| "loss": 5.8205, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.16802688430148824, | |
| "grad_norm": 3.5799551010131836, | |
| "learning_rate": 4.732978552708991e-05, | |
| "loss": 5.9001, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.1688270123219715, | |
| "grad_norm": 2.556668758392334, | |
| "learning_rate": 4.731641439803177e-05, | |
| "loss": 5.7258, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.1696271403424548, | |
| "grad_norm": 2.7847707271575928, | |
| "learning_rate": 4.7303043268973634e-05, | |
| "loss": 5.9007, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.17042726836293806, | |
| "grad_norm": 4.071508407592773, | |
| "learning_rate": 4.7289672139915496e-05, | |
| "loss": 5.7035, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.17122739638342135, | |
| "grad_norm": 2.6188418865203857, | |
| "learning_rate": 4.727630101085736e-05, | |
| "loss": 5.651, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.17202752440390462, | |
| "grad_norm": 1.952249526977539, | |
| "learning_rate": 4.726292988179922e-05, | |
| "loss": 6.1107, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.1728276524243879, | |
| "grad_norm": 2.299018144607544, | |
| "learning_rate": 4.7249558752741085e-05, | |
| "loss": 5.7609, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.17362778044487118, | |
| "grad_norm": 2.5578439235687256, | |
| "learning_rate": 4.723618762368295e-05, | |
| "loss": 5.792, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.17442790846535447, | |
| "grad_norm": 3.9921529293060303, | |
| "learning_rate": 4.722281649462481e-05, | |
| "loss": 5.7233, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.17522803648583773, | |
| "grad_norm": 2.5521302223205566, | |
| "learning_rate": 4.7209445365566666e-05, | |
| "loss": 5.807, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.176028164506321, | |
| "grad_norm": 2.71401047706604, | |
| "learning_rate": 4.719607423650853e-05, | |
| "loss": 5.6689, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1768282925268043, | |
| "grad_norm": 3.782607316970825, | |
| "learning_rate": 4.718270310745039e-05, | |
| "loss": 5.734, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.17762842054728756, | |
| "grad_norm": 2.57356333732605, | |
| "learning_rate": 4.7169331978392254e-05, | |
| "loss": 5.8101, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.17842854856777085, | |
| "grad_norm": 2.7005815505981445, | |
| "learning_rate": 4.715596084933412e-05, | |
| "loss": 6.0603, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.17922867658825412, | |
| "grad_norm": 2.081550359725952, | |
| "learning_rate": 4.714258972027598e-05, | |
| "loss": 5.7677, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.1800288046087374, | |
| "grad_norm": 3.6565728187561035, | |
| "learning_rate": 4.712921859121784e-05, | |
| "loss": 5.9672, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.18082893262922067, | |
| "grad_norm": 2.4702320098876953, | |
| "learning_rate": 4.7115847462159705e-05, | |
| "loss": 5.8397, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.18162906064970397, | |
| "grad_norm": 3.335736036300659, | |
| "learning_rate": 4.710247633310157e-05, | |
| "loss": 5.7021, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.18242918867018723, | |
| "grad_norm": 3.3939075469970703, | |
| "learning_rate": 4.708910520404343e-05, | |
| "loss": 5.8464, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.1832293166906705, | |
| "grad_norm": 2.4869279861450195, | |
| "learning_rate": 4.707573407498529e-05, | |
| "loss": 5.6904, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.1840294447111538, | |
| "grad_norm": 2.4240360260009766, | |
| "learning_rate": 4.7062362945927155e-05, | |
| "loss": 5.7227, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.18482957273163705, | |
| "grad_norm": 2.428786039352417, | |
| "learning_rate": 4.704899181686902e-05, | |
| "loss": 5.8295, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.18562970075212035, | |
| "grad_norm": 3.3214187622070312, | |
| "learning_rate": 4.703562068781088e-05, | |
| "loss": 5.8341, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.1864298287726036, | |
| "grad_norm": 3.2146456241607666, | |
| "learning_rate": 4.7022249558752744e-05, | |
| "loss": 5.7217, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.1872299567930869, | |
| "grad_norm": 4.442914009094238, | |
| "learning_rate": 4.7008878429694606e-05, | |
| "loss": 5.9003, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.18803008481357017, | |
| "grad_norm": 1.9268267154693604, | |
| "learning_rate": 4.699550730063646e-05, | |
| "loss": 5.8292, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.18883021283405343, | |
| "grad_norm": 3.130021095275879, | |
| "learning_rate": 4.6982136171578325e-05, | |
| "loss": 5.6864, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.18963034085453673, | |
| "grad_norm": 2.8835690021514893, | |
| "learning_rate": 4.696876504252019e-05, | |
| "loss": 5.829, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.19043046887502, | |
| "grad_norm": 2.4171135425567627, | |
| "learning_rate": 4.695539391346205e-05, | |
| "loss": 5.7972, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.19123059689550329, | |
| "grad_norm": 3.782817840576172, | |
| "learning_rate": 4.694202278440391e-05, | |
| "loss": 5.8497, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.19203072491598655, | |
| "grad_norm": 2.475249767303467, | |
| "learning_rate": 4.6928651655345776e-05, | |
| "loss": 5.9237, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.19283085293646984, | |
| "grad_norm": 2.5809242725372314, | |
| "learning_rate": 4.691528052628764e-05, | |
| "loss": 5.7756, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.1936309809569531, | |
| "grad_norm": 2.6922059059143066, | |
| "learning_rate": 4.69019093972295e-05, | |
| "loss": 5.9326, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.1944311089774364, | |
| "grad_norm": 2.7542431354522705, | |
| "learning_rate": 4.6888538268171364e-05, | |
| "loss": 5.6279, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.19523123699791967, | |
| "grad_norm": 2.4063303470611572, | |
| "learning_rate": 4.6875167139113226e-05, | |
| "loss": 5.91, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.19603136501840293, | |
| "grad_norm": 4.855547904968262, | |
| "learning_rate": 4.686179601005509e-05, | |
| "loss": 5.7286, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.19683149303888622, | |
| "grad_norm": 2.9875595569610596, | |
| "learning_rate": 4.684842488099695e-05, | |
| "loss": 5.8299, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.1976316210593695, | |
| "grad_norm": 4.467639923095703, | |
| "learning_rate": 4.6835053751938814e-05, | |
| "loss": 5.8469, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.19843174907985278, | |
| "grad_norm": 2.2144124507904053, | |
| "learning_rate": 4.682168262288068e-05, | |
| "loss": 5.7871, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.19923187710033605, | |
| "grad_norm": 2.4507012367248535, | |
| "learning_rate": 4.680831149382254e-05, | |
| "loss": 5.7529, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.20003200512081934, | |
| "grad_norm": 2.208648681640625, | |
| "learning_rate": 4.67949403647644e-05, | |
| "loss": 5.7265, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2008321331413026, | |
| "grad_norm": 2.560302257537842, | |
| "learning_rate": 4.6781569235706265e-05, | |
| "loss": 5.7842, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.2016322611617859, | |
| "grad_norm": 2.354292154312134, | |
| "learning_rate": 4.676819810664813e-05, | |
| "loss": 5.8468, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.20243238918226916, | |
| "grad_norm": 2.9559860229492188, | |
| "learning_rate": 4.675482697758999e-05, | |
| "loss": 5.7003, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.20323251720275243, | |
| "grad_norm": 3.251077651977539, | |
| "learning_rate": 4.674145584853185e-05, | |
| "loss": 5.8129, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.20403264522323572, | |
| "grad_norm": 2.7863471508026123, | |
| "learning_rate": 4.6728084719473716e-05, | |
| "loss": 5.6814, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.20483277324371899, | |
| "grad_norm": 2.9006989002227783, | |
| "learning_rate": 4.671471359041558e-05, | |
| "loss": 5.8292, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.20563290126420228, | |
| "grad_norm": 2.930689573287964, | |
| "learning_rate": 4.670134246135744e-05, | |
| "loss": 5.8825, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.20643302928468554, | |
| "grad_norm": 2.3105032444000244, | |
| "learning_rate": 4.6687971332299304e-05, | |
| "loss": 5.7039, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.20723315730516884, | |
| "grad_norm": 3.1141879558563232, | |
| "learning_rate": 4.667460020324117e-05, | |
| "loss": 5.8692, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.2080332853256521, | |
| "grad_norm": 3.5017199516296387, | |
| "learning_rate": 4.666122907418303e-05, | |
| "loss": 5.7922, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.2088334133461354, | |
| "grad_norm": 2.657975912094116, | |
| "learning_rate": 4.664785794512489e-05, | |
| "loss": 5.7736, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.20963354136661866, | |
| "grad_norm": 3.246952772140503, | |
| "learning_rate": 4.6634486816066755e-05, | |
| "loss": 5.768, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.21043366938710192, | |
| "grad_norm": 6.832335948944092, | |
| "learning_rate": 4.662111568700862e-05, | |
| "loss": 5.6752, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.21123379740758522, | |
| "grad_norm": 3.2479753494262695, | |
| "learning_rate": 4.660774455795048e-05, | |
| "loss": 5.8015, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.21203392542806848, | |
| "grad_norm": 2.809082508087158, | |
| "learning_rate": 4.659437342889234e-05, | |
| "loss": 5.8663, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.21283405344855177, | |
| "grad_norm": 3.7948036193847656, | |
| "learning_rate": 4.65810022998342e-05, | |
| "loss": 5.889, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.21363418146903504, | |
| "grad_norm": 2.836090564727783, | |
| "learning_rate": 4.656763117077606e-05, | |
| "loss": 5.7516, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.21443430948951833, | |
| "grad_norm": 3.0940232276916504, | |
| "learning_rate": 4.6554260041717924e-05, | |
| "loss": 5.7033, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.2152344375100016, | |
| "grad_norm": 2.436757802963257, | |
| "learning_rate": 4.654088891265979e-05, | |
| "loss": 5.746, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.2160345655304849, | |
| "grad_norm": 2.4339609146118164, | |
| "learning_rate": 4.652751778360165e-05, | |
| "loss": 5.828, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.21683469355096816, | |
| "grad_norm": 2.379366874694824, | |
| "learning_rate": 4.651414665454351e-05, | |
| "loss": 5.719, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.21763482157145142, | |
| "grad_norm": 2.1722371578216553, | |
| "learning_rate": 4.6500775525485375e-05, | |
| "loss": 5.7875, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.2184349495919347, | |
| "grad_norm": 3.633279800415039, | |
| "learning_rate": 4.648740439642724e-05, | |
| "loss": 5.802, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.21923507761241798, | |
| "grad_norm": 2.4091219902038574, | |
| "learning_rate": 4.64740332673691e-05, | |
| "loss": 5.8197, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.22003520563290127, | |
| "grad_norm": 2.7289021015167236, | |
| "learning_rate": 4.646066213831096e-05, | |
| "loss": 5.9445, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.22083533365338454, | |
| "grad_norm": 2.376481294631958, | |
| "learning_rate": 4.6447291009252826e-05, | |
| "loss": 5.9943, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.22163546167386783, | |
| "grad_norm": 2.6542563438415527, | |
| "learning_rate": 4.643391988019469e-05, | |
| "loss": 5.6049, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.2224355896943511, | |
| "grad_norm": 2.320472240447998, | |
| "learning_rate": 4.642054875113655e-05, | |
| "loss": 5.7637, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.2232357177148344, | |
| "grad_norm": 2.8923239707946777, | |
| "learning_rate": 4.6407177622078414e-05, | |
| "loss": 5.9666, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.22403584573531765, | |
| "grad_norm": 4.277271270751953, | |
| "learning_rate": 4.6393806493020276e-05, | |
| "loss": 5.8393, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.22483597375580092, | |
| "grad_norm": 2.797428607940674, | |
| "learning_rate": 4.638043536396213e-05, | |
| "loss": 5.759, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.2256361017762842, | |
| "grad_norm": 2.1849517822265625, | |
| "learning_rate": 4.6367064234903995e-05, | |
| "loss": 5.7514, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.22643622979676747, | |
| "grad_norm": 2.8607492446899414, | |
| "learning_rate": 4.635369310584586e-05, | |
| "loss": 5.7545, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.22723635781725077, | |
| "grad_norm": 3.722041130065918, | |
| "learning_rate": 4.634032197678772e-05, | |
| "loss": 5.8011, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.22803648583773403, | |
| "grad_norm": 2.8563833236694336, | |
| "learning_rate": 4.632695084772958e-05, | |
| "loss": 5.8569, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.22883661385821732, | |
| "grad_norm": 3.5724806785583496, | |
| "learning_rate": 4.6313579718671446e-05, | |
| "loss": 5.9649, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.2296367418787006, | |
| "grad_norm": 2.380469560623169, | |
| "learning_rate": 4.630020858961331e-05, | |
| "loss": 5.7467, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.23043686989918388, | |
| "grad_norm": 3.1629838943481445, | |
| "learning_rate": 4.628683746055517e-05, | |
| "loss": 5.642, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.23123699791966715, | |
| "grad_norm": 2.1239373683929443, | |
| "learning_rate": 4.6273466331497034e-05, | |
| "loss": 5.6483, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.2320371259401504, | |
| "grad_norm": 3.049079418182373, | |
| "learning_rate": 4.6260095202438897e-05, | |
| "loss": 5.9736, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.2328372539606337, | |
| "grad_norm": 2.556830406188965, | |
| "learning_rate": 4.624672407338076e-05, | |
| "loss": 5.6037, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.23363738198111697, | |
| "grad_norm": 2.8762035369873047, | |
| "learning_rate": 4.623335294432262e-05, | |
| "loss": 5.6345, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.23443751000160026, | |
| "grad_norm": 2.11167573928833, | |
| "learning_rate": 4.6219981815264485e-05, | |
| "loss": 5.7822, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.23523763802208353, | |
| "grad_norm": 4.623869895935059, | |
| "learning_rate": 4.620661068620635e-05, | |
| "loss": 5.7063, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.23603776604256682, | |
| "grad_norm": 2.4420578479766846, | |
| "learning_rate": 4.619323955714821e-05, | |
| "loss": 5.686, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.2368378940630501, | |
| "grad_norm": 2.6543869972229004, | |
| "learning_rate": 4.617986842809007e-05, | |
| "loss": 5.7802, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.23763802208353338, | |
| "grad_norm": 2.6264312267303467, | |
| "learning_rate": 4.616649729903193e-05, | |
| "loss": 5.6667, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.23843815010401664, | |
| "grad_norm": 2.4579195976257324, | |
| "learning_rate": 4.615312616997379e-05, | |
| "loss": 5.6738, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.2392382781244999, | |
| "grad_norm": 2.299448251724243, | |
| "learning_rate": 4.6139755040915654e-05, | |
| "loss": 5.8622, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.2400384061449832, | |
| "grad_norm": 3.6527328491210938, | |
| "learning_rate": 4.612638391185752e-05, | |
| "loss": 5.6346, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24083853416546647, | |
| "grad_norm": 2.217876434326172, | |
| "learning_rate": 4.611301278279938e-05, | |
| "loss": 5.7892, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.24163866218594976, | |
| "grad_norm": 3.500544309616089, | |
| "learning_rate": 4.609964165374124e-05, | |
| "loss": 5.8026, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.24243879020643302, | |
| "grad_norm": 3.1694483757019043, | |
| "learning_rate": 4.6086270524683105e-05, | |
| "loss": 5.827, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.24323891822691632, | |
| "grad_norm": 2.899625778198242, | |
| "learning_rate": 4.607289939562497e-05, | |
| "loss": 5.7384, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.24403904624739958, | |
| "grad_norm": 2.8286776542663574, | |
| "learning_rate": 4.605952826656683e-05, | |
| "loss": 5.7629, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.24483917426788285, | |
| "grad_norm": 2.7585489749908447, | |
| "learning_rate": 4.604615713750869e-05, | |
| "loss": 5.7462, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.24563930228836614, | |
| "grad_norm": 2.2017667293548584, | |
| "learning_rate": 4.6032786008450555e-05, | |
| "loss": 5.844, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.2464394303088494, | |
| "grad_norm": 4.679725170135498, | |
| "learning_rate": 4.601941487939242e-05, | |
| "loss": 5.7254, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.2472395583293327, | |
| "grad_norm": 2.923884868621826, | |
| "learning_rate": 4.600604375033428e-05, | |
| "loss": 5.703, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.24803968634981596, | |
| "grad_norm": 2.2205090522766113, | |
| "learning_rate": 4.5992672621276144e-05, | |
| "loss": 5.7185, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.24883981437029926, | |
| "grad_norm": 2.852313280105591, | |
| "learning_rate": 4.5979301492218006e-05, | |
| "loss": 5.5653, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.24963994239078252, | |
| "grad_norm": 2.7683911323547363, | |
| "learning_rate": 4.596593036315986e-05, | |
| "loss": 5.7262, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.2504400704112658, | |
| "grad_norm": 3.1315665245056152, | |
| "learning_rate": 4.5952559234101725e-05, | |
| "loss": 5.7524, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.2512401984317491, | |
| "grad_norm": 2.5233592987060547, | |
| "learning_rate": 4.593918810504359e-05, | |
| "loss": 5.7443, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.25204032645223234, | |
| "grad_norm": 2.3802831172943115, | |
| "learning_rate": 4.592581697598545e-05, | |
| "loss": 5.8091, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.25284045447271564, | |
| "grad_norm": 2.378218412399292, | |
| "learning_rate": 4.591244584692731e-05, | |
| "loss": 5.7741, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.25364058249319893, | |
| "grad_norm": 4.712483882904053, | |
| "learning_rate": 4.5899074717869176e-05, | |
| "loss": 5.8643, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.25444071051368217, | |
| "grad_norm": 2.798752784729004, | |
| "learning_rate": 4.588570358881104e-05, | |
| "loss": 5.7984, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.25524083853416546, | |
| "grad_norm": 2.302037477493286, | |
| "learning_rate": 4.58723324597529e-05, | |
| "loss": 5.6548, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.25604096655464875, | |
| "grad_norm": 2.8621273040771484, | |
| "learning_rate": 4.5858961330694764e-05, | |
| "loss": 5.6875, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.25684109457513205, | |
| "grad_norm": 2.9079480171203613, | |
| "learning_rate": 4.5845590201636626e-05, | |
| "loss": 5.8801, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.2576412225956153, | |
| "grad_norm": 2.9576847553253174, | |
| "learning_rate": 4.583221907257849e-05, | |
| "loss": 5.6646, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.2584413506160986, | |
| "grad_norm": 4.085951805114746, | |
| "learning_rate": 4.581884794352035e-05, | |
| "loss": 5.9078, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.25924147863658187, | |
| "grad_norm": 2.622903347015381, | |
| "learning_rate": 4.5805476814462214e-05, | |
| "loss": 5.6821, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.2600416066570651, | |
| "grad_norm": 1.794255256652832, | |
| "learning_rate": 4.579210568540408e-05, | |
| "loss": 5.751, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.2608417346775484, | |
| "grad_norm": 3.074042558670044, | |
| "learning_rate": 4.577873455634594e-05, | |
| "loss": 5.7864, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.2616418626980317, | |
| "grad_norm": 2.3138844966888428, | |
| "learning_rate": 4.57653634272878e-05, | |
| "loss": 5.693, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.262441990718515, | |
| "grad_norm": 3.8877549171447754, | |
| "learning_rate": 4.5751992298229665e-05, | |
| "loss": 5.7154, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.2632421187389982, | |
| "grad_norm": 2.9623680114746094, | |
| "learning_rate": 4.573862116917153e-05, | |
| "loss": 5.7514, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.2640422467594815, | |
| "grad_norm": 2.840122938156128, | |
| "learning_rate": 4.572525004011339e-05, | |
| "loss": 5.7397, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.2648423747799648, | |
| "grad_norm": 2.9699277877807617, | |
| "learning_rate": 4.571187891105525e-05, | |
| "loss": 5.7626, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.2656425028004481, | |
| "grad_norm": 2.6493773460388184, | |
| "learning_rate": 4.5698507781997116e-05, | |
| "loss": 5.7619, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.26644263082093134, | |
| "grad_norm": 2.283259868621826, | |
| "learning_rate": 4.568513665293898e-05, | |
| "loss": 5.8409, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.26724275884141463, | |
| "grad_norm": 1.9254164695739746, | |
| "learning_rate": 4.567176552388084e-05, | |
| "loss": 5.8218, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.2680428868618979, | |
| "grad_norm": 2.382345676422119, | |
| "learning_rate": 4.5658394394822704e-05, | |
| "loss": 5.6865, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.26884301488238116, | |
| "grad_norm": 2.6039271354675293, | |
| "learning_rate": 4.564502326576457e-05, | |
| "loss": 5.7254, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.26964314290286445, | |
| "grad_norm": 2.0948996543884277, | |
| "learning_rate": 4.563165213670643e-05, | |
| "loss": 5.7589, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.27044327092334774, | |
| "grad_norm": 2.939955711364746, | |
| "learning_rate": 4.561828100764829e-05, | |
| "loss": 5.8298, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.27124339894383104, | |
| "grad_norm": 2.748307466506958, | |
| "learning_rate": 4.5604909878590155e-05, | |
| "loss": 5.8505, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.2720435269643143, | |
| "grad_norm": 2.7122459411621094, | |
| "learning_rate": 4.559153874953202e-05, | |
| "loss": 5.9027, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.27284365498479757, | |
| "grad_norm": 3.6053593158721924, | |
| "learning_rate": 4.557816762047388e-05, | |
| "loss": 5.6746, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.27364378300528086, | |
| "grad_norm": 4.433299541473389, | |
| "learning_rate": 4.556479649141574e-05, | |
| "loss": 5.7713, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.2744439110257641, | |
| "grad_norm": 2.5253539085388184, | |
| "learning_rate": 4.55514253623576e-05, | |
| "loss": 5.8219, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.2752440390462474, | |
| "grad_norm": 4.9358062744140625, | |
| "learning_rate": 4.553805423329946e-05, | |
| "loss": 5.7971, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.2760441670667307, | |
| "grad_norm": 2.6247594356536865, | |
| "learning_rate": 4.5524683104241324e-05, | |
| "loss": 5.1528, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.276844295087214, | |
| "grad_norm": 2.8152048587799072, | |
| "learning_rate": 4.551131197518319e-05, | |
| "loss": 5.7955, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.2776444231076972, | |
| "grad_norm": 2.143275499343872, | |
| "learning_rate": 4.549794084612505e-05, | |
| "loss": 5.6875, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.2784445511281805, | |
| "grad_norm": 2.9896023273468018, | |
| "learning_rate": 4.548456971706691e-05, | |
| "loss": 5.7981, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.2792446791486638, | |
| "grad_norm": 3.5231759548187256, | |
| "learning_rate": 4.5471198588008775e-05, | |
| "loss": 5.7343, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.28004480716914704, | |
| "grad_norm": 2.391721487045288, | |
| "learning_rate": 4.545782745895064e-05, | |
| "loss": 5.6821, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.28084493518963033, | |
| "grad_norm": 2.414992332458496, | |
| "learning_rate": 4.54444563298925e-05, | |
| "loss": 5.7357, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.2816450632101136, | |
| "grad_norm": 2.7502214908599854, | |
| "learning_rate": 4.543108520083436e-05, | |
| "loss": 5.6511, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.2824451912305969, | |
| "grad_norm": 2.1601436138153076, | |
| "learning_rate": 4.5417714071776226e-05, | |
| "loss": 5.6249, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.28324531925108015, | |
| "grad_norm": 2.89013671875, | |
| "learning_rate": 4.540434294271809e-05, | |
| "loss": 5.7583, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.28404544727156344, | |
| "grad_norm": 2.4915778636932373, | |
| "learning_rate": 4.539097181365995e-05, | |
| "loss": 5.6957, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.28484557529204674, | |
| "grad_norm": 5.053386688232422, | |
| "learning_rate": 4.5377600684601814e-05, | |
| "loss": 5.632, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.28564570331253003, | |
| "grad_norm": 2.6207687854766846, | |
| "learning_rate": 4.5364229555543676e-05, | |
| "loss": 5.8514, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.28644583133301327, | |
| "grad_norm": 4.157670497894287, | |
| "learning_rate": 4.535085842648553e-05, | |
| "loss": 5.7608, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.28724595935349656, | |
| "grad_norm": 3.4464797973632812, | |
| "learning_rate": 4.5337487297427395e-05, | |
| "loss": 5.6737, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.28804608737397985, | |
| "grad_norm": 4.255002498626709, | |
| "learning_rate": 4.532411616836926e-05, | |
| "loss": 5.7977, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.2888462153944631, | |
| "grad_norm": 2.7926547527313232, | |
| "learning_rate": 4.531074503931112e-05, | |
| "loss": 5.6891, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.2896463434149464, | |
| "grad_norm": 3.150400400161743, | |
| "learning_rate": 4.529737391025298e-05, | |
| "loss": 5.7931, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.2904464714354297, | |
| "grad_norm": 2.1223199367523193, | |
| "learning_rate": 4.5284002781194846e-05, | |
| "loss": 5.8646, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.29124659945591297, | |
| "grad_norm": 3.950665235519409, | |
| "learning_rate": 4.527063165213671e-05, | |
| "loss": 5.7008, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.2920467274763962, | |
| "grad_norm": 2.995692729949951, | |
| "learning_rate": 4.525726052307857e-05, | |
| "loss": 5.688, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.2928468554968795, | |
| "grad_norm": 2.041736125946045, | |
| "learning_rate": 4.5243889394020434e-05, | |
| "loss": 5.7301, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.2936469835173628, | |
| "grad_norm": 2.541757106781006, | |
| "learning_rate": 4.5230518264962297e-05, | |
| "loss": 5.5606, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.29444711153784603, | |
| "grad_norm": 2.140761613845825, | |
| "learning_rate": 4.521714713590416e-05, | |
| "loss": 5.7671, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.2952472395583293, | |
| "grad_norm": 2.6869146823883057, | |
| "learning_rate": 4.520377600684602e-05, | |
| "loss": 5.6452, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.2960473675788126, | |
| "grad_norm": 3.072376012802124, | |
| "learning_rate": 4.5190404877787885e-05, | |
| "loss": 5.6956, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.2968474955992959, | |
| "grad_norm": 2.5933837890625, | |
| "learning_rate": 4.517703374872975e-05, | |
| "loss": 5.6212, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.29764762361977914, | |
| "grad_norm": 3.0443103313446045, | |
| "learning_rate": 4.516366261967161e-05, | |
| "loss": 5.7849, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.29844775164026244, | |
| "grad_norm": 2.673583745956421, | |
| "learning_rate": 4.515029149061347e-05, | |
| "loss": 5.6186, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.29924787966074573, | |
| "grad_norm": 2.3276283740997314, | |
| "learning_rate": 4.513692036155533e-05, | |
| "loss": 5.9188, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.300048007681229, | |
| "grad_norm": 5.504491329193115, | |
| "learning_rate": 4.512354923249719e-05, | |
| "loss": 5.5676, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.30084813570171226, | |
| "grad_norm": 2.4181482791900635, | |
| "learning_rate": 4.5110178103439054e-05, | |
| "loss": 5.6852, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.30164826372219555, | |
| "grad_norm": 2.2489006519317627, | |
| "learning_rate": 4.509680697438092e-05, | |
| "loss": 5.7003, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.30244839174267885, | |
| "grad_norm": 2.6925253868103027, | |
| "learning_rate": 4.508343584532278e-05, | |
| "loss": 5.8176, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.3032485197631621, | |
| "grad_norm": 2.904318332672119, | |
| "learning_rate": 4.507006471626464e-05, | |
| "loss": 5.6912, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.3040486477836454, | |
| "grad_norm": 3.3189070224761963, | |
| "learning_rate": 4.5056693587206505e-05, | |
| "loss": 5.8706, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.30484877580412867, | |
| "grad_norm": 2.8324170112609863, | |
| "learning_rate": 4.504332245814837e-05, | |
| "loss": 5.8795, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.30564890382461196, | |
| "grad_norm": 3.113417148590088, | |
| "learning_rate": 4.502995132909023e-05, | |
| "loss": 5.8689, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.3064490318450952, | |
| "grad_norm": 2.469269275665283, | |
| "learning_rate": 4.501658020003209e-05, | |
| "loss": 5.7934, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.3072491598655785, | |
| "grad_norm": 2.778571128845215, | |
| "learning_rate": 4.5003209070973956e-05, | |
| "loss": 5.8577, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.3080492878860618, | |
| "grad_norm": 3.4269161224365234, | |
| "learning_rate": 4.498983794191582e-05, | |
| "loss": 5.8378, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.308849415906545, | |
| "grad_norm": 3.417850971221924, | |
| "learning_rate": 4.497646681285768e-05, | |
| "loss": 5.6532, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.3096495439270283, | |
| "grad_norm": 2.389784097671509, | |
| "learning_rate": 4.4963095683799544e-05, | |
| "loss": 5.5454, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.3104496719475116, | |
| "grad_norm": 2.384453296661377, | |
| "learning_rate": 4.4949724554741406e-05, | |
| "loss": 5.8014, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.3112497999679949, | |
| "grad_norm": 1.913668155670166, | |
| "learning_rate": 4.493635342568326e-05, | |
| "loss": 5.6033, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.31204992798847814, | |
| "grad_norm": 3.4930074214935303, | |
| "learning_rate": 4.4922982296625125e-05, | |
| "loss": 5.7649, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.31285005600896143, | |
| "grad_norm": 3.517458200454712, | |
| "learning_rate": 4.490961116756699e-05, | |
| "loss": 5.5635, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.3136501840294447, | |
| "grad_norm": 2.611274480819702, | |
| "learning_rate": 4.489624003850885e-05, | |
| "loss": 5.8121, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.314450312049928, | |
| "grad_norm": 2.373997926712036, | |
| "learning_rate": 4.488286890945071e-05, | |
| "loss": 5.6002, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.31525044007041125, | |
| "grad_norm": 2.554847002029419, | |
| "learning_rate": 4.4869497780392576e-05, | |
| "loss": 5.6432, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.31605056809089455, | |
| "grad_norm": 3.3720595836639404, | |
| "learning_rate": 4.485612665133444e-05, | |
| "loss": 5.5794, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.31685069611137784, | |
| "grad_norm": 2.2308788299560547, | |
| "learning_rate": 4.48427555222763e-05, | |
| "loss": 5.794, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.3176508241318611, | |
| "grad_norm": 2.0659661293029785, | |
| "learning_rate": 4.4829384393218164e-05, | |
| "loss": 5.5383, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.31845095215234437, | |
| "grad_norm": 3.2644894123077393, | |
| "learning_rate": 4.4816013264160026e-05, | |
| "loss": 5.6979, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.31925108017282766, | |
| "grad_norm": 2.3485729694366455, | |
| "learning_rate": 4.480264213510189e-05, | |
| "loss": 5.7214, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.32005120819331095, | |
| "grad_norm": 2.7470600605010986, | |
| "learning_rate": 4.478927100604375e-05, | |
| "loss": 5.6032, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3208513362137942, | |
| "grad_norm": 2.1622989177703857, | |
| "learning_rate": 4.4775899876985614e-05, | |
| "loss": 5.7976, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.3216514642342775, | |
| "grad_norm": 2.7463905811309814, | |
| "learning_rate": 4.476252874792748e-05, | |
| "loss": 5.7181, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.3224515922547608, | |
| "grad_norm": 3.503662109375, | |
| "learning_rate": 4.474915761886934e-05, | |
| "loss": 5.8092, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.323251720275244, | |
| "grad_norm": 2.6073853969573975, | |
| "learning_rate": 4.47357864898112e-05, | |
| "loss": 5.7876, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3240518482957273, | |
| "grad_norm": 3.354768991470337, | |
| "learning_rate": 4.472241536075306e-05, | |
| "loss": 5.7741, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.3248519763162106, | |
| "grad_norm": 2.648145914077759, | |
| "learning_rate": 4.470904423169492e-05, | |
| "loss": 5.7522, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3256521043366939, | |
| "grad_norm": 3.086655378341675, | |
| "learning_rate": 4.4695673102636784e-05, | |
| "loss": 5.81, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.32645223235717713, | |
| "grad_norm": 2.230905771255493, | |
| "learning_rate": 4.4682301973578647e-05, | |
| "loss": 5.8839, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.3272523603776604, | |
| "grad_norm": 2.5391674041748047, | |
| "learning_rate": 4.466893084452051e-05, | |
| "loss": 5.5535, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.3280524883981437, | |
| "grad_norm": 2.7574117183685303, | |
| "learning_rate": 4.465555971546237e-05, | |
| "loss": 5.8275, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.32885261641862695, | |
| "grad_norm": 3.1114678382873535, | |
| "learning_rate": 4.4642188586404235e-05, | |
| "loss": 5.6876, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.32965274443911025, | |
| "grad_norm": 2.404892683029175, | |
| "learning_rate": 4.46288174573461e-05, | |
| "loss": 5.6876, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.33045287245959354, | |
| "grad_norm": 2.590759754180908, | |
| "learning_rate": 4.461544632828796e-05, | |
| "loss": 5.802, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.33125300048007683, | |
| "grad_norm": 2.4358649253845215, | |
| "learning_rate": 4.460207519922982e-05, | |
| "loss": 5.632, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.33205312850056007, | |
| "grad_norm": 3.9567458629608154, | |
| "learning_rate": 4.4588704070171685e-05, | |
| "loss": 5.8761, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.33285325652104336, | |
| "grad_norm": 2.3808743953704834, | |
| "learning_rate": 4.457533294111355e-05, | |
| "loss": 5.6815, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.33365338454152665, | |
| "grad_norm": 2.6527156829833984, | |
| "learning_rate": 4.456196181205541e-05, | |
| "loss": 5.805, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.33445351256200995, | |
| "grad_norm": 2.351062536239624, | |
| "learning_rate": 4.4548590682997273e-05, | |
| "loss": 5.6681, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.3352536405824932, | |
| "grad_norm": 2.3213460445404053, | |
| "learning_rate": 4.4535219553939136e-05, | |
| "loss": 5.6363, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.3360537686029765, | |
| "grad_norm": 1.9470767974853516, | |
| "learning_rate": 4.4521848424881e-05, | |
| "loss": 5.8772, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.33685389662345977, | |
| "grad_norm": 4.303500652313232, | |
| "learning_rate": 4.450847729582286e-05, | |
| "loss": 5.6185, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.337654024643943, | |
| "grad_norm": 2.713275909423828, | |
| "learning_rate": 4.4495106166764724e-05, | |
| "loss": 5.6754, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3384541526644263, | |
| "grad_norm": 2.34993314743042, | |
| "learning_rate": 4.448173503770659e-05, | |
| "loss": 5.7003, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.3392542806849096, | |
| "grad_norm": 2.276228666305542, | |
| "learning_rate": 4.446836390864845e-05, | |
| "loss": 5.6, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.3400544087053929, | |
| "grad_norm": 2.3635685443878174, | |
| "learning_rate": 4.445499277959031e-05, | |
| "loss": 5.7373, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.3408545367258761, | |
| "grad_norm": 3.100604772567749, | |
| "learning_rate": 4.4441621650532175e-05, | |
| "loss": 5.7354, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.3416546647463594, | |
| "grad_norm": 2.6743876934051514, | |
| "learning_rate": 4.442825052147404e-05, | |
| "loss": 5.7544, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.3424547927668427, | |
| "grad_norm": 2.5783612728118896, | |
| "learning_rate": 4.44148793924159e-05, | |
| "loss": 5.8826, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.34325492078732595, | |
| "grad_norm": 2.8976659774780273, | |
| "learning_rate": 4.440150826335776e-05, | |
| "loss": 5.5418, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.34405504880780924, | |
| "grad_norm": 2.1061089038848877, | |
| "learning_rate": 4.4388137134299626e-05, | |
| "loss": 5.6406, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.34485517682829253, | |
| "grad_norm": 2.1303789615631104, | |
| "learning_rate": 4.437476600524149e-05, | |
| "loss": 5.6491, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.3456553048487758, | |
| "grad_norm": 2.6240499019622803, | |
| "learning_rate": 4.436139487618335e-05, | |
| "loss": 5.7161, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.34645543286925906, | |
| "grad_norm": 2.325155019760132, | |
| "learning_rate": 4.4348023747125214e-05, | |
| "loss": 5.6172, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.34725556088974235, | |
| "grad_norm": 2.8844404220581055, | |
| "learning_rate": 4.4334652618067076e-05, | |
| "loss": 5.7438, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.34805568891022565, | |
| "grad_norm": 2.375324249267578, | |
| "learning_rate": 4.432128148900894e-05, | |
| "loss": 5.8335, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.34885581693070894, | |
| "grad_norm": 2.1572377681732178, | |
| "learning_rate": 4.4307910359950795e-05, | |
| "loss": 5.706, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.3496559449511922, | |
| "grad_norm": 2.5218889713287354, | |
| "learning_rate": 4.429453923089266e-05, | |
| "loss": 5.7487, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.35045607297167547, | |
| "grad_norm": 2.636223554611206, | |
| "learning_rate": 4.428116810183452e-05, | |
| "loss": 5.8327, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.35125620099215876, | |
| "grad_norm": 2.436155080795288, | |
| "learning_rate": 4.426779697277638e-05, | |
| "loss": 5.6895, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.352056329012642, | |
| "grad_norm": 3.4435484409332275, | |
| "learning_rate": 4.4254425843718246e-05, | |
| "loss": 5.6171, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.3528564570331253, | |
| "grad_norm": 2.3990628719329834, | |
| "learning_rate": 4.424105471466011e-05, | |
| "loss": 5.7574, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.3536565850536086, | |
| "grad_norm": 2.544774293899536, | |
| "learning_rate": 4.422768358560197e-05, | |
| "loss": 5.558, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.3544567130740919, | |
| "grad_norm": 2.389491081237793, | |
| "learning_rate": 4.4214312456543834e-05, | |
| "loss": 5.6628, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.3552568410945751, | |
| "grad_norm": 5.203212261199951, | |
| "learning_rate": 4.4200941327485697e-05, | |
| "loss": 5.5403, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.3560569691150584, | |
| "grad_norm": 2.0861873626708984, | |
| "learning_rate": 4.418757019842756e-05, | |
| "loss": 5.625, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.3568570971355417, | |
| "grad_norm": 2.2355470657348633, | |
| "learning_rate": 4.417419906936942e-05, | |
| "loss": 5.614, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.35765722515602494, | |
| "grad_norm": 2.2239274978637695, | |
| "learning_rate": 4.4160827940311285e-05, | |
| "loss": 5.6885, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.35845735317650823, | |
| "grad_norm": 4.571592807769775, | |
| "learning_rate": 4.414745681125315e-05, | |
| "loss": 5.8495, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.3592574811969915, | |
| "grad_norm": 2.6501150131225586, | |
| "learning_rate": 4.413408568219501e-05, | |
| "loss": 5.6158, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.3600576092174748, | |
| "grad_norm": 2.8568902015686035, | |
| "learning_rate": 4.412071455313687e-05, | |
| "loss": 5.6403, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.36085773723795805, | |
| "grad_norm": 2.4179179668426514, | |
| "learning_rate": 4.410734342407873e-05, | |
| "loss": 5.749, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.36165786525844135, | |
| "grad_norm": 2.950491189956665, | |
| "learning_rate": 4.409397229502059e-05, | |
| "loss": 5.7128, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.36245799327892464, | |
| "grad_norm": 3.731049060821533, | |
| "learning_rate": 4.4080601165962454e-05, | |
| "loss": 5.6397, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.36325812129940793, | |
| "grad_norm": 2.255730390548706, | |
| "learning_rate": 4.406723003690432e-05, | |
| "loss": 5.626, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.36405824931989117, | |
| "grad_norm": 2.623455047607422, | |
| "learning_rate": 4.405385890784618e-05, | |
| "loss": 5.6792, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.36485837734037446, | |
| "grad_norm": 2.366481065750122, | |
| "learning_rate": 4.404048777878804e-05, | |
| "loss": 5.5455, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.36565850536085776, | |
| "grad_norm": 2.56351375579834, | |
| "learning_rate": 4.4027116649729905e-05, | |
| "loss": 5.7982, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.366458633381341, | |
| "grad_norm": 2.3203811645507812, | |
| "learning_rate": 4.401374552067177e-05, | |
| "loss": 5.7969, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.3672587614018243, | |
| "grad_norm": 2.3838179111480713, | |
| "learning_rate": 4.400037439161363e-05, | |
| "loss": 5.7484, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.3680588894223076, | |
| "grad_norm": 2.0725440979003906, | |
| "learning_rate": 4.398700326255549e-05, | |
| "loss": 5.8405, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.36885901744279087, | |
| "grad_norm": 3.49495005607605, | |
| "learning_rate": 4.3973632133497356e-05, | |
| "loss": 5.7151, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.3696591454632741, | |
| "grad_norm": 2.643007755279541, | |
| "learning_rate": 4.396026100443922e-05, | |
| "loss": 5.6374, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.3704592734837574, | |
| "grad_norm": 2.282304286956787, | |
| "learning_rate": 4.394688987538108e-05, | |
| "loss": 5.589, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.3712594015042407, | |
| "grad_norm": 2.244058609008789, | |
| "learning_rate": 4.3933518746322944e-05, | |
| "loss": 5.7516, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.37205952952472393, | |
| "grad_norm": 2.44496488571167, | |
| "learning_rate": 4.3920147617264806e-05, | |
| "loss": 5.8393, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.3728596575452072, | |
| "grad_norm": 2.6613078117370605, | |
| "learning_rate": 4.390677648820667e-05, | |
| "loss": 5.6764, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.3736597855656905, | |
| "grad_norm": 3.99092173576355, | |
| "learning_rate": 4.3893405359148525e-05, | |
| "loss": 5.8658, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.3744599135861738, | |
| "grad_norm": 1.6338485479354858, | |
| "learning_rate": 4.388003423009039e-05, | |
| "loss": 5.7527, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.37526004160665705, | |
| "grad_norm": 2.3723371028900146, | |
| "learning_rate": 4.386666310103225e-05, | |
| "loss": 5.7482, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.37606016962714034, | |
| "grad_norm": 2.630424976348877, | |
| "learning_rate": 4.385329197197411e-05, | |
| "loss": 5.7539, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.37686029764762363, | |
| "grad_norm": 2.3873038291931152, | |
| "learning_rate": 4.3839920842915976e-05, | |
| "loss": 5.6729, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.37766042566810687, | |
| "grad_norm": 1.9391748905181885, | |
| "learning_rate": 4.382654971385784e-05, | |
| "loss": 5.6794, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.37846055368859016, | |
| "grad_norm": 2.103975296020508, | |
| "learning_rate": 4.38131785847997e-05, | |
| "loss": 5.5104, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.37926068170907346, | |
| "grad_norm": 3.731184959411621, | |
| "learning_rate": 4.3799807455741564e-05, | |
| "loss": 5.6699, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.38006080972955675, | |
| "grad_norm": 2.881068468093872, | |
| "learning_rate": 4.3786436326683426e-05, | |
| "loss": 5.6394, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.38086093775004, | |
| "grad_norm": 2.5963799953460693, | |
| "learning_rate": 4.377306519762529e-05, | |
| "loss": 5.784, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.3816610657705233, | |
| "grad_norm": 1.9520230293273926, | |
| "learning_rate": 4.375969406856715e-05, | |
| "loss": 5.7608, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.38246119379100657, | |
| "grad_norm": 2.386702537536621, | |
| "learning_rate": 4.374766005241483e-05, | |
| "loss": 5.5725, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.38326132181148986, | |
| "grad_norm": 2.3830511569976807, | |
| "learning_rate": 4.3734288923356694e-05, | |
| "loss": 5.5584, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.3840614498319731, | |
| "grad_norm": 2.1514739990234375, | |
| "learning_rate": 4.3720917794298556e-05, | |
| "loss": 5.6621, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.3848615778524564, | |
| "grad_norm": 2.5376317501068115, | |
| "learning_rate": 4.370754666524042e-05, | |
| "loss": 5.4138, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.3856617058729397, | |
| "grad_norm": 3.425899028778076, | |
| "learning_rate": 4.3694175536182275e-05, | |
| "loss": 5.6478, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.3864618338934229, | |
| "grad_norm": 2.7518632411956787, | |
| "learning_rate": 4.368080440712414e-05, | |
| "loss": 5.6556, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.3872619619139062, | |
| "grad_norm": 3.119227647781372, | |
| "learning_rate": 4.3667433278066e-05, | |
| "loss": 5.7925, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.3880620899343895, | |
| "grad_norm": 3.2664616107940674, | |
| "learning_rate": 4.365406214900786e-05, | |
| "loss": 5.7176, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.3888622179548728, | |
| "grad_norm": 2.5125045776367188, | |
| "learning_rate": 4.3640691019949726e-05, | |
| "loss": 5.6511, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.38966234597535604, | |
| "grad_norm": 2.992112874984741, | |
| "learning_rate": 4.362731989089159e-05, | |
| "loss": 5.6426, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.39046247399583933, | |
| "grad_norm": 4.46783971786499, | |
| "learning_rate": 4.361394876183345e-05, | |
| "loss": 5.736, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.3912626020163226, | |
| "grad_norm": 1.8372838497161865, | |
| "learning_rate": 4.3600577632775314e-05, | |
| "loss": 5.7603, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.39206273003680586, | |
| "grad_norm": 2.1635375022888184, | |
| "learning_rate": 4.3587206503717176e-05, | |
| "loss": 5.6019, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.39286285805728915, | |
| "grad_norm": 2.2425310611724854, | |
| "learning_rate": 4.357383537465904e-05, | |
| "loss": 5.6829, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.39366298607777245, | |
| "grad_norm": 2.408907413482666, | |
| "learning_rate": 4.35604642456009e-05, | |
| "loss": 5.6821, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.39446311409825574, | |
| "grad_norm": 3.012258291244507, | |
| "learning_rate": 4.3547093116542765e-05, | |
| "loss": 5.7503, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.395263242118739, | |
| "grad_norm": 3.187053680419922, | |
| "learning_rate": 4.353372198748463e-05, | |
| "loss": 5.6459, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.39606337013922227, | |
| "grad_norm": 2.7528955936431885, | |
| "learning_rate": 4.352035085842649e-05, | |
| "loss": 5.6386, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.39686349815970556, | |
| "grad_norm": 2.9744699001312256, | |
| "learning_rate": 4.350697972936835e-05, | |
| "loss": 5.5938, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.39766362618018886, | |
| "grad_norm": 2.779604196548462, | |
| "learning_rate": 4.3493608600310215e-05, | |
| "loss": 5.5459, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.3984637542006721, | |
| "grad_norm": 2.9092133045196533, | |
| "learning_rate": 4.348023747125207e-05, | |
| "loss": 5.7695, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.3992638822211554, | |
| "grad_norm": 2.800872802734375, | |
| "learning_rate": 4.3466866342193934e-05, | |
| "loss": 5.6943, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.4000640102416387, | |
| "grad_norm": 3.299595832824707, | |
| "learning_rate": 4.3453495213135797e-05, | |
| "loss": 5.4432, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4008641382621219, | |
| "grad_norm": 2.2425456047058105, | |
| "learning_rate": 4.344012408407766e-05, | |
| "loss": 5.6688, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.4016642662826052, | |
| "grad_norm": 2.269378423690796, | |
| "learning_rate": 4.342675295501952e-05, | |
| "loss": 5.7713, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.4024643943030885, | |
| "grad_norm": 2.3903868198394775, | |
| "learning_rate": 4.3413381825961385e-05, | |
| "loss": 5.5926, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.4032645223235718, | |
| "grad_norm": 3.267918109893799, | |
| "learning_rate": 4.340001069690325e-05, | |
| "loss": 5.6806, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.40406465034405503, | |
| "grad_norm": 3.2075066566467285, | |
| "learning_rate": 4.338663956784511e-05, | |
| "loss": 5.6582, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.4048647783645383, | |
| "grad_norm": 2.5458226203918457, | |
| "learning_rate": 4.337326843878697e-05, | |
| "loss": 5.6576, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.4056649063850216, | |
| "grad_norm": 2.0331077575683594, | |
| "learning_rate": 4.3359897309728835e-05, | |
| "loss": 5.6725, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.40646503440550485, | |
| "grad_norm": 2.406907796859741, | |
| "learning_rate": 4.33465261806707e-05, | |
| "loss": 5.5168, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.40726516242598815, | |
| "grad_norm": 2.661137580871582, | |
| "learning_rate": 4.333315505161256e-05, | |
| "loss": 5.5953, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.40806529044647144, | |
| "grad_norm": 2.857725143432617, | |
| "learning_rate": 4.3319783922554423e-05, | |
| "loss": 5.6702, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.40886541846695473, | |
| "grad_norm": 2.7894747257232666, | |
| "learning_rate": 4.3306412793496286e-05, | |
| "loss": 5.6228, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.40966554648743797, | |
| "grad_norm": 2.8865861892700195, | |
| "learning_rate": 4.329304166443815e-05, | |
| "loss": 5.6859, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.41046567450792126, | |
| "grad_norm": 2.1493608951568604, | |
| "learning_rate": 4.3279670535380005e-05, | |
| "loss": 5.5516, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.41126580252840456, | |
| "grad_norm": 3.112820863723755, | |
| "learning_rate": 4.326629940632187e-05, | |
| "loss": 5.6409, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.41206593054888785, | |
| "grad_norm": 2.778876543045044, | |
| "learning_rate": 4.325292827726373e-05, | |
| "loss": 5.6948, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.4128660585693711, | |
| "grad_norm": 2.0409047603607178, | |
| "learning_rate": 4.323955714820559e-05, | |
| "loss": 5.5458, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.4136661865898544, | |
| "grad_norm": 3.1058828830718994, | |
| "learning_rate": 4.3226186019147456e-05, | |
| "loss": 5.8437, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.41446631461033767, | |
| "grad_norm": 3.306704044342041, | |
| "learning_rate": 4.321281489008932e-05, | |
| "loss": 5.691, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.4152664426308209, | |
| "grad_norm": 2.9495625495910645, | |
| "learning_rate": 4.319944376103118e-05, | |
| "loss": 5.6364, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.4160665706513042, | |
| "grad_norm": 2.1773974895477295, | |
| "learning_rate": 4.3186072631973044e-05, | |
| "loss": 5.6713, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.4168666986717875, | |
| "grad_norm": 2.0897533893585205, | |
| "learning_rate": 4.3172701502914906e-05, | |
| "loss": 5.6022, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.4176668266922708, | |
| "grad_norm": 2.2131927013397217, | |
| "learning_rate": 4.315933037385677e-05, | |
| "loss": 5.5728, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.418466954712754, | |
| "grad_norm": 2.225728750228882, | |
| "learning_rate": 4.314595924479863e-05, | |
| "loss": 5.5374, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.4192670827332373, | |
| "grad_norm": 2.219791889190674, | |
| "learning_rate": 4.3132588115740494e-05, | |
| "loss": 5.6986, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.4200672107537206, | |
| "grad_norm": 2.720323085784912, | |
| "learning_rate": 4.311921698668236e-05, | |
| "loss": 5.6046, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.42086733877420385, | |
| "grad_norm": 2.4254257678985596, | |
| "learning_rate": 4.310584585762422e-05, | |
| "loss": 5.5566, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.42166746679468714, | |
| "grad_norm": 2.2297472953796387, | |
| "learning_rate": 4.309247472856608e-05, | |
| "loss": 5.7431, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.42246759481517043, | |
| "grad_norm": 2.2767512798309326, | |
| "learning_rate": 4.3079103599507945e-05, | |
| "loss": 5.6661, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.4232677228356537, | |
| "grad_norm": 2.8959579467773438, | |
| "learning_rate": 4.30657324704498e-05, | |
| "loss": 5.6584, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.42406785085613696, | |
| "grad_norm": 2.49867844581604, | |
| "learning_rate": 4.3052361341391664e-05, | |
| "loss": 5.7564, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.42486797887662026, | |
| "grad_norm": 2.1820337772369385, | |
| "learning_rate": 4.3038990212333526e-05, | |
| "loss": 5.6288, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.42566810689710355, | |
| "grad_norm": 2.7174227237701416, | |
| "learning_rate": 4.302561908327539e-05, | |
| "loss": 5.6496, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.42646823491758684, | |
| "grad_norm": 2.7261149883270264, | |
| "learning_rate": 4.301224795421725e-05, | |
| "loss": 5.6557, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.4272683629380701, | |
| "grad_norm": 2.581760883331299, | |
| "learning_rate": 4.2998876825159114e-05, | |
| "loss": 5.604, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.42806849095855337, | |
| "grad_norm": 2.43254017829895, | |
| "learning_rate": 4.298550569610098e-05, | |
| "loss": 5.6041, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.42886861897903666, | |
| "grad_norm": 4.465782165527344, | |
| "learning_rate": 4.297213456704284e-05, | |
| "loss": 5.7158, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.4296687469995199, | |
| "grad_norm": 2.6434614658355713, | |
| "learning_rate": 4.29587634379847e-05, | |
| "loss": 5.6347, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.4304688750200032, | |
| "grad_norm": 2.344190835952759, | |
| "learning_rate": 4.2945392308926565e-05, | |
| "loss": 5.6062, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.4312690030404865, | |
| "grad_norm": 4.311372756958008, | |
| "learning_rate": 4.293202117986843e-05, | |
| "loss": 5.7356, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.4320691310609698, | |
| "grad_norm": 2.8204123973846436, | |
| "learning_rate": 4.291865005081029e-05, | |
| "loss": 5.63, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.432869259081453, | |
| "grad_norm": 3.333059072494507, | |
| "learning_rate": 4.290527892175215e-05, | |
| "loss": 5.5992, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.4336693871019363, | |
| "grad_norm": 2.0647048950195312, | |
| "learning_rate": 4.2891907792694016e-05, | |
| "loss": 5.691, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.4344695151224196, | |
| "grad_norm": 2.5100045204162598, | |
| "learning_rate": 4.287853666363588e-05, | |
| "loss": 5.615, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.43526964314290284, | |
| "grad_norm": 2.6120762825012207, | |
| "learning_rate": 4.286516553457774e-05, | |
| "loss": 5.746, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.43606977116338613, | |
| "grad_norm": 2.2886853218078613, | |
| "learning_rate": 4.2851794405519604e-05, | |
| "loss": 5.6783, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.4368698991838694, | |
| "grad_norm": 2.6724119186401367, | |
| "learning_rate": 4.283842327646147e-05, | |
| "loss": 5.6526, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.4376700272043527, | |
| "grad_norm": 2.2408151626586914, | |
| "learning_rate": 4.282505214740333e-05, | |
| "loss": 5.6314, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.43847015522483596, | |
| "grad_norm": 3.0294084548950195, | |
| "learning_rate": 4.281168101834519e-05, | |
| "loss": 5.6669, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.43927028324531925, | |
| "grad_norm": 2.1664011478424072, | |
| "learning_rate": 4.2798309889287055e-05, | |
| "loss": 5.4856, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.44007041126580254, | |
| "grad_norm": 3.4465417861938477, | |
| "learning_rate": 4.278493876022892e-05, | |
| "loss": 5.5859, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.4408705392862858, | |
| "grad_norm": 2.0116310119628906, | |
| "learning_rate": 4.277156763117078e-05, | |
| "loss": 5.5982, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.44167066730676907, | |
| "grad_norm": 2.578658103942871, | |
| "learning_rate": 4.275819650211264e-05, | |
| "loss": 5.4026, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.44247079532725236, | |
| "grad_norm": 3.1201677322387695, | |
| "learning_rate": 4.2744825373054506e-05, | |
| "loss": 5.7024, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.44327092334773566, | |
| "grad_norm": 2.2246837615966797, | |
| "learning_rate": 4.273145424399637e-05, | |
| "loss": 5.5842, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4440710513682189, | |
| "grad_norm": 2.1593568325042725, | |
| "learning_rate": 4.271808311493823e-05, | |
| "loss": 5.5099, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.4448711793887022, | |
| "grad_norm": 3.082218885421753, | |
| "learning_rate": 4.2704711985880094e-05, | |
| "loss": 5.5539, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.4456713074091855, | |
| "grad_norm": 3.2272634506225586, | |
| "learning_rate": 4.2691340856821956e-05, | |
| "loss": 5.73, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.4464714354296688, | |
| "grad_norm": 2.301713466644287, | |
| "learning_rate": 4.267796972776382e-05, | |
| "loss": 5.5444, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.447271563450152, | |
| "grad_norm": 3.2985429763793945, | |
| "learning_rate": 4.2664598598705675e-05, | |
| "loss": 5.7499, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.4480716914706353, | |
| "grad_norm": 2.103994607925415, | |
| "learning_rate": 4.265122746964754e-05, | |
| "loss": 5.5627, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.4488718194911186, | |
| "grad_norm": 3.260099172592163, | |
| "learning_rate": 4.26378563405894e-05, | |
| "loss": 5.5692, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.44967194751160183, | |
| "grad_norm": 2.740907907485962, | |
| "learning_rate": 4.262448521153126e-05, | |
| "loss": 5.4984, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.4504720755320851, | |
| "grad_norm": 5.314218997955322, | |
| "learning_rate": 4.2611114082473126e-05, | |
| "loss": 5.5641, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.4512722035525684, | |
| "grad_norm": 3.0524938106536865, | |
| "learning_rate": 4.259774295341499e-05, | |
| "loss": 5.6375, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.4520723315730517, | |
| "grad_norm": 3.57781982421875, | |
| "learning_rate": 4.258437182435685e-05, | |
| "loss": 5.6726, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.45287245959353495, | |
| "grad_norm": 3.094510793685913, | |
| "learning_rate": 4.2571000695298714e-05, | |
| "loss": 5.7328, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.45367258761401824, | |
| "grad_norm": 2.731092929840088, | |
| "learning_rate": 4.2557629566240576e-05, | |
| "loss": 5.6667, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.45447271563450153, | |
| "grad_norm": 3.6701395511627197, | |
| "learning_rate": 4.254425843718244e-05, | |
| "loss": 5.641, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.45527284365498477, | |
| "grad_norm": 1.9017853736877441, | |
| "learning_rate": 4.25308873081243e-05, | |
| "loss": 5.6521, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.45607297167546806, | |
| "grad_norm": 3.2658119201660156, | |
| "learning_rate": 4.2517516179066165e-05, | |
| "loss": 5.6431, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.45687309969595136, | |
| "grad_norm": 2.227353572845459, | |
| "learning_rate": 4.250414505000803e-05, | |
| "loss": 5.6198, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.45767322771643465, | |
| "grad_norm": 1.7804296016693115, | |
| "learning_rate": 4.249077392094989e-05, | |
| "loss": 5.618, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.4584733557369179, | |
| "grad_norm": 2.9357879161834717, | |
| "learning_rate": 4.247740279189175e-05, | |
| "loss": 5.5222, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.4592734837574012, | |
| "grad_norm": 5.074959754943848, | |
| "learning_rate": 4.2464031662833615e-05, | |
| "loss": 5.7604, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.4600736117778845, | |
| "grad_norm": 2.4961061477661133, | |
| "learning_rate": 4.245066053377547e-05, | |
| "loss": 5.5699, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.46087373979836777, | |
| "grad_norm": 2.636403799057007, | |
| "learning_rate": 4.2437289404717334e-05, | |
| "loss": 5.745, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.461673867818851, | |
| "grad_norm": 2.4829630851745605, | |
| "learning_rate": 4.2423918275659197e-05, | |
| "loss": 5.9779, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.4624739958393343, | |
| "grad_norm": 2.389112710952759, | |
| "learning_rate": 4.241054714660106e-05, | |
| "loss": 5.696, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.4632741238598176, | |
| "grad_norm": 2.3053462505340576, | |
| "learning_rate": 4.239717601754292e-05, | |
| "loss": 5.6567, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.4640742518803008, | |
| "grad_norm": 2.9635446071624756, | |
| "learning_rate": 4.2383804888484785e-05, | |
| "loss": 5.7643, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.4648743799007841, | |
| "grad_norm": 3.3227570056915283, | |
| "learning_rate": 4.237043375942665e-05, | |
| "loss": 5.5425, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.4656745079212674, | |
| "grad_norm": 3.2959067821502686, | |
| "learning_rate": 4.235706263036851e-05, | |
| "loss": 5.5886, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.4664746359417507, | |
| "grad_norm": 2.497953176498413, | |
| "learning_rate": 4.234369150131037e-05, | |
| "loss": 5.6248, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.46727476396223394, | |
| "grad_norm": 3.5957205295562744, | |
| "learning_rate": 4.2330320372252235e-05, | |
| "loss": 5.5345, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.46807489198271723, | |
| "grad_norm": 2.9113316535949707, | |
| "learning_rate": 4.23169492431941e-05, | |
| "loss": 5.7358, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.4688750200032005, | |
| "grad_norm": 3.8617255687713623, | |
| "learning_rate": 4.230357811413596e-05, | |
| "loss": 5.7451, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.46967514802368376, | |
| "grad_norm": 2.5546538829803467, | |
| "learning_rate": 4.2290206985077824e-05, | |
| "loss": 5.5874, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.47047527604416706, | |
| "grad_norm": 3.7215869426727295, | |
| "learning_rate": 4.2276835856019686e-05, | |
| "loss": 5.5462, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.47127540406465035, | |
| "grad_norm": 3.3122622966766357, | |
| "learning_rate": 4.226346472696155e-05, | |
| "loss": 5.7368, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.47207553208513364, | |
| "grad_norm": 2.3962459564208984, | |
| "learning_rate": 4.2250093597903405e-05, | |
| "loss": 5.7328, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.4728756601056169, | |
| "grad_norm": 2.497668504714966, | |
| "learning_rate": 4.223672246884527e-05, | |
| "loss": 5.7063, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.4736757881261002, | |
| "grad_norm": 2.301725387573242, | |
| "learning_rate": 4.222335133978713e-05, | |
| "loss": 5.6029, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.47447591614658347, | |
| "grad_norm": 3.840155839920044, | |
| "learning_rate": 4.220998021072899e-05, | |
| "loss": 5.825, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.47527604416706676, | |
| "grad_norm": 3.1776278018951416, | |
| "learning_rate": 4.2196609081670856e-05, | |
| "loss": 5.6421, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.47607617218755, | |
| "grad_norm": 2.1823127269744873, | |
| "learning_rate": 4.218323795261272e-05, | |
| "loss": 5.7154, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.4768763002080333, | |
| "grad_norm": 2.944390058517456, | |
| "learning_rate": 4.216986682355458e-05, | |
| "loss": 5.5429, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.4776764282285166, | |
| "grad_norm": 2.035430431365967, | |
| "learning_rate": 4.2156495694496444e-05, | |
| "loss": 5.8187, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.4784765562489998, | |
| "grad_norm": 3.167098045349121, | |
| "learning_rate": 4.2143124565438306e-05, | |
| "loss": 5.5891, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.4792766842694831, | |
| "grad_norm": 1.9377233982086182, | |
| "learning_rate": 4.212975343638017e-05, | |
| "loss": 5.7428, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.4800768122899664, | |
| "grad_norm": 2.759096622467041, | |
| "learning_rate": 4.211638230732203e-05, | |
| "loss": 5.5572, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4808769403104497, | |
| "grad_norm": 2.074033498764038, | |
| "learning_rate": 4.2103011178263894e-05, | |
| "loss": 5.517, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.48167706833093293, | |
| "grad_norm": 2.2866854667663574, | |
| "learning_rate": 4.208964004920576e-05, | |
| "loss": 5.6539, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.4824771963514162, | |
| "grad_norm": 1.9909095764160156, | |
| "learning_rate": 4.207626892014762e-05, | |
| "loss": 5.5532, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.4832773243718995, | |
| "grad_norm": 3.245906114578247, | |
| "learning_rate": 4.206289779108948e-05, | |
| "loss": 5.6797, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.48407745239238276, | |
| "grad_norm": 2.013009786605835, | |
| "learning_rate": 4.2049526662031345e-05, | |
| "loss": 5.6378, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.48487758041286605, | |
| "grad_norm": 2.5478925704956055, | |
| "learning_rate": 4.20361555329732e-05, | |
| "loss": 5.555, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.48567770843334934, | |
| "grad_norm": 3.079225778579712, | |
| "learning_rate": 4.2022784403915064e-05, | |
| "loss": 5.7618, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.48647783645383263, | |
| "grad_norm": 2.2639927864074707, | |
| "learning_rate": 4.2009413274856926e-05, | |
| "loss": 5.8063, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.48727796447431587, | |
| "grad_norm": 4.630524158477783, | |
| "learning_rate": 4.199604214579879e-05, | |
| "loss": 5.6403, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.48807809249479917, | |
| "grad_norm": 3.11018967628479, | |
| "learning_rate": 4.198267101674065e-05, | |
| "loss": 5.7517, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.48887822051528246, | |
| "grad_norm": 8.462982177734375, | |
| "learning_rate": 4.1969299887682515e-05, | |
| "loss": 5.7311, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.4896783485357657, | |
| "grad_norm": 2.418065071105957, | |
| "learning_rate": 4.195592875862438e-05, | |
| "loss": 5.6239, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.490478476556249, | |
| "grad_norm": 2.5452466011047363, | |
| "learning_rate": 4.194255762956624e-05, | |
| "loss": 5.7417, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.4912786045767323, | |
| "grad_norm": 2.986041307449341, | |
| "learning_rate": 4.19291865005081e-05, | |
| "loss": 5.663, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.4920787325972156, | |
| "grad_norm": 2.7642807960510254, | |
| "learning_rate": 4.1915815371449965e-05, | |
| "loss": 5.5379, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.4928788606176988, | |
| "grad_norm": 4.326907157897949, | |
| "learning_rate": 4.190244424239183e-05, | |
| "loss": 5.8058, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.4936789886381821, | |
| "grad_norm": 1.9514706134796143, | |
| "learning_rate": 4.188907311333369e-05, | |
| "loss": 5.7004, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.4944791166586654, | |
| "grad_norm": 2.5721428394317627, | |
| "learning_rate": 4.187570198427555e-05, | |
| "loss": 5.6959, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.4952792446791487, | |
| "grad_norm": 2.6619083881378174, | |
| "learning_rate": 4.1862330855217416e-05, | |
| "loss": 5.7196, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.4960793726996319, | |
| "grad_norm": 2.322341203689575, | |
| "learning_rate": 4.184895972615928e-05, | |
| "loss": 5.5998, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.4968795007201152, | |
| "grad_norm": 2.280777931213379, | |
| "learning_rate": 4.183558859710114e-05, | |
| "loss": 5.5171, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.4976796287405985, | |
| "grad_norm": 1.9774320125579834, | |
| "learning_rate": 4.1822217468043004e-05, | |
| "loss": 5.6368, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.49847975676108175, | |
| "grad_norm": 2.199708938598633, | |
| "learning_rate": 4.180884633898487e-05, | |
| "loss": 5.4638, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.49927988478156504, | |
| "grad_norm": 2.0054879188537598, | |
| "learning_rate": 4.179547520992673e-05, | |
| "loss": 5.4624, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.5000800128020483, | |
| "grad_norm": 2.0623903274536133, | |
| "learning_rate": 4.178210408086859e-05, | |
| "loss": 5.6554, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.5008801408225316, | |
| "grad_norm": 2.5907487869262695, | |
| "learning_rate": 4.1768732951810455e-05, | |
| "loss": 5.4989, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.5016802688430149, | |
| "grad_norm": 2.181987762451172, | |
| "learning_rate": 4.175536182275232e-05, | |
| "loss": 5.624, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.5024803968634982, | |
| "grad_norm": 2.9678001403808594, | |
| "learning_rate": 4.174199069369418e-05, | |
| "loss": 5.6545, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.5032805248839815, | |
| "grad_norm": 5.213638782501221, | |
| "learning_rate": 4.172861956463604e-05, | |
| "loss": 5.7048, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.5040806529044647, | |
| "grad_norm": 2.465900182723999, | |
| "learning_rate": 4.1715248435577906e-05, | |
| "loss": 5.646, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.504880780924948, | |
| "grad_norm": 2.94570255279541, | |
| "learning_rate": 4.170187730651977e-05, | |
| "loss": 5.6274, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.5056809089454313, | |
| "grad_norm": 3.5255651473999023, | |
| "learning_rate": 4.168850617746163e-05, | |
| "loss": 5.5336, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.5064810369659145, | |
| "grad_norm": 2.3499608039855957, | |
| "learning_rate": 4.1675135048403494e-05, | |
| "loss": 5.7768, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.5072811649863979, | |
| "grad_norm": 2.0476951599121094, | |
| "learning_rate": 4.1661763919345356e-05, | |
| "loss": 5.5927, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.5080812930068811, | |
| "grad_norm": 2.4708118438720703, | |
| "learning_rate": 4.164839279028722e-05, | |
| "loss": 5.6458, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.5088814210273643, | |
| "grad_norm": 2.465075731277466, | |
| "learning_rate": 4.163502166122908e-05, | |
| "loss": 5.5744, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.5096815490478477, | |
| "grad_norm": 2.9378490447998047, | |
| "learning_rate": 4.162165053217094e-05, | |
| "loss": 5.6963, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.5104816770683309, | |
| "grad_norm": 2.201359987258911, | |
| "learning_rate": 4.16082794031128e-05, | |
| "loss": 5.613, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.5112818050888142, | |
| "grad_norm": 1.8427401781082153, | |
| "learning_rate": 4.159490827405466e-05, | |
| "loss": 5.5494, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.5120819331092975, | |
| "grad_norm": 1.9969813823699951, | |
| "learning_rate": 4.1581537144996526e-05, | |
| "loss": 5.5783, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.5128820611297807, | |
| "grad_norm": 2.9670321941375732, | |
| "learning_rate": 4.156816601593839e-05, | |
| "loss": 5.7176, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.5136821891502641, | |
| "grad_norm": 2.76875901222229, | |
| "learning_rate": 4.155479488688025e-05, | |
| "loss": 5.5584, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.5144823171707473, | |
| "grad_norm": 3.2874600887298584, | |
| "learning_rate": 4.1541423757822114e-05, | |
| "loss": 5.8726, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5152824451912306, | |
| "grad_norm": 2.4672482013702393, | |
| "learning_rate": 4.1528052628763977e-05, | |
| "loss": 5.764, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.5160825732117139, | |
| "grad_norm": 3.5424506664276123, | |
| "learning_rate": 4.151468149970584e-05, | |
| "loss": 5.6612, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.5168827012321972, | |
| "grad_norm": 2.7947871685028076, | |
| "learning_rate": 4.15013103706477e-05, | |
| "loss": 5.668, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5176828292526804, | |
| "grad_norm": 2.624370574951172, | |
| "learning_rate": 4.1487939241589565e-05, | |
| "loss": 5.577, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5184829572731637, | |
| "grad_norm": 2.276289701461792, | |
| "learning_rate": 4.147456811253143e-05, | |
| "loss": 5.7592, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.519283085293647, | |
| "grad_norm": 2.751945972442627, | |
| "learning_rate": 4.146119698347329e-05, | |
| "loss": 5.6251, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.5200832133141302, | |
| "grad_norm": 2.1990444660186768, | |
| "learning_rate": 4.144782585441515e-05, | |
| "loss": 5.5141, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.5208833413346136, | |
| "grad_norm": 2.732024908065796, | |
| "learning_rate": 4.1434454725357015e-05, | |
| "loss": 5.5938, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.5216834693550968, | |
| "grad_norm": 2.6876533031463623, | |
| "learning_rate": 4.142108359629887e-05, | |
| "loss": 5.7126, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.5224835973755801, | |
| "grad_norm": 2.660323143005371, | |
| "learning_rate": 4.1407712467240734e-05, | |
| "loss": 5.6261, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.5232837253960634, | |
| "grad_norm": 2.567084550857544, | |
| "learning_rate": 4.13943413381826e-05, | |
| "loss": 5.5248, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.5240838534165466, | |
| "grad_norm": 4.317018032073975, | |
| "learning_rate": 4.138097020912446e-05, | |
| "loss": 5.4444, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.52488398143703, | |
| "grad_norm": 2.0361647605895996, | |
| "learning_rate": 4.136759908006632e-05, | |
| "loss": 5.7532, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.5256841094575132, | |
| "grad_norm": 2.0946271419525146, | |
| "learning_rate": 4.1354227951008185e-05, | |
| "loss": 5.6343, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.5264842374779964, | |
| "grad_norm": 3.3724842071533203, | |
| "learning_rate": 4.134085682195005e-05, | |
| "loss": 5.6455, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.5272843654984798, | |
| "grad_norm": 4.078947067260742, | |
| "learning_rate": 4.132748569289191e-05, | |
| "loss": 5.6681, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.528084493518963, | |
| "grad_norm": 4.288105010986328, | |
| "learning_rate": 4.131411456383377e-05, | |
| "loss": 5.7152, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.5288846215394463, | |
| "grad_norm": 2.5208754539489746, | |
| "learning_rate": 4.1300743434775635e-05, | |
| "loss": 5.5715, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.5296847495599296, | |
| "grad_norm": 2.6902217864990234, | |
| "learning_rate": 4.12873723057175e-05, | |
| "loss": 5.4997, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.5304848775804129, | |
| "grad_norm": 2.4580068588256836, | |
| "learning_rate": 4.127400117665936e-05, | |
| "loss": 5.7656, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.5312850056008962, | |
| "grad_norm": 2.5117955207824707, | |
| "learning_rate": 4.1260630047601224e-05, | |
| "loss": 5.6373, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.5320851336213794, | |
| "grad_norm": 2.660921096801758, | |
| "learning_rate": 4.1247258918543086e-05, | |
| "loss": 5.6829, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.5328852616418627, | |
| "grad_norm": 2.4601287841796875, | |
| "learning_rate": 4.123388778948495e-05, | |
| "loss": 5.7702, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.533685389662346, | |
| "grad_norm": 2.9025120735168457, | |
| "learning_rate": 4.122051666042681e-05, | |
| "loss": 5.6374, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.5344855176828293, | |
| "grad_norm": 2.8221569061279297, | |
| "learning_rate": 4.120714553136867e-05, | |
| "loss": 5.5568, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.5352856457033125, | |
| "grad_norm": 2.3035178184509277, | |
| "learning_rate": 4.119377440231053e-05, | |
| "loss": 5.5845, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.5360857737237958, | |
| "grad_norm": 2.0955657958984375, | |
| "learning_rate": 4.118040327325239e-05, | |
| "loss": 5.687, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.5368859017442791, | |
| "grad_norm": 2.530156135559082, | |
| "learning_rate": 4.1167032144194256e-05, | |
| "loss": 5.5772, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.5376860297647623, | |
| "grad_norm": 2.2060387134552, | |
| "learning_rate": 4.115366101513612e-05, | |
| "loss": 5.5964, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.5384861577852457, | |
| "grad_norm": 2.720702886581421, | |
| "learning_rate": 4.114028988607798e-05, | |
| "loss": 5.5432, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.5392862858057289, | |
| "grad_norm": 2.2585232257843018, | |
| "learning_rate": 4.1126918757019844e-05, | |
| "loss": 5.77, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.5400864138262121, | |
| "grad_norm": 2.052316904067993, | |
| "learning_rate": 4.1113547627961706e-05, | |
| "loss": 5.5679, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.5408865418466955, | |
| "grad_norm": 2.772500991821289, | |
| "learning_rate": 4.110017649890357e-05, | |
| "loss": 5.5608, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.5416866698671787, | |
| "grad_norm": 2.158129930496216, | |
| "learning_rate": 4.108680536984543e-05, | |
| "loss": 5.6612, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.5424867978876621, | |
| "grad_norm": 2.874685287475586, | |
| "learning_rate": 4.1073434240787294e-05, | |
| "loss": 5.5999, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5432869259081453, | |
| "grad_norm": 2.2797632217407227, | |
| "learning_rate": 4.106006311172916e-05, | |
| "loss": 5.7243, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.5440870539286286, | |
| "grad_norm": 2.998309850692749, | |
| "learning_rate": 4.1048029095576836e-05, | |
| "loss": 5.5031, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5448871819491119, | |
| "grad_norm": 2.8155364990234375, | |
| "learning_rate": 4.10346579665187e-05, | |
| "loss": 5.7631, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.5456873099695951, | |
| "grad_norm": 2.327279806137085, | |
| "learning_rate": 4.102128683746056e-05, | |
| "loss": 5.6293, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.5464874379900784, | |
| "grad_norm": 3.3200621604919434, | |
| "learning_rate": 4.100791570840242e-05, | |
| "loss": 5.717, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.5472875660105617, | |
| "grad_norm": 2.521144390106201, | |
| "learning_rate": 4.099454457934428e-05, | |
| "loss": 5.5705, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.548087694031045, | |
| "grad_norm": 2.7198219299316406, | |
| "learning_rate": 4.098117345028614e-05, | |
| "loss": 5.5931, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.5488878220515282, | |
| "grad_norm": 2.701251268386841, | |
| "learning_rate": 4.0967802321228006e-05, | |
| "loss": 5.4706, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5496879500720115, | |
| "grad_norm": 2.2789149284362793, | |
| "learning_rate": 4.095443119216987e-05, | |
| "loss": 5.5883, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.5504880780924948, | |
| "grad_norm": 2.8821568489074707, | |
| "learning_rate": 4.094106006311173e-05, | |
| "loss": 5.7525, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5512882061129781, | |
| "grad_norm": 2.3450064659118652, | |
| "learning_rate": 4.0927688934053594e-05, | |
| "loss": 5.5166, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.5520883341334614, | |
| "grad_norm": 2.639960527420044, | |
| "learning_rate": 4.0914317804995456e-05, | |
| "loss": 5.7001, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5528884621539446, | |
| "grad_norm": 2.6743710041046143, | |
| "learning_rate": 4.090094667593732e-05, | |
| "loss": 5.7049, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.553688590174428, | |
| "grad_norm": 2.7540199756622314, | |
| "learning_rate": 4.088757554687918e-05, | |
| "loss": 5.5705, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.5544887181949112, | |
| "grad_norm": 3.2703442573547363, | |
| "learning_rate": 4.0874204417821044e-05, | |
| "loss": 5.5585, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.5552888462153944, | |
| "grad_norm": 3.684135913848877, | |
| "learning_rate": 4.086083328876291e-05, | |
| "loss": 5.6561, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.5560889742358778, | |
| "grad_norm": 2.918989896774292, | |
| "learning_rate": 4.084746215970477e-05, | |
| "loss": 5.5171, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.556889102256361, | |
| "grad_norm": 2.5902323722839355, | |
| "learning_rate": 4.083409103064663e-05, | |
| "loss": 5.6703, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5576892302768442, | |
| "grad_norm": 2.23820161819458, | |
| "learning_rate": 4.0820719901588495e-05, | |
| "loss": 5.7048, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.5584893582973276, | |
| "grad_norm": 2.4339401721954346, | |
| "learning_rate": 4.080734877253036e-05, | |
| "loss": 5.4264, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5592894863178108, | |
| "grad_norm": 3.3097031116485596, | |
| "learning_rate": 4.0793977643472214e-05, | |
| "loss": 5.5931, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.5600896143382941, | |
| "grad_norm": 2.6903202533721924, | |
| "learning_rate": 4.0780606514414077e-05, | |
| "loss": 5.5349, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5600896143382941, | |
| "eval_loss": 5.870830535888672, | |
| "eval_runtime": 13.3044, | |
| "eval_samples_per_second": 3.007, | |
| "eval_steps_per_second": 0.376, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5608897423587774, | |
| "grad_norm": 2.144684314727783, | |
| "learning_rate": 4.076723538535594e-05, | |
| "loss": 5.6295, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.5616898703792607, | |
| "grad_norm": 3.227046489715576, | |
| "learning_rate": 4.07538642562978e-05, | |
| "loss": 5.5506, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.562489998399744, | |
| "grad_norm": 2.7323713302612305, | |
| "learning_rate": 4.0740493127239665e-05, | |
| "loss": 5.5441, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.5632901264202272, | |
| "grad_norm": 2.3682384490966797, | |
| "learning_rate": 4.072712199818153e-05, | |
| "loss": 5.6632, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5640902544407105, | |
| "grad_norm": 3.006518602371216, | |
| "learning_rate": 4.071375086912339e-05, | |
| "loss": 5.5702, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.5648903824611938, | |
| "grad_norm": 2.554481029510498, | |
| "learning_rate": 4.070037974006525e-05, | |
| "loss": 5.4405, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5656905104816771, | |
| "grad_norm": 2.2349042892456055, | |
| "learning_rate": 4.0687008611007115e-05, | |
| "loss": 5.5774, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.5664906385021603, | |
| "grad_norm": 2.24906325340271, | |
| "learning_rate": 4.067363748194898e-05, | |
| "loss": 5.6362, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.5672907665226437, | |
| "grad_norm": 2.2345407009124756, | |
| "learning_rate": 4.066026635289084e-05, | |
| "loss": 5.642, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.5680908945431269, | |
| "grad_norm": 3.2273216247558594, | |
| "learning_rate": 4.0646895223832703e-05, | |
| "loss": 5.5204, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.5688910225636101, | |
| "grad_norm": 2.689624071121216, | |
| "learning_rate": 4.0633524094774566e-05, | |
| "loss": 5.5565, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.5696911505840935, | |
| "grad_norm": 3.4473490715026855, | |
| "learning_rate": 4.062015296571643e-05, | |
| "loss": 5.4041, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.5704912786045767, | |
| "grad_norm": 2.528700590133667, | |
| "learning_rate": 4.060678183665829e-05, | |
| "loss": 5.4294, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.5712914066250601, | |
| "grad_norm": 2.6679399013519287, | |
| "learning_rate": 4.059341070760015e-05, | |
| "loss": 5.6018, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.5720915346455433, | |
| "grad_norm": 2.0572123527526855, | |
| "learning_rate": 4.058003957854201e-05, | |
| "loss": 5.6527, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.5728916626660265, | |
| "grad_norm": 2.446279287338257, | |
| "learning_rate": 4.056666844948387e-05, | |
| "loss": 5.5862, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.5736917906865099, | |
| "grad_norm": 2.067232131958008, | |
| "learning_rate": 4.0553297320425735e-05, | |
| "loss": 5.5159, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.5744919187069931, | |
| "grad_norm": 2.225755214691162, | |
| "learning_rate": 4.05399261913676e-05, | |
| "loss": 5.6483, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.5752920467274764, | |
| "grad_norm": 2.3613367080688477, | |
| "learning_rate": 4.052655506230946e-05, | |
| "loss": 5.6226, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.5760921747479597, | |
| "grad_norm": 2.4239625930786133, | |
| "learning_rate": 4.0513183933251324e-05, | |
| "loss": 5.6164, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.5768923027684429, | |
| "grad_norm": 3.5525450706481934, | |
| "learning_rate": 4.0499812804193186e-05, | |
| "loss": 5.4503, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.5776924307889262, | |
| "grad_norm": 2.664311170578003, | |
| "learning_rate": 4.048644167513505e-05, | |
| "loss": 5.5188, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.5784925588094095, | |
| "grad_norm": 2.4020540714263916, | |
| "learning_rate": 4.047307054607691e-05, | |
| "loss": 5.5481, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.5792926868298928, | |
| "grad_norm": 2.256044626235962, | |
| "learning_rate": 4.0459699417018774e-05, | |
| "loss": 5.6097, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.5800928148503761, | |
| "grad_norm": 2.1168150901794434, | |
| "learning_rate": 4.044632828796064e-05, | |
| "loss": 5.5249, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.5808929428708594, | |
| "grad_norm": 2.329375743865967, | |
| "learning_rate": 4.04329571589025e-05, | |
| "loss": 5.504, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.5816930708913426, | |
| "grad_norm": 2.1734092235565186, | |
| "learning_rate": 4.041958602984436e-05, | |
| "loss": 5.5017, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.5824931989118259, | |
| "grad_norm": 3.232649564743042, | |
| "learning_rate": 4.0406214900786225e-05, | |
| "loss": 5.6462, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.5832933269323092, | |
| "grad_norm": 3.140702724456787, | |
| "learning_rate": 4.039284377172809e-05, | |
| "loss": 5.4393, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.5840934549527924, | |
| "grad_norm": 2.284515619277954, | |
| "learning_rate": 4.0379472642669944e-05, | |
| "loss": 5.4891, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.5848935829732758, | |
| "grad_norm": 4.518533706665039, | |
| "learning_rate": 4.0366101513611806e-05, | |
| "loss": 5.7371, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.585693710993759, | |
| "grad_norm": 2.2323620319366455, | |
| "learning_rate": 4.035273038455367e-05, | |
| "loss": 5.6324, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.5864938390142422, | |
| "grad_norm": 3.123394012451172, | |
| "learning_rate": 4.033935925549553e-05, | |
| "loss": 5.6266, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.5872939670347256, | |
| "grad_norm": 2.577545642852783, | |
| "learning_rate": 4.0325988126437394e-05, | |
| "loss": 5.6541, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.5880940950552088, | |
| "grad_norm": 2.8590281009674072, | |
| "learning_rate": 4.031261699737926e-05, | |
| "loss": 5.6927, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.5888942230756921, | |
| "grad_norm": 3.0693793296813965, | |
| "learning_rate": 4.029924586832112e-05, | |
| "loss": 5.5101, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.5896943510961754, | |
| "grad_norm": 2.5813119411468506, | |
| "learning_rate": 4.028587473926298e-05, | |
| "loss": 5.625, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.5904944791166586, | |
| "grad_norm": 2.7804691791534424, | |
| "learning_rate": 4.0272503610204845e-05, | |
| "loss": 5.6264, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.591294607137142, | |
| "grad_norm": 2.4291296005249023, | |
| "learning_rate": 4.025913248114671e-05, | |
| "loss": 5.5024, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.5920947351576252, | |
| "grad_norm": 2.6989386081695557, | |
| "learning_rate": 4.024576135208857e-05, | |
| "loss": 5.4484, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.5928948631781085, | |
| "grad_norm": 2.42767596244812, | |
| "learning_rate": 4.023239022303043e-05, | |
| "loss": 5.5537, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.5936949911985918, | |
| "grad_norm": 2.492577075958252, | |
| "learning_rate": 4.0219019093972296e-05, | |
| "loss": 5.616, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.594495119219075, | |
| "grad_norm": 2.4696478843688965, | |
| "learning_rate": 4.020564796491416e-05, | |
| "loss": 5.62, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.5952952472395583, | |
| "grad_norm": 3.2339985370635986, | |
| "learning_rate": 4.019227683585602e-05, | |
| "loss": 5.485, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.5960953752600416, | |
| "grad_norm": 3.9647512435913086, | |
| "learning_rate": 4.0178905706797884e-05, | |
| "loss": 5.5868, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.5968955032805249, | |
| "grad_norm": 2.36417293548584, | |
| "learning_rate": 4.016553457773975e-05, | |
| "loss": 5.5179, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.5976956313010081, | |
| "grad_norm": 2.1484084129333496, | |
| "learning_rate": 4.015216344868161e-05, | |
| "loss": 5.6915, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.5984957593214915, | |
| "grad_norm": 2.5233757495880127, | |
| "learning_rate": 4.013879231962347e-05, | |
| "loss": 5.4879, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.5992958873419747, | |
| "grad_norm": 3.3730146884918213, | |
| "learning_rate": 4.0125421190565335e-05, | |
| "loss": 5.6531, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.600096015362458, | |
| "grad_norm": 3.0788846015930176, | |
| "learning_rate": 4.01120500615072e-05, | |
| "loss": 5.6058, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.6008961433829413, | |
| "grad_norm": 2.93515944480896, | |
| "learning_rate": 4.009867893244906e-05, | |
| "loss": 5.4777, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.6016962714034245, | |
| "grad_norm": 2.6020236015319824, | |
| "learning_rate": 4.008530780339092e-05, | |
| "loss": 5.6444, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.6024963994239079, | |
| "grad_norm": 2.4522392749786377, | |
| "learning_rate": 4.0071936674332786e-05, | |
| "loss": 5.6157, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.6032965274443911, | |
| "grad_norm": 3.1317343711853027, | |
| "learning_rate": 4.005856554527465e-05, | |
| "loss": 5.5527, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.6040966554648743, | |
| "grad_norm": 2.485154390335083, | |
| "learning_rate": 4.004519441621651e-05, | |
| "loss": 5.6467, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.6048967834853577, | |
| "grad_norm": 2.2032833099365234, | |
| "learning_rate": 4.0031823287158374e-05, | |
| "loss": 5.4957, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.6056969115058409, | |
| "grad_norm": 3.1787898540496826, | |
| "learning_rate": 4.0018452158100236e-05, | |
| "loss": 5.6204, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.6064970395263242, | |
| "grad_norm": 2.9925789833068848, | |
| "learning_rate": 4.00050810290421e-05, | |
| "loss": 5.6732, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.6072971675468075, | |
| "grad_norm": 2.7631521224975586, | |
| "learning_rate": 3.999170989998396e-05, | |
| "loss": 5.6743, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.6080972955672908, | |
| "grad_norm": 2.808265447616577, | |
| "learning_rate": 3.997833877092582e-05, | |
| "loss": 5.5951, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.608897423587774, | |
| "grad_norm": 3.6244983673095703, | |
| "learning_rate": 3.996496764186768e-05, | |
| "loss": 5.5216, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.6096975516082573, | |
| "grad_norm": 2.4245145320892334, | |
| "learning_rate": 3.995159651280954e-05, | |
| "loss": 5.5844, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.6104976796287406, | |
| "grad_norm": 2.2855565547943115, | |
| "learning_rate": 3.9938225383751406e-05, | |
| "loss": 5.5674, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.6112978076492239, | |
| "grad_norm": 2.2801260948181152, | |
| "learning_rate": 3.992485425469327e-05, | |
| "loss": 5.4406, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.6120979356697072, | |
| "grad_norm": 2.0117592811584473, | |
| "learning_rate": 3.991148312563513e-05, | |
| "loss": 5.5463, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.6128980636901904, | |
| "grad_norm": 3.110349655151367, | |
| "learning_rate": 3.9898111996576994e-05, | |
| "loss": 5.6124, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.6136981917106737, | |
| "grad_norm": 2.9789066314697266, | |
| "learning_rate": 3.9884740867518856e-05, | |
| "loss": 5.789, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.614498319731157, | |
| "grad_norm": 2.641871213912964, | |
| "learning_rate": 3.987136973846072e-05, | |
| "loss": 5.4838, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.6152984477516402, | |
| "grad_norm": 3.82928466796875, | |
| "learning_rate": 3.985799860940258e-05, | |
| "loss": 5.7108, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.6160985757721236, | |
| "grad_norm": 3.2533349990844727, | |
| "learning_rate": 3.9844627480344444e-05, | |
| "loss": 5.4167, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.6168987037926068, | |
| "grad_norm": 2.4259872436523438, | |
| "learning_rate": 3.983125635128631e-05, | |
| "loss": 5.5539, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.61769883181309, | |
| "grad_norm": 3.5356359481811523, | |
| "learning_rate": 3.981788522222817e-05, | |
| "loss": 5.4643, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.6184989598335734, | |
| "grad_norm": 2.5774996280670166, | |
| "learning_rate": 3.980451409317003e-05, | |
| "loss": 5.5389, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.6192990878540566, | |
| "grad_norm": 2.3197529315948486, | |
| "learning_rate": 3.9791142964111895e-05, | |
| "loss": 5.5724, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.62009921587454, | |
| "grad_norm": 2.2660646438598633, | |
| "learning_rate": 3.977777183505376e-05, | |
| "loss": 5.5675, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.6208993438950232, | |
| "grad_norm": 2.7596511840820312, | |
| "learning_rate": 3.9764400705995614e-05, | |
| "loss": 5.6168, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.6216994719155065, | |
| "grad_norm": 2.4579806327819824, | |
| "learning_rate": 3.9751029576937477e-05, | |
| "loss": 5.4243, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.6224995999359898, | |
| "grad_norm": 2.7039647102355957, | |
| "learning_rate": 3.973765844787934e-05, | |
| "loss": 5.633, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.623299727956473, | |
| "grad_norm": 2.274777412414551, | |
| "learning_rate": 3.97242873188212e-05, | |
| "loss": 5.5945, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.6240998559769563, | |
| "grad_norm": 2.4263217449188232, | |
| "learning_rate": 3.9710916189763065e-05, | |
| "loss": 5.6763, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.6248999839974396, | |
| "grad_norm": 3.420625686645508, | |
| "learning_rate": 3.969754506070493e-05, | |
| "loss": 5.4884, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.6257001120179229, | |
| "grad_norm": 2.1576149463653564, | |
| "learning_rate": 3.968417393164679e-05, | |
| "loss": 5.6325, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.6265002400384061, | |
| "grad_norm": 2.4189348220825195, | |
| "learning_rate": 3.967080280258865e-05, | |
| "loss": 5.5113, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.6273003680588894, | |
| "grad_norm": 2.533433675765991, | |
| "learning_rate": 3.9657431673530515e-05, | |
| "loss": 5.3743, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.6281004960793727, | |
| "grad_norm": 2.2747883796691895, | |
| "learning_rate": 3.964406054447238e-05, | |
| "loss": 5.4912, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.628900624099856, | |
| "grad_norm": 2.546261787414551, | |
| "learning_rate": 3.963068941541424e-05, | |
| "loss": 5.6571, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.6297007521203393, | |
| "grad_norm": 2.5970914363861084, | |
| "learning_rate": 3.9617318286356103e-05, | |
| "loss": 5.6732, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.6305008801408225, | |
| "grad_norm": 2.956646680831909, | |
| "learning_rate": 3.9603947157297966e-05, | |
| "loss": 5.4769, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.6313010081613059, | |
| "grad_norm": 2.9553463459014893, | |
| "learning_rate": 3.959057602823983e-05, | |
| "loss": 5.4675, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.6321011361817891, | |
| "grad_norm": 2.6471643447875977, | |
| "learning_rate": 3.957720489918169e-05, | |
| "loss": 5.4538, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.6329012642022723, | |
| "grad_norm": 2.847944736480713, | |
| "learning_rate": 3.956383377012355e-05, | |
| "loss": 5.384, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.6337013922227557, | |
| "grad_norm": 3.6218080520629883, | |
| "learning_rate": 3.955046264106541e-05, | |
| "loss": 5.56, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.6345015202432389, | |
| "grad_norm": 2.396426200866699, | |
| "learning_rate": 3.953709151200727e-05, | |
| "loss": 5.6353, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.6353016482637222, | |
| "grad_norm": 2.4465904235839844, | |
| "learning_rate": 3.9523720382949135e-05, | |
| "loss": 5.6698, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.6361017762842055, | |
| "grad_norm": 2.6707208156585693, | |
| "learning_rate": 3.9510349253891e-05, | |
| "loss": 5.4316, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.6369019043046887, | |
| "grad_norm": 2.982117176055908, | |
| "learning_rate": 3.949697812483286e-05, | |
| "loss": 5.6359, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.637702032325172, | |
| "grad_norm": 2.6343331336975098, | |
| "learning_rate": 3.9483606995774724e-05, | |
| "loss": 5.6188, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.6385021603456553, | |
| "grad_norm": 2.290728807449341, | |
| "learning_rate": 3.9470235866716586e-05, | |
| "loss": 5.5824, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.6393022883661386, | |
| "grad_norm": 2.3056259155273438, | |
| "learning_rate": 3.945686473765845e-05, | |
| "loss": 5.5314, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.6401024163866219, | |
| "grad_norm": 2.301790714263916, | |
| "learning_rate": 3.944349360860031e-05, | |
| "loss": 5.497, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6409025444071051, | |
| "grad_norm": 2.2784414291381836, | |
| "learning_rate": 3.9430122479542174e-05, | |
| "loss": 5.6482, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.6417026724275884, | |
| "grad_norm": 2.3686752319335938, | |
| "learning_rate": 3.941675135048404e-05, | |
| "loss": 5.449, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.6425028004480717, | |
| "grad_norm": 3.0353329181671143, | |
| "learning_rate": 3.94033802214259e-05, | |
| "loss": 5.4544, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.643302928468555, | |
| "grad_norm": 3.035477876663208, | |
| "learning_rate": 3.939000909236776e-05, | |
| "loss": 5.4641, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.6441030564890382, | |
| "grad_norm": 2.6078028678894043, | |
| "learning_rate": 3.9376637963309625e-05, | |
| "loss": 5.6181, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.6449031845095216, | |
| "grad_norm": 2.7835607528686523, | |
| "learning_rate": 3.936326683425149e-05, | |
| "loss": 5.459, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.6457033125300048, | |
| "grad_norm": 2.465331792831421, | |
| "learning_rate": 3.9349895705193344e-05, | |
| "loss": 5.5365, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.646503440550488, | |
| "grad_norm": 2.0666961669921875, | |
| "learning_rate": 3.9336524576135206e-05, | |
| "loss": 5.5158, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.6473035685709714, | |
| "grad_norm": 2.2512967586517334, | |
| "learning_rate": 3.932315344707707e-05, | |
| "loss": 5.4235, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.6481036965914546, | |
| "grad_norm": 2.081125497817993, | |
| "learning_rate": 3.930978231801893e-05, | |
| "loss": 5.4172, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.648903824611938, | |
| "grad_norm": 2.0393776893615723, | |
| "learning_rate": 3.9296411188960794e-05, | |
| "loss": 5.5454, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.6497039526324212, | |
| "grad_norm": 2.671065092086792, | |
| "learning_rate": 3.928304005990266e-05, | |
| "loss": 5.4562, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.6505040806529044, | |
| "grad_norm": 2.3266165256500244, | |
| "learning_rate": 3.926966893084452e-05, | |
| "loss": 5.5839, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.6513042086733878, | |
| "grad_norm": 2.400386333465576, | |
| "learning_rate": 3.925629780178638e-05, | |
| "loss": 5.7815, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.652104336693871, | |
| "grad_norm": 2.3798139095306396, | |
| "learning_rate": 3.9242926672728245e-05, | |
| "loss": 5.5736, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.6529044647143543, | |
| "grad_norm": 2.4090096950531006, | |
| "learning_rate": 3.922955554367011e-05, | |
| "loss": 5.4634, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.6537045927348376, | |
| "grad_norm": 3.5072951316833496, | |
| "learning_rate": 3.921618441461197e-05, | |
| "loss": 5.5608, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.6545047207553208, | |
| "grad_norm": 2.364222526550293, | |
| "learning_rate": 3.920281328555383e-05, | |
| "loss": 5.7275, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.6553048487758041, | |
| "grad_norm": 4.594448566436768, | |
| "learning_rate": 3.9189442156495696e-05, | |
| "loss": 5.7235, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.6561049767962874, | |
| "grad_norm": 3.863098621368408, | |
| "learning_rate": 3.917607102743756e-05, | |
| "loss": 5.5359, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.6569051048167707, | |
| "grad_norm": 3.201704978942871, | |
| "learning_rate": 3.916269989837942e-05, | |
| "loss": 5.645, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.6577052328372539, | |
| "grad_norm": 2.697448492050171, | |
| "learning_rate": 3.9149328769321284e-05, | |
| "loss": 5.523, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.6585053608577373, | |
| "grad_norm": 2.4561972618103027, | |
| "learning_rate": 3.913595764026315e-05, | |
| "loss": 5.734, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.6593054888782205, | |
| "grad_norm": 4.527692794799805, | |
| "learning_rate": 3.912258651120501e-05, | |
| "loss": 5.4594, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.6601056168987038, | |
| "grad_norm": 2.8713691234588623, | |
| "learning_rate": 3.910921538214687e-05, | |
| "loss": 5.7247, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.6609057449191871, | |
| "grad_norm": 2.167921304702759, | |
| "learning_rate": 3.9095844253088735e-05, | |
| "loss": 5.6405, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.6617058729396703, | |
| "grad_norm": 2.8967878818511963, | |
| "learning_rate": 3.90824731240306e-05, | |
| "loss": 5.4989, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.6625060009601537, | |
| "grad_norm": 2.002103090286255, | |
| "learning_rate": 3.906910199497246e-05, | |
| "loss": 5.4434, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.6633061289806369, | |
| "grad_norm": 2.187889575958252, | |
| "learning_rate": 3.905573086591432e-05, | |
| "loss": 5.4078, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.6641062570011201, | |
| "grad_norm": 2.4078755378723145, | |
| "learning_rate": 3.9042359736856186e-05, | |
| "loss": 5.5381, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.6649063850216035, | |
| "grad_norm": 3.071484327316284, | |
| "learning_rate": 3.902898860779805e-05, | |
| "loss": 5.4298, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.6657065130420867, | |
| "grad_norm": 3.8413217067718506, | |
| "learning_rate": 3.901561747873991e-05, | |
| "loss": 5.4844, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.66650664106257, | |
| "grad_norm": 3.0394554138183594, | |
| "learning_rate": 3.9002246349681774e-05, | |
| "loss": 5.5524, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.6673067690830533, | |
| "grad_norm": 2.635354518890381, | |
| "learning_rate": 3.8988875220623636e-05, | |
| "loss": 5.5727, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.6681068971035365, | |
| "grad_norm": 2.2557764053344727, | |
| "learning_rate": 3.89755040915655e-05, | |
| "loss": 5.3455, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.6689070251240199, | |
| "grad_norm": 2.837040662765503, | |
| "learning_rate": 3.896213296250736e-05, | |
| "loss": 5.3729, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.6697071531445031, | |
| "grad_norm": 6.783266067504883, | |
| "learning_rate": 3.8948761833449224e-05, | |
| "loss": 5.4372, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.6705072811649864, | |
| "grad_norm": 2.20611310005188, | |
| "learning_rate": 3.893539070439108e-05, | |
| "loss": 5.4983, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.6713074091854697, | |
| "grad_norm": 2.378692626953125, | |
| "learning_rate": 3.892201957533294e-05, | |
| "loss": 5.6309, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.672107537205953, | |
| "grad_norm": 2.7219278812408447, | |
| "learning_rate": 3.8908648446274806e-05, | |
| "loss": 5.67, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.6729076652264362, | |
| "grad_norm": 2.7383148670196533, | |
| "learning_rate": 3.889527731721667e-05, | |
| "loss": 5.5648, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.6737077932469195, | |
| "grad_norm": 1.882124423980713, | |
| "learning_rate": 3.888190618815853e-05, | |
| "loss": 5.5879, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.6745079212674028, | |
| "grad_norm": 2.5975465774536133, | |
| "learning_rate": 3.8868535059100394e-05, | |
| "loss": 5.5644, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.675308049287886, | |
| "grad_norm": 3.4361534118652344, | |
| "learning_rate": 3.8855163930042256e-05, | |
| "loss": 5.6302, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.6761081773083694, | |
| "grad_norm": 2.241267442703247, | |
| "learning_rate": 3.884179280098412e-05, | |
| "loss": 5.5003, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.6769083053288526, | |
| "grad_norm": 1.9234975576400757, | |
| "learning_rate": 3.882842167192598e-05, | |
| "loss": 5.4739, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.677708433349336, | |
| "grad_norm": 2.05928897857666, | |
| "learning_rate": 3.8815050542867845e-05, | |
| "loss": 5.5566, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.6785085613698192, | |
| "grad_norm": 2.5602006912231445, | |
| "learning_rate": 3.880167941380971e-05, | |
| "loss": 5.6363, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.6793086893903024, | |
| "grad_norm": 2.36325740814209, | |
| "learning_rate": 3.878830828475157e-05, | |
| "loss": 5.4635, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.6801088174107858, | |
| "grad_norm": 3.087769031524658, | |
| "learning_rate": 3.877493715569343e-05, | |
| "loss": 5.5537, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.680908945431269, | |
| "grad_norm": 2.759660482406616, | |
| "learning_rate": 3.8761566026635295e-05, | |
| "loss": 5.5427, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.6817090734517522, | |
| "grad_norm": 2.7726991176605225, | |
| "learning_rate": 3.874819489757716e-05, | |
| "loss": 5.4868, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.6825092014722356, | |
| "grad_norm": 3.408202648162842, | |
| "learning_rate": 3.8734823768519014e-05, | |
| "loss": 5.5416, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.6833093294927188, | |
| "grad_norm": 3.801959753036499, | |
| "learning_rate": 3.8721452639460877e-05, | |
| "loss": 5.5577, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.6841094575132021, | |
| "grad_norm": 2.7447824478149414, | |
| "learning_rate": 3.870808151040274e-05, | |
| "loss": 5.5837, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.6849095855336854, | |
| "grad_norm": 3.7551326751708984, | |
| "learning_rate": 3.86947103813446e-05, | |
| "loss": 5.4772, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.6857097135541687, | |
| "grad_norm": 2.036146640777588, | |
| "learning_rate": 3.8681339252286465e-05, | |
| "loss": 5.659, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.6865098415746519, | |
| "grad_norm": 2.392986536026001, | |
| "learning_rate": 3.866796812322833e-05, | |
| "loss": 5.3913, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.6873099695951352, | |
| "grad_norm": 2.7194063663482666, | |
| "learning_rate": 3.865459699417019e-05, | |
| "loss": 5.418, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.6881100976156185, | |
| "grad_norm": 2.2499608993530273, | |
| "learning_rate": 3.864122586511205e-05, | |
| "loss": 5.4924, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.6889102256361018, | |
| "grad_norm": 3.661318302154541, | |
| "learning_rate": 3.8627854736053915e-05, | |
| "loss": 5.5578, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.6897103536565851, | |
| "grad_norm": 3.076019048690796, | |
| "learning_rate": 3.861448360699578e-05, | |
| "loss": 5.6017, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.6905104816770683, | |
| "grad_norm": 2.133923053741455, | |
| "learning_rate": 3.860111247793764e-05, | |
| "loss": 5.5295, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.6913106096975516, | |
| "grad_norm": 3.3584773540496826, | |
| "learning_rate": 3.8587741348879503e-05, | |
| "loss": 5.4534, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.6921107377180349, | |
| "grad_norm": 2.499058723449707, | |
| "learning_rate": 3.8574370219821366e-05, | |
| "loss": 5.3402, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.6929108657385181, | |
| "grad_norm": 2.5099146366119385, | |
| "learning_rate": 3.856099909076323e-05, | |
| "loss": 5.3765, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.6937109937590015, | |
| "grad_norm": 2.9601941108703613, | |
| "learning_rate": 3.854762796170509e-05, | |
| "loss": 5.5139, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.6945111217794847, | |
| "grad_norm": 3.2487246990203857, | |
| "learning_rate": 3.8534256832646954e-05, | |
| "loss": 5.5665, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.695311249799968, | |
| "grad_norm": 2.8433704376220703, | |
| "learning_rate": 3.852088570358881e-05, | |
| "loss": 5.4445, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.6961113778204513, | |
| "grad_norm": 2.204953670501709, | |
| "learning_rate": 3.850751457453067e-05, | |
| "loss": 5.5415, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.6969115058409345, | |
| "grad_norm": 2.7477571964263916, | |
| "learning_rate": 3.8494143445472536e-05, | |
| "loss": 5.5603, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.6977116338614179, | |
| "grad_norm": 3.2059755325317383, | |
| "learning_rate": 3.84807723164144e-05, | |
| "loss": 5.5524, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.6985117618819011, | |
| "grad_norm": 3.2654213905334473, | |
| "learning_rate": 3.846740118735626e-05, | |
| "loss": 5.5482, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.6993118899023844, | |
| "grad_norm": 2.3536834716796875, | |
| "learning_rate": 3.8454030058298124e-05, | |
| "loss": 5.6251, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.7001120179228677, | |
| "grad_norm": 3.132542371749878, | |
| "learning_rate": 3.8440658929239986e-05, | |
| "loss": 5.762, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.7009121459433509, | |
| "grad_norm": 2.3961470127105713, | |
| "learning_rate": 3.842728780018185e-05, | |
| "loss": 5.4919, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.7017122739638342, | |
| "grad_norm": 1.9365229606628418, | |
| "learning_rate": 3.841391667112371e-05, | |
| "loss": 5.4369, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.7025124019843175, | |
| "grad_norm": 2.227877140045166, | |
| "learning_rate": 3.8400545542065574e-05, | |
| "loss": 5.4361, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.7033125300048008, | |
| "grad_norm": 2.521822452545166, | |
| "learning_rate": 3.838717441300744e-05, | |
| "loss": 5.6763, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.704112658025284, | |
| "grad_norm": 2.4155185222625732, | |
| "learning_rate": 3.83738032839493e-05, | |
| "loss": 5.7041, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.7049127860457673, | |
| "grad_norm": 1.9704358577728271, | |
| "learning_rate": 3.836043215489116e-05, | |
| "loss": 5.5136, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.7057129140662506, | |
| "grad_norm": 3.447098731994629, | |
| "learning_rate": 3.8347061025833025e-05, | |
| "loss": 5.5963, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.7065130420867338, | |
| "grad_norm": 2.0857930183410645, | |
| "learning_rate": 3.833368989677489e-05, | |
| "loss": 5.5328, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.7073131701072172, | |
| "grad_norm": 5.354836940765381, | |
| "learning_rate": 3.8320318767716744e-05, | |
| "loss": 5.561, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.7081132981277004, | |
| "grad_norm": 2.1317214965820312, | |
| "learning_rate": 3.8306947638658606e-05, | |
| "loss": 5.7044, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.7089134261481838, | |
| "grad_norm": 2.163472890853882, | |
| "learning_rate": 3.829357650960047e-05, | |
| "loss": 5.4564, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.709713554168667, | |
| "grad_norm": 2.155075788497925, | |
| "learning_rate": 3.828020538054233e-05, | |
| "loss": 5.5767, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.7105136821891502, | |
| "grad_norm": 2.225407361984253, | |
| "learning_rate": 3.8266834251484194e-05, | |
| "loss": 5.574, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.7113138102096336, | |
| "grad_norm": 2.737126350402832, | |
| "learning_rate": 3.825346312242606e-05, | |
| "loss": 5.5425, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.7121139382301168, | |
| "grad_norm": 3.4771502017974854, | |
| "learning_rate": 3.824009199336792e-05, | |
| "loss": 5.6085, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.7129140662506, | |
| "grad_norm": 3.2826528549194336, | |
| "learning_rate": 3.822672086430978e-05, | |
| "loss": 5.5632, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.7137141942710834, | |
| "grad_norm": 2.4936113357543945, | |
| "learning_rate": 3.8213349735251645e-05, | |
| "loss": 5.4818, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.7145143222915666, | |
| "grad_norm": 3.6719648838043213, | |
| "learning_rate": 3.819997860619351e-05, | |
| "loss": 5.5637, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.7153144503120499, | |
| "grad_norm": 2.7252962589263916, | |
| "learning_rate": 3.818660747713537e-05, | |
| "loss": 5.5623, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.7161145783325332, | |
| "grad_norm": 3.8873820304870605, | |
| "learning_rate": 3.817323634807723e-05, | |
| "loss": 5.5009, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.7169147063530165, | |
| "grad_norm": 2.6248092651367188, | |
| "learning_rate": 3.8159865219019096e-05, | |
| "loss": 5.6683, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.7177148343734998, | |
| "grad_norm": 2.1327767372131348, | |
| "learning_rate": 3.814649408996096e-05, | |
| "loss": 5.373, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.718514962393983, | |
| "grad_norm": 3.1641392707824707, | |
| "learning_rate": 3.813312296090282e-05, | |
| "loss": 5.6192, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.7193150904144663, | |
| "grad_norm": 2.533423662185669, | |
| "learning_rate": 3.811975183184468e-05, | |
| "loss": 5.4736, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.7201152184349496, | |
| "grad_norm": 2.892228841781616, | |
| "learning_rate": 3.810638070278654e-05, | |
| "loss": 5.437, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.7209153464554329, | |
| "grad_norm": 2.295328140258789, | |
| "learning_rate": 3.80930095737284e-05, | |
| "loss": 5.4327, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.7217154744759161, | |
| "grad_norm": 2.4300477504730225, | |
| "learning_rate": 3.8079638444670265e-05, | |
| "loss": 5.6341, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.7225156024963995, | |
| "grad_norm": 4.092593669891357, | |
| "learning_rate": 3.806626731561213e-05, | |
| "loss": 5.5062, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.7233157305168827, | |
| "grad_norm": 2.7330925464630127, | |
| "learning_rate": 3.805289618655399e-05, | |
| "loss": 5.4915, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.7241158585373659, | |
| "grad_norm": 2.0372865200042725, | |
| "learning_rate": 3.8039525057495853e-05, | |
| "loss": 5.5056, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.7249159865578493, | |
| "grad_norm": 2.5585618019104004, | |
| "learning_rate": 3.8026153928437716e-05, | |
| "loss": 5.4614, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.7257161145783325, | |
| "grad_norm": 2.653251886367798, | |
| "learning_rate": 3.801278279937958e-05, | |
| "loss": 5.4437, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.7265162425988159, | |
| "grad_norm": 2.7902703285217285, | |
| "learning_rate": 3.799941167032144e-05, | |
| "loss": 5.4927, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.7273163706192991, | |
| "grad_norm": 3.366363525390625, | |
| "learning_rate": 3.7986040541263304e-05, | |
| "loss": 5.382, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.7281164986397823, | |
| "grad_norm": 2.065732479095459, | |
| "learning_rate": 3.797266941220517e-05, | |
| "loss": 5.5663, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.7289166266602657, | |
| "grad_norm": 3.823241710662842, | |
| "learning_rate": 3.795929828314703e-05, | |
| "loss": 5.4697, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.7297167546807489, | |
| "grad_norm": 2.3972017765045166, | |
| "learning_rate": 3.794592715408889e-05, | |
| "loss": 5.5508, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.7305168827012322, | |
| "grad_norm": 2.4955368041992188, | |
| "learning_rate": 3.7932556025030755e-05, | |
| "loss": 5.5437, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 0.7313170107217155, | |
| "grad_norm": 5.454606533050537, | |
| "learning_rate": 3.791918489597262e-05, | |
| "loss": 5.4974, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.7321171387421987, | |
| "grad_norm": 2.6541287899017334, | |
| "learning_rate": 3.790581376691448e-05, | |
| "loss": 5.5327, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.732917266762682, | |
| "grad_norm": 2.974902391433716, | |
| "learning_rate": 3.789244263785634e-05, | |
| "loss": 5.5352, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.7337173947831653, | |
| "grad_norm": 7.2000274658203125, | |
| "learning_rate": 3.7879071508798206e-05, | |
| "loss": 5.5946, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 0.7345175228036486, | |
| "grad_norm": 2.418121576309204, | |
| "learning_rate": 3.786570037974007e-05, | |
| "loss": 5.4985, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.7353176508241318, | |
| "grad_norm": 2.3174428939819336, | |
| "learning_rate": 3.785232925068193e-05, | |
| "loss": 5.6393, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 0.7361177788446152, | |
| "grad_norm": 2.172489643096924, | |
| "learning_rate": 3.7838958121623794e-05, | |
| "loss": 5.6173, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.7369179068650984, | |
| "grad_norm": 3.9107019901275635, | |
| "learning_rate": 3.7825586992565656e-05, | |
| "loss": 5.4436, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.7377180348855817, | |
| "grad_norm": 2.3483355045318604, | |
| "learning_rate": 3.781221586350752e-05, | |
| "loss": 5.4981, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.738518162906065, | |
| "grad_norm": 3.839348077774048, | |
| "learning_rate": 3.779884473444938e-05, | |
| "loss": 5.5541, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 0.7393182909265482, | |
| "grad_norm": 1.686996579170227, | |
| "learning_rate": 3.7785473605391245e-05, | |
| "loss": 5.6328, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.7401184189470316, | |
| "grad_norm": 2.7277584075927734, | |
| "learning_rate": 3.777210247633311e-05, | |
| "loss": 5.5787, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.7409185469675148, | |
| "grad_norm": 2.60896635055542, | |
| "learning_rate": 3.775873134727497e-05, | |
| "loss": 5.5082, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.741718674987998, | |
| "grad_norm": 2.957674264907837, | |
| "learning_rate": 3.774669733112264e-05, | |
| "loss": 5.516, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.7425188030084814, | |
| "grad_norm": 2.223433017730713, | |
| "learning_rate": 3.7733326202064505e-05, | |
| "loss": 5.502, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.7433189310289646, | |
| "grad_norm": 2.6075685024261475, | |
| "learning_rate": 3.771995507300637e-05, | |
| "loss": 5.5067, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 0.7441190590494479, | |
| "grad_norm": 2.6572721004486084, | |
| "learning_rate": 3.7706583943948224e-05, | |
| "loss": 5.6304, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.7449191870699312, | |
| "grad_norm": 2.0563318729400635, | |
| "learning_rate": 3.7693212814890086e-05, | |
| "loss": 5.4974, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 0.7457193150904144, | |
| "grad_norm": 2.032820463180542, | |
| "learning_rate": 3.767984168583195e-05, | |
| "loss": 5.6016, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.7465194431108978, | |
| "grad_norm": 5.646316051483154, | |
| "learning_rate": 3.766647055677381e-05, | |
| "loss": 5.6661, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.747319571131381, | |
| "grad_norm": 2.5043859481811523, | |
| "learning_rate": 3.7653099427715674e-05, | |
| "loss": 5.6445, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.7481196991518643, | |
| "grad_norm": 2.817434787750244, | |
| "learning_rate": 3.763972829865754e-05, | |
| "loss": 5.3901, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.7489198271723476, | |
| "grad_norm": 2.4041759967803955, | |
| "learning_rate": 3.76263571695994e-05, | |
| "loss": 5.7132, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.7497199551928309, | |
| "grad_norm": 1.8806638717651367, | |
| "learning_rate": 3.761298604054126e-05, | |
| "loss": 5.5203, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 0.7505200832133141, | |
| "grad_norm": 2.088700532913208, | |
| "learning_rate": 3.7599614911483125e-05, | |
| "loss": 5.4414, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.7513202112337974, | |
| "grad_norm": 2.519188165664673, | |
| "learning_rate": 3.758624378242499e-05, | |
| "loss": 5.4094, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.7521203392542807, | |
| "grad_norm": 4.597784042358398, | |
| "learning_rate": 3.757287265336685e-05, | |
| "loss": 5.6246, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.7529204672747639, | |
| "grad_norm": 2.0422868728637695, | |
| "learning_rate": 3.755950152430871e-05, | |
| "loss": 5.3393, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 0.7537205952952473, | |
| "grad_norm": 3.0451338291168213, | |
| "learning_rate": 3.7546130395250576e-05, | |
| "loss": 5.618, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.7545207233157305, | |
| "grad_norm": 2.3379099369049072, | |
| "learning_rate": 3.753275926619244e-05, | |
| "loss": 5.4859, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 0.7553208513362137, | |
| "grad_norm": 2.6721060276031494, | |
| "learning_rate": 3.75193881371343e-05, | |
| "loss": 5.5349, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.7561209793566971, | |
| "grad_norm": 2.495716094970703, | |
| "learning_rate": 3.7506017008076164e-05, | |
| "loss": 5.626, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.7569211073771803, | |
| "grad_norm": 2.9002442359924316, | |
| "learning_rate": 3.749264587901803e-05, | |
| "loss": 5.5438, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.7577212353976637, | |
| "grad_norm": 2.3616931438446045, | |
| "learning_rate": 3.747927474995989e-05, | |
| "loss": 5.6381, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 0.7585213634181469, | |
| "grad_norm": 2.389329433441162, | |
| "learning_rate": 3.746590362090175e-05, | |
| "loss": 5.4326, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.7593214914386301, | |
| "grad_norm": 2.1870810985565186, | |
| "learning_rate": 3.7452532491843615e-05, | |
| "loss": 5.5129, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 0.7601216194591135, | |
| "grad_norm": 2.2454891204833984, | |
| "learning_rate": 3.743916136278548e-05, | |
| "loss": 5.3963, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.7609217474795967, | |
| "grad_norm": 2.5803539752960205, | |
| "learning_rate": 3.742579023372734e-05, | |
| "loss": 5.5237, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.76172187550008, | |
| "grad_norm": 2.5508155822753906, | |
| "learning_rate": 3.74124191046692e-05, | |
| "loss": 5.4525, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.7625220035205633, | |
| "grad_norm": 3.693437337875366, | |
| "learning_rate": 3.7399047975611065e-05, | |
| "loss": 5.5101, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 0.7633221315410466, | |
| "grad_norm": 2.4398484230041504, | |
| "learning_rate": 3.738567684655293e-05, | |
| "loss": 5.5372, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.7641222595615298, | |
| "grad_norm": 2.226680278778076, | |
| "learning_rate": 3.737230571749479e-05, | |
| "loss": 5.3711, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.7649223875820131, | |
| "grad_norm": 2.182704210281372, | |
| "learning_rate": 3.7358934588436654e-05, | |
| "loss": 5.4957, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.7657225156024964, | |
| "grad_norm": 3.145799398422241, | |
| "learning_rate": 3.7345563459378516e-05, | |
| "loss": 5.5411, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.7665226436229797, | |
| "grad_norm": 2.656719923019409, | |
| "learning_rate": 3.733219233032038e-05, | |
| "loss": 5.4737, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.767322771643463, | |
| "grad_norm": 2.2230639457702637, | |
| "learning_rate": 3.731882120126224e-05, | |
| "loss": 5.5192, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 0.7681228996639462, | |
| "grad_norm": 4.286400318145752, | |
| "learning_rate": 3.7305450072204104e-05, | |
| "loss": 5.6413, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.7689230276844295, | |
| "grad_norm": 2.3106577396392822, | |
| "learning_rate": 3.729207894314596e-05, | |
| "loss": 5.5998, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 0.7697231557049128, | |
| "grad_norm": 2.7155752182006836, | |
| "learning_rate": 3.727870781408782e-05, | |
| "loss": 5.4494, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.770523283725396, | |
| "grad_norm": 2.082399368286133, | |
| "learning_rate": 3.7265336685029686e-05, | |
| "loss": 5.4897, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.7713234117458794, | |
| "grad_norm": 2.0752410888671875, | |
| "learning_rate": 3.725196555597155e-05, | |
| "loss": 5.537, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.7721235397663626, | |
| "grad_norm": 2.258284091949463, | |
| "learning_rate": 3.723859442691341e-05, | |
| "loss": 5.6481, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.7729236677868458, | |
| "grad_norm": 2.8548264503479004, | |
| "learning_rate": 3.7225223297855274e-05, | |
| "loss": 5.5508, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.7737237958073292, | |
| "grad_norm": 3.375497579574585, | |
| "learning_rate": 3.7211852168797136e-05, | |
| "loss": 5.3847, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 0.7745239238278124, | |
| "grad_norm": 2.6680548191070557, | |
| "learning_rate": 3.7198481039739e-05, | |
| "loss": 5.3742, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.7753240518482958, | |
| "grad_norm": 2.2915420532226562, | |
| "learning_rate": 3.718510991068086e-05, | |
| "loss": 5.5593, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.776124179868779, | |
| "grad_norm": 3.224327325820923, | |
| "learning_rate": 3.7171738781622724e-05, | |
| "loss": 5.5711, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.7769243078892623, | |
| "grad_norm": 3.025899887084961, | |
| "learning_rate": 3.715836765256459e-05, | |
| "loss": 5.3164, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 0.7777244359097456, | |
| "grad_norm": 1.9424941539764404, | |
| "learning_rate": 3.714499652350645e-05, | |
| "loss": 5.4804, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.7785245639302288, | |
| "grad_norm": 2.863312005996704, | |
| "learning_rate": 3.713162539444831e-05, | |
| "loss": 5.3353, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 0.7793246919507121, | |
| "grad_norm": 2.0607283115386963, | |
| "learning_rate": 3.7118254265390175e-05, | |
| "loss": 5.5311, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.7801248199711954, | |
| "grad_norm": 2.225666046142578, | |
| "learning_rate": 3.710488313633204e-05, | |
| "loss": 5.5315, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.7809249479916787, | |
| "grad_norm": 2.1531851291656494, | |
| "learning_rate": 3.70915120072739e-05, | |
| "loss": 5.5311, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.7817250760121619, | |
| "grad_norm": 2.6129846572875977, | |
| "learning_rate": 3.7078140878215756e-05, | |
| "loss": 5.5927, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 0.7825252040326452, | |
| "grad_norm": 3.1822173595428467, | |
| "learning_rate": 3.706476974915762e-05, | |
| "loss": 5.5403, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.7833253320531285, | |
| "grad_norm": 5.453544616699219, | |
| "learning_rate": 3.705139862009948e-05, | |
| "loss": 5.4393, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 0.7841254600736117, | |
| "grad_norm": 2.573024272918701, | |
| "learning_rate": 3.7038027491041345e-05, | |
| "loss": 5.5677, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.7849255880940951, | |
| "grad_norm": 2.283381700515747, | |
| "learning_rate": 3.702465636198321e-05, | |
| "loss": 5.3814, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.7857257161145783, | |
| "grad_norm": 3.119277238845825, | |
| "learning_rate": 3.701128523292507e-05, | |
| "loss": 5.5022, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.7865258441350617, | |
| "grad_norm": 5.085709571838379, | |
| "learning_rate": 3.699791410386693e-05, | |
| "loss": 5.5322, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 0.7873259721555449, | |
| "grad_norm": 2.4339115619659424, | |
| "learning_rate": 3.6984542974808795e-05, | |
| "loss": 5.5885, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.7881261001760281, | |
| "grad_norm": 2.2715206146240234, | |
| "learning_rate": 3.697117184575066e-05, | |
| "loss": 5.4657, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.7889262281965115, | |
| "grad_norm": 2.1434290409088135, | |
| "learning_rate": 3.695780071669252e-05, | |
| "loss": 5.5571, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.7897263562169947, | |
| "grad_norm": 2.235814094543457, | |
| "learning_rate": 3.694442958763438e-05, | |
| "loss": 5.5054, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.790526484237478, | |
| "grad_norm": 4.322607517242432, | |
| "learning_rate": 3.6931058458576246e-05, | |
| "loss": 5.3727, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.7913266122579613, | |
| "grad_norm": 2.0876612663269043, | |
| "learning_rate": 3.691768732951811e-05, | |
| "loss": 5.5682, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 0.7921267402784445, | |
| "grad_norm": 1.9573509693145752, | |
| "learning_rate": 3.690431620045997e-05, | |
| "loss": 5.4981, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.7929268682989278, | |
| "grad_norm": 2.527776002883911, | |
| "learning_rate": 3.6890945071401834e-05, | |
| "loss": 5.3799, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 0.7937269963194111, | |
| "grad_norm": 3.043266773223877, | |
| "learning_rate": 3.687757394234369e-05, | |
| "loss": 5.5366, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.7945271243398944, | |
| "grad_norm": 2.502704381942749, | |
| "learning_rate": 3.686420281328555e-05, | |
| "loss": 5.576, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.7953272523603777, | |
| "grad_norm": 2.863032817840576, | |
| "learning_rate": 3.6850831684227415e-05, | |
| "loss": 5.4838, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.796127380380861, | |
| "grad_norm": 2.4610373973846436, | |
| "learning_rate": 3.683746055516928e-05, | |
| "loss": 5.6119, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.7969275084013442, | |
| "grad_norm": 2.193134069442749, | |
| "learning_rate": 3.682408942611114e-05, | |
| "loss": 5.3948, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.7977276364218275, | |
| "grad_norm": 3.6384451389312744, | |
| "learning_rate": 3.6810718297053003e-05, | |
| "loss": 5.5381, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 0.7985277644423108, | |
| "grad_norm": 2.5201289653778076, | |
| "learning_rate": 3.6797347167994866e-05, | |
| "loss": 5.4386, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.799327892462794, | |
| "grad_norm": 2.3459038734436035, | |
| "learning_rate": 3.678397603893673e-05, | |
| "loss": 5.8173, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.8001280204832774, | |
| "grad_norm": 2.575666904449463, | |
| "learning_rate": 3.677060490987859e-05, | |
| "loss": 5.4436, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.8009281485037606, | |
| "grad_norm": 4.0012712478637695, | |
| "learning_rate": 3.6757233780820454e-05, | |
| "loss": 5.5222, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 0.8017282765242438, | |
| "grad_norm": 2.3244402408599854, | |
| "learning_rate": 3.674386265176232e-05, | |
| "loss": 5.398, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.8025284045447272, | |
| "grad_norm": 2.2298974990844727, | |
| "learning_rate": 3.673049152270418e-05, | |
| "loss": 5.4749, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 0.8033285325652104, | |
| "grad_norm": 3.589245080947876, | |
| "learning_rate": 3.671712039364604e-05, | |
| "loss": 5.5091, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.8041286605856938, | |
| "grad_norm": 2.2426655292510986, | |
| "learning_rate": 3.6703749264587905e-05, | |
| "loss": 5.5136, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.804928788606177, | |
| "grad_norm": 2.5258290767669678, | |
| "learning_rate": 3.669037813552977e-05, | |
| "loss": 5.522, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.8057289166266602, | |
| "grad_norm": 3.040107250213623, | |
| "learning_rate": 3.667700700647163e-05, | |
| "loss": 5.5748, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 0.8065290446471436, | |
| "grad_norm": 2.561196804046631, | |
| "learning_rate": 3.6663635877413486e-05, | |
| "loss": 5.5973, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.8073291726676268, | |
| "grad_norm": 2.4179880619049072, | |
| "learning_rate": 3.665026474835535e-05, | |
| "loss": 5.5915, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 0.8081293006881101, | |
| "grad_norm": 2.393134593963623, | |
| "learning_rate": 3.663689361929721e-05, | |
| "loss": 5.4809, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.8089294287085934, | |
| "grad_norm": 3.107543468475342, | |
| "learning_rate": 3.6623522490239074e-05, | |
| "loss": 5.6127, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 0.8097295567290766, | |
| "grad_norm": 2.8467986583709717, | |
| "learning_rate": 3.661015136118094e-05, | |
| "loss": 5.5274, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.8105296847495599, | |
| "grad_norm": 2.49955153465271, | |
| "learning_rate": 3.65967802321228e-05, | |
| "loss": 5.4469, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 0.8113298127700432, | |
| "grad_norm": 2.817401885986328, | |
| "learning_rate": 3.658340910306466e-05, | |
| "loss": 5.5901, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.8121299407905265, | |
| "grad_norm": 2.284855842590332, | |
| "learning_rate": 3.6570037974006525e-05, | |
| "loss": 5.588, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.8129300688110097, | |
| "grad_norm": 3.13712739944458, | |
| "learning_rate": 3.655666684494839e-05, | |
| "loss": 5.5035, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.8137301968314931, | |
| "grad_norm": 2.7964253425598145, | |
| "learning_rate": 3.654329571589025e-05, | |
| "loss": 5.4622, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 0.8145303248519763, | |
| "grad_norm": 3.7489845752716064, | |
| "learning_rate": 3.652992458683211e-05, | |
| "loss": 5.6106, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.8153304528724596, | |
| "grad_norm": 2.0697953701019287, | |
| "learning_rate": 3.6516553457773976e-05, | |
| "loss": 5.4128, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 0.8161305808929429, | |
| "grad_norm": 2.495635986328125, | |
| "learning_rate": 3.650318232871584e-05, | |
| "loss": 5.3183, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.8169307089134261, | |
| "grad_norm": 1.9717586040496826, | |
| "learning_rate": 3.64898111996577e-05, | |
| "loss": 5.4251, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 0.8177308369339095, | |
| "grad_norm": 2.591371774673462, | |
| "learning_rate": 3.6476440070599564e-05, | |
| "loss": 5.3903, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.8185309649543927, | |
| "grad_norm": 2.9142751693725586, | |
| "learning_rate": 3.646306894154142e-05, | |
| "loss": 5.4119, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 0.8193310929748759, | |
| "grad_norm": 2.1791203022003174, | |
| "learning_rate": 3.644969781248328e-05, | |
| "loss": 5.5931, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.8201312209953593, | |
| "grad_norm": 2.787339925765991, | |
| "learning_rate": 3.6436326683425145e-05, | |
| "loss": 5.5301, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.8209313490158425, | |
| "grad_norm": 2.722717523574829, | |
| "learning_rate": 3.642295555436701e-05, | |
| "loss": 5.5608, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.8217314770363258, | |
| "grad_norm": 2.937549114227295, | |
| "learning_rate": 3.640958442530887e-05, | |
| "loss": 5.5967, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 0.8225316050568091, | |
| "grad_norm": 3.0384104251861572, | |
| "learning_rate": 3.639621329625073e-05, | |
| "loss": 5.5901, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.8233317330772923, | |
| "grad_norm": 2.6817758083343506, | |
| "learning_rate": 3.6382842167192596e-05, | |
| "loss": 5.4188, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 0.8241318610977757, | |
| "grad_norm": 2.6184494495391846, | |
| "learning_rate": 3.636947103813446e-05, | |
| "loss": 5.5194, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.8249319891182589, | |
| "grad_norm": 2.613208293914795, | |
| "learning_rate": 3.635609990907632e-05, | |
| "loss": 5.4968, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 0.8257321171387422, | |
| "grad_norm": 6.223053932189941, | |
| "learning_rate": 3.6342728780018184e-05, | |
| "loss": 5.3478, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.8265322451592255, | |
| "grad_norm": 3.294417381286621, | |
| "learning_rate": 3.632935765096005e-05, | |
| "loss": 5.5736, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 0.8273323731797088, | |
| "grad_norm": 2.3347206115722656, | |
| "learning_rate": 3.631598652190191e-05, | |
| "loss": 5.6787, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.828132501200192, | |
| "grad_norm": 3.219491958618164, | |
| "learning_rate": 3.630261539284377e-05, | |
| "loss": 5.5125, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.8289326292206753, | |
| "grad_norm": 2.5759575366973877, | |
| "learning_rate": 3.6289244263785635e-05, | |
| "loss": 5.4405, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.8297327572411586, | |
| "grad_norm": 2.4145963191986084, | |
| "learning_rate": 3.62758731347275e-05, | |
| "loss": 5.479, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 0.8305328852616418, | |
| "grad_norm": 2.7548952102661133, | |
| "learning_rate": 3.626250200566936e-05, | |
| "loss": 5.5466, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.8313330132821252, | |
| "grad_norm": 1.9488781690597534, | |
| "learning_rate": 3.624913087661122e-05, | |
| "loss": 5.5063, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 0.8321331413026084, | |
| "grad_norm": 2.648233652114868, | |
| "learning_rate": 3.6235759747553086e-05, | |
| "loss": 5.4158, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.8329332693230916, | |
| "grad_norm": 2.8808720111846924, | |
| "learning_rate": 3.622238861849495e-05, | |
| "loss": 5.5431, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 0.833733397343575, | |
| "grad_norm": 3.4570131301879883, | |
| "learning_rate": 3.620901748943681e-05, | |
| "loss": 5.4842, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.8345335253640582, | |
| "grad_norm": 4.246754169464111, | |
| "learning_rate": 3.6195646360378674e-05, | |
| "loss": 5.5809, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 0.8353336533845416, | |
| "grad_norm": 1.8645952939987183, | |
| "learning_rate": 3.6182275231320536e-05, | |
| "loss": 5.4272, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.8361337814050248, | |
| "grad_norm": 3.3832550048828125, | |
| "learning_rate": 3.61689041022624e-05, | |
| "loss": 5.4291, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.836933909425508, | |
| "grad_norm": 2.1454830169677734, | |
| "learning_rate": 3.615553297320426e-05, | |
| "loss": 5.4457, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.8377340374459914, | |
| "grad_norm": 2.9275059700012207, | |
| "learning_rate": 3.6142161844146124e-05, | |
| "loss": 5.3577, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 0.8385341654664746, | |
| "grad_norm": 2.9177403450012207, | |
| "learning_rate": 3.612879071508799e-05, | |
| "loss": 5.5011, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.8393342934869579, | |
| "grad_norm": 2.9115045070648193, | |
| "learning_rate": 3.611541958602985e-05, | |
| "loss": 5.4961, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 0.8401344215074412, | |
| "grad_norm": 3.270296335220337, | |
| "learning_rate": 3.610204845697171e-05, | |
| "loss": 5.4651, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.8409345495279245, | |
| "grad_norm": 2.2930686473846436, | |
| "learning_rate": 3.6088677327913575e-05, | |
| "loss": 5.4363, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 0.8417346775484077, | |
| "grad_norm": 3.168717622756958, | |
| "learning_rate": 3.607530619885544e-05, | |
| "loss": 5.361, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.842534805568891, | |
| "grad_norm": 2.009021759033203, | |
| "learning_rate": 3.60619350697973e-05, | |
| "loss": 5.4435, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 0.8433349335893743, | |
| "grad_norm": 3.454181432723999, | |
| "learning_rate": 3.6048563940739156e-05, | |
| "loss": 5.4134, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.8441350616098576, | |
| "grad_norm": 2.8601911067962646, | |
| "learning_rate": 3.603519281168102e-05, | |
| "loss": 5.3224, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.8449351896303409, | |
| "grad_norm": 2.612689733505249, | |
| "learning_rate": 3.602182168262288e-05, | |
| "loss": 5.3947, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.8457353176508241, | |
| "grad_norm": 2.813868284225464, | |
| "learning_rate": 3.6008450553564745e-05, | |
| "loss": 5.4598, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 0.8465354456713075, | |
| "grad_norm": 2.226395606994629, | |
| "learning_rate": 3.599507942450661e-05, | |
| "loss": 5.4401, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.8473355736917907, | |
| "grad_norm": 3.4722280502319336, | |
| "learning_rate": 3.598170829544847e-05, | |
| "loss": 5.4831, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 0.8481357017122739, | |
| "grad_norm": 3.270322799682617, | |
| "learning_rate": 3.596833716639033e-05, | |
| "loss": 5.6256, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.8489358297327573, | |
| "grad_norm": 1.9735034704208374, | |
| "learning_rate": 3.5954966037332195e-05, | |
| "loss": 5.491, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 0.8497359577532405, | |
| "grad_norm": 2.9609665870666504, | |
| "learning_rate": 3.594159490827406e-05, | |
| "loss": 5.5421, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.8505360857737237, | |
| "grad_norm": 3.1109185218811035, | |
| "learning_rate": 3.592822377921592e-05, | |
| "loss": 5.5718, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 0.8513362137942071, | |
| "grad_norm": 2.68784761428833, | |
| "learning_rate": 3.5914852650157783e-05, | |
| "loss": 5.4769, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.8521363418146903, | |
| "grad_norm": 2.2947535514831543, | |
| "learning_rate": 3.5901481521099646e-05, | |
| "loss": 5.4901, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.8529364698351737, | |
| "grad_norm": 1.894142746925354, | |
| "learning_rate": 3.588811039204151e-05, | |
| "loss": 5.5021, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.8537365978556569, | |
| "grad_norm": 2.800260543823242, | |
| "learning_rate": 3.587473926298337e-05, | |
| "loss": 5.6767, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 0.8545367258761402, | |
| "grad_norm": 3.055172920227051, | |
| "learning_rate": 3.5861368133925234e-05, | |
| "loss": 5.5765, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.8553368538966235, | |
| "grad_norm": 2.3778443336486816, | |
| "learning_rate": 3.58479970048671e-05, | |
| "loss": 5.5377, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 0.8561369819171067, | |
| "grad_norm": 4.772058486938477, | |
| "learning_rate": 3.583462587580895e-05, | |
| "loss": 5.432, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.85693710993759, | |
| "grad_norm": 1.9563825130462646, | |
| "learning_rate": 3.5821254746750815e-05, | |
| "loss": 5.4832, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 0.8577372379580733, | |
| "grad_norm": 2.149519205093384, | |
| "learning_rate": 3.580788361769268e-05, | |
| "loss": 5.491, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.8585373659785566, | |
| "grad_norm": 3.5061347484588623, | |
| "learning_rate": 3.579451248863454e-05, | |
| "loss": 5.5747, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 0.8593374939990398, | |
| "grad_norm": 2.74947452545166, | |
| "learning_rate": 3.5781141359576404e-05, | |
| "loss": 5.3591, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.8601376220195232, | |
| "grad_norm": 2.818753719329834, | |
| "learning_rate": 3.5767770230518266e-05, | |
| "loss": 5.4722, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.8609377500400064, | |
| "grad_norm": 2.7501718997955322, | |
| "learning_rate": 3.575439910146013e-05, | |
| "loss": 5.4531, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.8617378780604896, | |
| "grad_norm": 2.314549207687378, | |
| "learning_rate": 3.574102797240199e-05, | |
| "loss": 5.5488, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 0.862538006080973, | |
| "grad_norm": 2.583895683288574, | |
| "learning_rate": 3.5727656843343854e-05, | |
| "loss": 5.5101, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.8633381341014562, | |
| "grad_norm": 2.778087854385376, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 5.421, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 0.8641382621219396, | |
| "grad_norm": 3.679514169692993, | |
| "learning_rate": 3.570091458522758e-05, | |
| "loss": 5.5277, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.8649383901424228, | |
| "grad_norm": 3.3869597911834717, | |
| "learning_rate": 3.568754345616944e-05, | |
| "loss": 5.5185, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 0.865738518162906, | |
| "grad_norm": 3.1094346046447754, | |
| "learning_rate": 3.5674172327111305e-05, | |
| "loss": 5.396, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.8665386461833894, | |
| "grad_norm": 2.3561792373657227, | |
| "learning_rate": 3.566080119805317e-05, | |
| "loss": 5.5995, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 0.8673387742038726, | |
| "grad_norm": 2.7533133029937744, | |
| "learning_rate": 3.564743006899503e-05, | |
| "loss": 5.4848, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.8681389022243559, | |
| "grad_norm": 2.923741579055786, | |
| "learning_rate": 3.5634058939936886e-05, | |
| "loss": 5.5549, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.8689390302448392, | |
| "grad_norm": 2.002704381942749, | |
| "learning_rate": 3.562068781087875e-05, | |
| "loss": 5.4354, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.8697391582653224, | |
| "grad_norm": 2.277064085006714, | |
| "learning_rate": 3.560731668182061e-05, | |
| "loss": 5.4404, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 0.8705392862858057, | |
| "grad_norm": 2.23490047454834, | |
| "learning_rate": 3.5593945552762474e-05, | |
| "loss": 5.7253, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.871339414306289, | |
| "grad_norm": 2.42874813079834, | |
| "learning_rate": 3.558057442370434e-05, | |
| "loss": 5.4351, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 0.8721395423267723, | |
| "grad_norm": 2.097278118133545, | |
| "learning_rate": 3.55672032946462e-05, | |
| "loss": 5.4772, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.8729396703472556, | |
| "grad_norm": 2.045832395553589, | |
| "learning_rate": 3.555383216558806e-05, | |
| "loss": 5.4132, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 0.8737397983677389, | |
| "grad_norm": 2.695033550262451, | |
| "learning_rate": 3.5540461036529925e-05, | |
| "loss": 5.3975, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.8745399263882221, | |
| "grad_norm": 2.62748384475708, | |
| "learning_rate": 3.552708990747179e-05, | |
| "loss": 5.5843, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 0.8753400544087054, | |
| "grad_norm": 2.6703569889068604, | |
| "learning_rate": 3.551371877841365e-05, | |
| "loss": 5.548, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.8761401824291887, | |
| "grad_norm": 2.7184908390045166, | |
| "learning_rate": 3.550034764935551e-05, | |
| "loss": 5.4833, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.8769403104496719, | |
| "grad_norm": 2.6194417476654053, | |
| "learning_rate": 3.5486976520297376e-05, | |
| "loss": 5.3647, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.8777404384701553, | |
| "grad_norm": 2.5021440982818604, | |
| "learning_rate": 3.547360539123924e-05, | |
| "loss": 5.4775, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 0.8785405664906385, | |
| "grad_norm": 3.3758370876312256, | |
| "learning_rate": 3.54602342621811e-05, | |
| "loss": 5.4144, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.8793406945111217, | |
| "grad_norm": 2.7361087799072266, | |
| "learning_rate": 3.5446863133122964e-05, | |
| "loss": 5.3614, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 0.8801408225316051, | |
| "grad_norm": 3.831631660461426, | |
| "learning_rate": 3.543349200406482e-05, | |
| "loss": 5.4672, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.8809409505520883, | |
| "grad_norm": 2.9705264568328857, | |
| "learning_rate": 3.542012087500668e-05, | |
| "loss": 5.5334, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 0.8817410785725716, | |
| "grad_norm": 3.578693389892578, | |
| "learning_rate": 3.5406749745948545e-05, | |
| "loss": 5.4943, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.8825412065930549, | |
| "grad_norm": 2.0674843788146973, | |
| "learning_rate": 3.539337861689041e-05, | |
| "loss": 5.4054, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 0.8833413346135381, | |
| "grad_norm": 2.1904194355010986, | |
| "learning_rate": 3.538000748783227e-05, | |
| "loss": 5.37, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.8841414626340215, | |
| "grad_norm": 3.7718141078948975, | |
| "learning_rate": 3.536663635877413e-05, | |
| "loss": 5.6004, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.8849415906545047, | |
| "grad_norm": 2.7325282096862793, | |
| "learning_rate": 3.5353265229715996e-05, | |
| "loss": 5.4552, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.885741718674988, | |
| "grad_norm": 3.3750839233398438, | |
| "learning_rate": 3.533989410065786e-05, | |
| "loss": 5.5041, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 0.8865418466954713, | |
| "grad_norm": 2.5617001056671143, | |
| "learning_rate": 3.532652297159972e-05, | |
| "loss": 5.4912, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.8873419747159546, | |
| "grad_norm": 1.9870737791061401, | |
| "learning_rate": 3.5313151842541584e-05, | |
| "loss": 5.4576, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 0.8881421027364378, | |
| "grad_norm": 2.458249568939209, | |
| "learning_rate": 3.529978071348345e-05, | |
| "loss": 5.7306, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.8889422307569211, | |
| "grad_norm": 3.1406562328338623, | |
| "learning_rate": 3.528640958442531e-05, | |
| "loss": 5.5833, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 0.8897423587774044, | |
| "grad_norm": 2.4337878227233887, | |
| "learning_rate": 3.527303845536717e-05, | |
| "loss": 5.4938, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.8905424867978876, | |
| "grad_norm": 2.925147294998169, | |
| "learning_rate": 3.5259667326309035e-05, | |
| "loss": 5.5591, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 0.891342614818371, | |
| "grad_norm": 2.5177969932556152, | |
| "learning_rate": 3.52462961972509e-05, | |
| "loss": 5.5199, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.8921427428388542, | |
| "grad_norm": 2.3133068084716797, | |
| "learning_rate": 3.523292506819276e-05, | |
| "loss": 5.3506, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.8929428708593375, | |
| "grad_norm": 2.1670310497283936, | |
| "learning_rate": 3.521955393913462e-05, | |
| "loss": 5.3459, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.8937429988798208, | |
| "grad_norm": 2.875126838684082, | |
| "learning_rate": 3.5206182810076486e-05, | |
| "loss": 5.3948, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 0.894543126900304, | |
| "grad_norm": 2.3784403800964355, | |
| "learning_rate": 3.519281168101835e-05, | |
| "loss": 5.431, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.8953432549207874, | |
| "grad_norm": 2.400426149368286, | |
| "learning_rate": 3.517944055196021e-05, | |
| "loss": 5.4228, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 0.8961433829412706, | |
| "grad_norm": 2.2166919708251953, | |
| "learning_rate": 3.5166069422902074e-05, | |
| "loss": 5.6408, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.8969435109617538, | |
| "grad_norm": 1.7938240766525269, | |
| "learning_rate": 3.5152698293843936e-05, | |
| "loss": 5.3972, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 0.8977436389822372, | |
| "grad_norm": 2.4942996501922607, | |
| "learning_rate": 3.51393271647858e-05, | |
| "loss": 5.5523, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.8985437670027204, | |
| "grad_norm": 2.706131935119629, | |
| "learning_rate": 3.512595603572766e-05, | |
| "loss": 5.6029, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 0.8993438950232037, | |
| "grad_norm": 3.6749794483184814, | |
| "learning_rate": 3.5112584906669524e-05, | |
| "loss": 5.5903, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.900144023043687, | |
| "grad_norm": 2.8764829635620117, | |
| "learning_rate": 3.509921377761139e-05, | |
| "loss": 5.392, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.9009441510641703, | |
| "grad_norm": 1.9971251487731934, | |
| "learning_rate": 3.508584264855325e-05, | |
| "loss": 5.5115, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.9017442790846536, | |
| "grad_norm": 1.9127808809280396, | |
| "learning_rate": 3.507247151949511e-05, | |
| "loss": 5.6273, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 0.9025444071051368, | |
| "grad_norm": 2.679152727127075, | |
| "learning_rate": 3.5059100390436975e-05, | |
| "loss": 5.5216, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.9033445351256201, | |
| "grad_norm": 3.1412837505340576, | |
| "learning_rate": 3.504572926137884e-05, | |
| "loss": 5.5665, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 0.9041446631461034, | |
| "grad_norm": 3.2604153156280518, | |
| "learning_rate": 3.50323581323207e-05, | |
| "loss": 5.6283, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.9049447911665867, | |
| "grad_norm": 2.2050578594207764, | |
| "learning_rate": 3.5018987003262557e-05, | |
| "loss": 5.429, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 0.9057449191870699, | |
| "grad_norm": 3.6569366455078125, | |
| "learning_rate": 3.500561587420442e-05, | |
| "loss": 5.5833, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.9065450472075532, | |
| "grad_norm": 2.38771653175354, | |
| "learning_rate": 3.499224474514628e-05, | |
| "loss": 5.4127, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 0.9073451752280365, | |
| "grad_norm": 2.1471800804138184, | |
| "learning_rate": 3.4978873616088145e-05, | |
| "loss": 5.4064, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.9081453032485197, | |
| "grad_norm": 2.340174674987793, | |
| "learning_rate": 3.496550248703001e-05, | |
| "loss": 5.5581, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.9089454312690031, | |
| "grad_norm": 2.771235466003418, | |
| "learning_rate": 3.495213135797187e-05, | |
| "loss": 5.4221, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.9097455592894863, | |
| "grad_norm": 2.7797491550445557, | |
| "learning_rate": 3.493876022891373e-05, | |
| "loss": 5.5604, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 0.9105456873099695, | |
| "grad_norm": 2.0206966400146484, | |
| "learning_rate": 3.4925389099855595e-05, | |
| "loss": 5.3382, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.9113458153304529, | |
| "grad_norm": 3.5101125240325928, | |
| "learning_rate": 3.491201797079746e-05, | |
| "loss": 5.5358, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 0.9121459433509361, | |
| "grad_norm": 2.3375003337860107, | |
| "learning_rate": 3.489864684173932e-05, | |
| "loss": 5.5492, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.9129460713714195, | |
| "grad_norm": 2.4977264404296875, | |
| "learning_rate": 3.4885275712681183e-05, | |
| "loss": 5.507, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 0.9137461993919027, | |
| "grad_norm": 2.0408174991607666, | |
| "learning_rate": 3.4871904583623046e-05, | |
| "loss": 5.5587, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.914546327412386, | |
| "grad_norm": 2.525320053100586, | |
| "learning_rate": 3.485853345456491e-05, | |
| "loss": 5.5013, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 0.9153464554328693, | |
| "grad_norm": 2.946377992630005, | |
| "learning_rate": 3.484516232550677e-05, | |
| "loss": 5.5959, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.9161465834533525, | |
| "grad_norm": 2.138331174850464, | |
| "learning_rate": 3.4831791196448634e-05, | |
| "loss": 5.4817, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.9169467114738358, | |
| "grad_norm": 1.7159631252288818, | |
| "learning_rate": 3.48184200673905e-05, | |
| "loss": 5.5036, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.9177468394943191, | |
| "grad_norm": 2.5576088428497314, | |
| "learning_rate": 3.480504893833235e-05, | |
| "loss": 5.4721, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 0.9185469675148024, | |
| "grad_norm": 2.057349443435669, | |
| "learning_rate": 3.4791677809274215e-05, | |
| "loss": 5.5468, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.9193470955352856, | |
| "grad_norm": 2.4942944049835205, | |
| "learning_rate": 3.477830668021608e-05, | |
| "loss": 5.5999, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 0.920147223555769, | |
| "grad_norm": 3.3070192337036133, | |
| "learning_rate": 3.476493555115794e-05, | |
| "loss": 5.5418, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.9209473515762522, | |
| "grad_norm": 2.2323672771453857, | |
| "learning_rate": 3.4751564422099804e-05, | |
| "loss": 5.398, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 0.9217474795967355, | |
| "grad_norm": 1.9982457160949707, | |
| "learning_rate": 3.4738193293041666e-05, | |
| "loss": 5.4668, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.9225476076172188, | |
| "grad_norm": 3.4668660163879395, | |
| "learning_rate": 3.472482216398353e-05, | |
| "loss": 5.5433, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 0.923347735637702, | |
| "grad_norm": 2.7247307300567627, | |
| "learning_rate": 3.471145103492539e-05, | |
| "loss": 5.4156, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.9241478636581854, | |
| "grad_norm": 2.42948317527771, | |
| "learning_rate": 3.4698079905867254e-05, | |
| "loss": 5.4336, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.9249479916786686, | |
| "grad_norm": 4.134993076324463, | |
| "learning_rate": 3.468470877680912e-05, | |
| "loss": 5.3362, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.9257481196991518, | |
| "grad_norm": 2.0852134227752686, | |
| "learning_rate": 3.467133764775098e-05, | |
| "loss": 5.4117, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 0.9265482477196352, | |
| "grad_norm": 2.224235773086548, | |
| "learning_rate": 3.465796651869284e-05, | |
| "loss": 5.4132, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.9273483757401184, | |
| "grad_norm": 2.0093464851379395, | |
| "learning_rate": 3.4644595389634705e-05, | |
| "loss": 5.3876, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 0.9281485037606017, | |
| "grad_norm": 1.9892866611480713, | |
| "learning_rate": 3.463122426057657e-05, | |
| "loss": 5.4069, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.928948631781085, | |
| "grad_norm": 3.9974398612976074, | |
| "learning_rate": 3.461785313151843e-05, | |
| "loss": 5.4892, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 0.9297487598015682, | |
| "grad_norm": 1.9878896474838257, | |
| "learning_rate": 3.4604482002460286e-05, | |
| "loss": 5.5017, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.9305488878220515, | |
| "grad_norm": 3.1477320194244385, | |
| "learning_rate": 3.459111087340215e-05, | |
| "loss": 5.3199, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 0.9313490158425348, | |
| "grad_norm": 2.434946298599243, | |
| "learning_rate": 3.457773974434401e-05, | |
| "loss": 5.4885, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.9321491438630181, | |
| "grad_norm": 3.2463152408599854, | |
| "learning_rate": 3.4564368615285874e-05, | |
| "loss": 5.5232, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.9329492718835014, | |
| "grad_norm": 3.733612537384033, | |
| "learning_rate": 3.455099748622774e-05, | |
| "loss": 5.4918, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.9337493999039846, | |
| "grad_norm": 3.3726518154144287, | |
| "learning_rate": 3.45376263571696e-05, | |
| "loss": 5.3887, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 0.9345495279244679, | |
| "grad_norm": 2.527639627456665, | |
| "learning_rate": 3.452425522811146e-05, | |
| "loss": 5.4, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.9353496559449512, | |
| "grad_norm": 3.3945000171661377, | |
| "learning_rate": 3.4510884099053325e-05, | |
| "loss": 5.4835, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 0.9361497839654345, | |
| "grad_norm": 2.492178201675415, | |
| "learning_rate": 3.449751296999519e-05, | |
| "loss": 5.5472, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.9369499119859177, | |
| "grad_norm": 2.2719671726226807, | |
| "learning_rate": 3.448414184093705e-05, | |
| "loss": 5.3069, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 0.937750040006401, | |
| "grad_norm": 4.121431350708008, | |
| "learning_rate": 3.447077071187891e-05, | |
| "loss": 5.3377, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.9385501680268843, | |
| "grad_norm": 2.2480831146240234, | |
| "learning_rate": 3.4457399582820776e-05, | |
| "loss": 5.3888, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 0.9393502960473675, | |
| "grad_norm": 3.118621349334717, | |
| "learning_rate": 3.444402845376264e-05, | |
| "loss": 5.3225, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.9401504240678509, | |
| "grad_norm": 2.513777494430542, | |
| "learning_rate": 3.44306573247045e-05, | |
| "loss": 5.4971, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.9409505520883341, | |
| "grad_norm": 2.491767406463623, | |
| "learning_rate": 3.4417286195646364e-05, | |
| "loss": 5.5061, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.9417506801088175, | |
| "grad_norm": 2.8964290618896484, | |
| "learning_rate": 3.440391506658823e-05, | |
| "loss": 5.3395, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 0.9425508081293007, | |
| "grad_norm": 2.1613073348999023, | |
| "learning_rate": 3.439054393753008e-05, | |
| "loss": 5.512, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.9433509361497839, | |
| "grad_norm": 3.5444371700286865, | |
| "learning_rate": 3.4377172808471945e-05, | |
| "loss": 5.4804, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 0.9441510641702673, | |
| "grad_norm": 3.0833287239074707, | |
| "learning_rate": 3.436380167941381e-05, | |
| "loss": 5.5711, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.9449511921907505, | |
| "grad_norm": 2.2267260551452637, | |
| "learning_rate": 3.435043055035567e-05, | |
| "loss": 5.3964, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 0.9457513202112338, | |
| "grad_norm": 3.114546537399292, | |
| "learning_rate": 3.4337059421297533e-05, | |
| "loss": 5.4296, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.9465514482317171, | |
| "grad_norm": 3.316612958908081, | |
| "learning_rate": 3.4323688292239396e-05, | |
| "loss": 5.451, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 0.9473515762522003, | |
| "grad_norm": 2.97145414352417, | |
| "learning_rate": 3.431031716318126e-05, | |
| "loss": 5.6184, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.9481517042726836, | |
| "grad_norm": 2.2837045192718506, | |
| "learning_rate": 3.429694603412312e-05, | |
| "loss": 5.3398, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.9489518322931669, | |
| "grad_norm": 2.2095916271209717, | |
| "learning_rate": 3.4283574905064984e-05, | |
| "loss": 5.3933, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.9497519603136502, | |
| "grad_norm": 1.9592795372009277, | |
| "learning_rate": 3.427020377600685e-05, | |
| "loss": 5.4423, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 0.9505520883341335, | |
| "grad_norm": 2.9245188236236572, | |
| "learning_rate": 3.425683264694871e-05, | |
| "loss": 5.4927, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.9513522163546168, | |
| "grad_norm": 2.5000531673431396, | |
| "learning_rate": 3.424346151789057e-05, | |
| "loss": 5.3523, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 0.9521523443751, | |
| "grad_norm": 2.4692375659942627, | |
| "learning_rate": 3.4230090388832435e-05, | |
| "loss": 5.5949, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.9529524723955833, | |
| "grad_norm": 2.387812852859497, | |
| "learning_rate": 3.42167192597743e-05, | |
| "loss": 5.4971, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 0.9537526004160666, | |
| "grad_norm": 2.938291072845459, | |
| "learning_rate": 3.420334813071616e-05, | |
| "loss": 5.3849, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.9545527284365498, | |
| "grad_norm": 2.608431339263916, | |
| "learning_rate": 3.4189977001658016e-05, | |
| "loss": 5.3414, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 0.9553528564570332, | |
| "grad_norm": 2.695615530014038, | |
| "learning_rate": 3.417660587259988e-05, | |
| "loss": 5.2343, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.9561529844775164, | |
| "grad_norm": 3.0142087936401367, | |
| "learning_rate": 3.416323474354174e-05, | |
| "loss": 5.3293, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.9569531124979996, | |
| "grad_norm": 2.5953242778778076, | |
| "learning_rate": 3.4149863614483604e-05, | |
| "loss": 5.459, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.957753240518483, | |
| "grad_norm": 2.2795822620391846, | |
| "learning_rate": 3.413649248542547e-05, | |
| "loss": 5.5305, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 0.9585533685389662, | |
| "grad_norm": 2.5979270935058594, | |
| "learning_rate": 3.412312135636733e-05, | |
| "loss": 5.4866, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.9593534965594495, | |
| "grad_norm": 2.66823673248291, | |
| "learning_rate": 3.410975022730919e-05, | |
| "loss": 5.5734, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 0.9601536245799328, | |
| "grad_norm": 2.3899004459381104, | |
| "learning_rate": 3.4096379098251055e-05, | |
| "loss": 5.5367, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.960953752600416, | |
| "grad_norm": 2.233553171157837, | |
| "learning_rate": 3.408300796919292e-05, | |
| "loss": 5.3773, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 0.9617538806208994, | |
| "grad_norm": 2.2967305183410645, | |
| "learning_rate": 3.406963684013478e-05, | |
| "loss": 5.4409, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.9625540086413826, | |
| "grad_norm": 2.4291601181030273, | |
| "learning_rate": 3.405626571107664e-05, | |
| "loss": 5.4198, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 0.9633541366618659, | |
| "grad_norm": 2.6325435638427734, | |
| "learning_rate": 3.4042894582018506e-05, | |
| "loss": 5.6044, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.9641542646823492, | |
| "grad_norm": 2.4688518047332764, | |
| "learning_rate": 3.402952345296037e-05, | |
| "loss": 5.3633, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.9649543927028325, | |
| "grad_norm": 2.3974521160125732, | |
| "learning_rate": 3.401615232390223e-05, | |
| "loss": 5.3022, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.9657545207233157, | |
| "grad_norm": 2.146742105484009, | |
| "learning_rate": 3.4002781194844094e-05, | |
| "loss": 5.2753, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 0.966554648743799, | |
| "grad_norm": 2.1239147186279297, | |
| "learning_rate": 3.3989410065785957e-05, | |
| "loss": 5.466, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.9673547767642823, | |
| "grad_norm": 2.939096450805664, | |
| "learning_rate": 3.397603893672782e-05, | |
| "loss": 5.5288, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.9681549047847655, | |
| "grad_norm": 2.6875243186950684, | |
| "learning_rate": 3.396266780766968e-05, | |
| "loss": 5.4279, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.9689550328052489, | |
| "grad_norm": 3.1991941928863525, | |
| "learning_rate": 3.3949296678611545e-05, | |
| "loss": 5.5397, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 0.9697551608257321, | |
| "grad_norm": 2.4558470249176025, | |
| "learning_rate": 3.393592554955341e-05, | |
| "loss": 5.3246, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.9705552888462154, | |
| "grad_norm": 2.2693309783935547, | |
| "learning_rate": 3.392255442049527e-05, | |
| "loss": 5.5941, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 0.9713554168666987, | |
| "grad_norm": 2.8864657878875732, | |
| "learning_rate": 3.390918329143713e-05, | |
| "loss": 5.4632, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.9721555448871819, | |
| "grad_norm": 2.3996002674102783, | |
| "learning_rate": 3.3895812162378995e-05, | |
| "loss": 5.4724, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.9729556729076653, | |
| "grad_norm": 1.979028582572937, | |
| "learning_rate": 3.388244103332086e-05, | |
| "loss": 5.4229, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.9737558009281485, | |
| "grad_norm": 2.0203795433044434, | |
| "learning_rate": 3.386906990426272e-05, | |
| "loss": 5.5592, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 0.9745559289486317, | |
| "grad_norm": 2.0890145301818848, | |
| "learning_rate": 3.3855698775204583e-05, | |
| "loss": 5.4313, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.9753560569691151, | |
| "grad_norm": 2.4817287921905518, | |
| "learning_rate": 3.3842327646146446e-05, | |
| "loss": 5.5, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 0.9761561849895983, | |
| "grad_norm": 2.2497968673706055, | |
| "learning_rate": 3.382895651708831e-05, | |
| "loss": 5.3126, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.9769563130100816, | |
| "grad_norm": 3.2818548679351807, | |
| "learning_rate": 3.381558538803017e-05, | |
| "loss": 5.3421, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 0.9777564410305649, | |
| "grad_norm": 7.580129623413086, | |
| "learning_rate": 3.3802214258972034e-05, | |
| "loss": 5.6585, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.9785565690510482, | |
| "grad_norm": 3.0450634956359863, | |
| "learning_rate": 3.37888431299139e-05, | |
| "loss": 5.4403, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 0.9793566970715314, | |
| "grad_norm": 2.5230050086975098, | |
| "learning_rate": 3.377547200085575e-05, | |
| "loss": 5.5331, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.9801568250920147, | |
| "grad_norm": 3.398266315460205, | |
| "learning_rate": 3.3762100871797616e-05, | |
| "loss": 5.3996, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.980956953112498, | |
| "grad_norm": 2.2126028537750244, | |
| "learning_rate": 3.374872974273948e-05, | |
| "loss": 5.4175, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.9817570811329813, | |
| "grad_norm": 3.0015792846679688, | |
| "learning_rate": 3.373535861368134e-05, | |
| "loss": 5.3961, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 0.9825572091534646, | |
| "grad_norm": 2.5461559295654297, | |
| "learning_rate": 3.3721987484623204e-05, | |
| "loss": 5.6026, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.9833573371739478, | |
| "grad_norm": 2.498425245285034, | |
| "learning_rate": 3.3708616355565066e-05, | |
| "loss": 5.3524, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 0.9841574651944311, | |
| "grad_norm": 2.9614803791046143, | |
| "learning_rate": 3.369524522650693e-05, | |
| "loss": 5.5101, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.9849575932149144, | |
| "grad_norm": 2.7508606910705566, | |
| "learning_rate": 3.368187409744879e-05, | |
| "loss": 5.3776, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 0.9857577212353976, | |
| "grad_norm": 2.0286755561828613, | |
| "learning_rate": 3.3668502968390654e-05, | |
| "loss": 5.4913, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.986557849255881, | |
| "grad_norm": 3.728842258453369, | |
| "learning_rate": 3.365513183933252e-05, | |
| "loss": 5.4477, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 0.9873579772763642, | |
| "grad_norm": 3.3132193088531494, | |
| "learning_rate": 3.364176071027438e-05, | |
| "loss": 5.2361, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.9881581052968474, | |
| "grad_norm": 2.515298843383789, | |
| "learning_rate": 3.362838958121624e-05, | |
| "loss": 5.4632, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.9889582333173308, | |
| "grad_norm": 2.0937442779541016, | |
| "learning_rate": 3.3615018452158105e-05, | |
| "loss": 5.5075, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.989758361337814, | |
| "grad_norm": 3.3019323348999023, | |
| "learning_rate": 3.360164732309997e-05, | |
| "loss": 5.4566, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 0.9905584893582974, | |
| "grad_norm": 3.502408266067505, | |
| "learning_rate": 3.358827619404183e-05, | |
| "loss": 5.464, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.9913586173787806, | |
| "grad_norm": 2.3667659759521484, | |
| "learning_rate": 3.357490506498369e-05, | |
| "loss": 5.5423, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 0.9921587453992639, | |
| "grad_norm": 2.15498423576355, | |
| "learning_rate": 3.356153393592555e-05, | |
| "loss": 5.4031, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.9929588734197472, | |
| "grad_norm": 2.733090877532959, | |
| "learning_rate": 3.354816280686741e-05, | |
| "loss": 5.4771, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 0.9937590014402304, | |
| "grad_norm": 2.595238208770752, | |
| "learning_rate": 3.3534791677809274e-05, | |
| "loss": 5.4538, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.9945591294607137, | |
| "grad_norm": 2.3755598068237305, | |
| "learning_rate": 3.352142054875114e-05, | |
| "loss": 5.432, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 0.995359257481197, | |
| "grad_norm": 2.2179529666900635, | |
| "learning_rate": 3.3508049419693e-05, | |
| "loss": 5.4359, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.9961593855016803, | |
| "grad_norm": 2.264469623565674, | |
| "learning_rate": 3.349467829063486e-05, | |
| "loss": 5.4514, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.9969595135221635, | |
| "grad_norm": 2.9361791610717773, | |
| "learning_rate": 3.3481307161576725e-05, | |
| "loss": 5.4411, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.9977596415426468, | |
| "grad_norm": 2.6548573970794678, | |
| "learning_rate": 3.346793603251859e-05, | |
| "loss": 5.4368, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 0.9985597695631301, | |
| "grad_norm": 3.5749149322509766, | |
| "learning_rate": 3.345456490346045e-05, | |
| "loss": 5.6314, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.9993598975836134, | |
| "grad_norm": 2.848527193069458, | |
| "learning_rate": 3.344119377440231e-05, | |
| "loss": 5.3849, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 1.0001600256040966, | |
| "grad_norm": 2.036498546600342, | |
| "learning_rate": 3.3427822645344176e-05, | |
| "loss": 5.5973, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.00096015362458, | |
| "grad_norm": 3.499455451965332, | |
| "learning_rate": 3.341445151628604e-05, | |
| "loss": 5.1882, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 1.0017602816450633, | |
| "grad_norm": 2.4391655921936035, | |
| "learning_rate": 3.34010803872279e-05, | |
| "loss": 5.0281, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 1.0025604096655465, | |
| "grad_norm": 2.522850513458252, | |
| "learning_rate": 3.3387709258169764e-05, | |
| "loss": 5.1038, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 1.0033605376860297, | |
| "grad_norm": 2.631127119064331, | |
| "learning_rate": 3.337433812911163e-05, | |
| "loss": 4.9671, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 1.004160665706513, | |
| "grad_norm": 2.9861068725585938, | |
| "learning_rate": 3.336096700005348e-05, | |
| "loss": 5.2225, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 1.0049607937269964, | |
| "grad_norm": 2.59002423286438, | |
| "learning_rate": 3.3347595870995345e-05, | |
| "loss": 5.142, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 1.0057609217474797, | |
| "grad_norm": 2.830385208129883, | |
| "learning_rate": 3.333422474193721e-05, | |
| "loss": 5.0919, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 1.006561049767963, | |
| "grad_norm": 2.6355655193328857, | |
| "learning_rate": 3.332085361287907e-05, | |
| "loss": 5.0604, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 1.0073611777884461, | |
| "grad_norm": 2.8990426063537598, | |
| "learning_rate": 3.3307482483820933e-05, | |
| "loss": 5.0488, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 1.0081613058089294, | |
| "grad_norm": 2.657283067703247, | |
| "learning_rate": 3.3294111354762796e-05, | |
| "loss": 5.157, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.0089614338294126, | |
| "grad_norm": 3.652735710144043, | |
| "learning_rate": 3.328074022570466e-05, | |
| "loss": 5.1629, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 1.009761561849896, | |
| "grad_norm": 2.9064295291900635, | |
| "learning_rate": 3.326736909664652e-05, | |
| "loss": 5.1757, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 1.0105616898703793, | |
| "grad_norm": 3.015488386154175, | |
| "learning_rate": 3.3253997967588384e-05, | |
| "loss": 5.2311, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 1.0113618178908625, | |
| "grad_norm": 9.49726390838623, | |
| "learning_rate": 3.324062683853025e-05, | |
| "loss": 5.1402, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 1.0121619459113458, | |
| "grad_norm": 6.71565055847168, | |
| "learning_rate": 3.322725570947211e-05, | |
| "loss": 4.7297, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 1.012962073931829, | |
| "grad_norm": 4.39326286315918, | |
| "learning_rate": 3.321388458041397e-05, | |
| "loss": 5.1663, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 1.0137622019523125, | |
| "grad_norm": 2.8973264694213867, | |
| "learning_rate": 3.3200513451355835e-05, | |
| "loss": 5.0674, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 1.0145623299727957, | |
| "grad_norm": 3.1058743000030518, | |
| "learning_rate": 3.31871423222977e-05, | |
| "loss": 4.9689, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 1.015362457993279, | |
| "grad_norm": 2.688951253890991, | |
| "learning_rate": 3.317377119323956e-05, | |
| "loss": 5.0916, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 1.0161625860137622, | |
| "grad_norm": 2.9495773315429688, | |
| "learning_rate": 3.3160400064181416e-05, | |
| "loss": 5.0939, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 1.0169627140342454, | |
| "grad_norm": 2.5915777683258057, | |
| "learning_rate": 3.314702893512328e-05, | |
| "loss": 5.134, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 1.0177628420547287, | |
| "grad_norm": 2.703012228012085, | |
| "learning_rate": 3.313365780606514e-05, | |
| "loss": 5.1285, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 1.0185629700752121, | |
| "grad_norm": 3.0492970943450928, | |
| "learning_rate": 3.3120286677007004e-05, | |
| "loss": 5.1477, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 1.0193630980956954, | |
| "grad_norm": 2.756546974182129, | |
| "learning_rate": 3.310691554794887e-05, | |
| "loss": 5.0668, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 1.0201632261161786, | |
| "grad_norm": 4.764959335327148, | |
| "learning_rate": 3.309354441889073e-05, | |
| "loss": 5.1243, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 1.0209633541366618, | |
| "grad_norm": 5.539842128753662, | |
| "learning_rate": 3.308017328983259e-05, | |
| "loss": 5.0519, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 1.021763482157145, | |
| "grad_norm": 3.8945937156677246, | |
| "learning_rate": 3.3066802160774455e-05, | |
| "loss": 5.1758, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 1.0225636101776283, | |
| "grad_norm": 2.5580265522003174, | |
| "learning_rate": 3.305343103171632e-05, | |
| "loss": 5.0893, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 1.0233637381981118, | |
| "grad_norm": 2.8203110694885254, | |
| "learning_rate": 3.304005990265818e-05, | |
| "loss": 5.2472, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 1.024163866218595, | |
| "grad_norm": 3.5090975761413574, | |
| "learning_rate": 3.302668877360004e-05, | |
| "loss": 5.0594, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.0249639942390782, | |
| "grad_norm": 2.915062189102173, | |
| "learning_rate": 3.3013317644541906e-05, | |
| "loss": 5.0673, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 1.0257641222595615, | |
| "grad_norm": 2.648737668991089, | |
| "learning_rate": 3.299994651548377e-05, | |
| "loss": 4.8937, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 1.0265642502800447, | |
| "grad_norm": 3.2576730251312256, | |
| "learning_rate": 3.298657538642563e-05, | |
| "loss": 5.1564, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 1.0273643783005282, | |
| "grad_norm": 5.624968528747559, | |
| "learning_rate": 3.2973204257367494e-05, | |
| "loss": 5.3011, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 1.0281645063210114, | |
| "grad_norm": 2.492978811264038, | |
| "learning_rate": 3.2959833128309357e-05, | |
| "loss": 5.0935, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 1.0289646343414947, | |
| "grad_norm": 2.4655046463012695, | |
| "learning_rate": 3.294646199925121e-05, | |
| "loss": 5.1768, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 1.029764762361978, | |
| "grad_norm": 3.4421567916870117, | |
| "learning_rate": 3.2933090870193075e-05, | |
| "loss": 5.0756, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 1.0305648903824611, | |
| "grad_norm": 2.6774377822875977, | |
| "learning_rate": 3.291971974113494e-05, | |
| "loss": 5.036, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 1.0313650184029444, | |
| "grad_norm": 2.665099859237671, | |
| "learning_rate": 3.29063486120768e-05, | |
| "loss": 5.1284, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 1.0321651464234278, | |
| "grad_norm": 3.7092061042785645, | |
| "learning_rate": 3.289297748301866e-05, | |
| "loss": 4.8892, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 1.032965274443911, | |
| "grad_norm": 2.875427484512329, | |
| "learning_rate": 3.2879606353960526e-05, | |
| "loss": 4.928, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 1.0337654024643943, | |
| "grad_norm": 2.409395694732666, | |
| "learning_rate": 3.286623522490239e-05, | |
| "loss": 5.0545, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 1.0345655304848775, | |
| "grad_norm": 3.936565637588501, | |
| "learning_rate": 3.285286409584425e-05, | |
| "loss": 5.0556, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 1.0353656585053608, | |
| "grad_norm": 3.52986216545105, | |
| "learning_rate": 3.2839492966786114e-05, | |
| "loss": 5.0738, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 1.0361657865258442, | |
| "grad_norm": 3.0732507705688477, | |
| "learning_rate": 3.282612183772798e-05, | |
| "loss": 5.0852, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 1.0369659145463275, | |
| "grad_norm": 2.800020217895508, | |
| "learning_rate": 3.281275070866984e-05, | |
| "loss": 4.9983, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 1.0377660425668107, | |
| "grad_norm": 2.682191848754883, | |
| "learning_rate": 3.27993795796117e-05, | |
| "loss": 4.8372, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 1.038566170587294, | |
| "grad_norm": 5.331565856933594, | |
| "learning_rate": 3.2786008450553565e-05, | |
| "loss": 5.1444, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 1.0393662986077772, | |
| "grad_norm": 3.530069589614868, | |
| "learning_rate": 3.277263732149543e-05, | |
| "loss": 5.1467, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 1.0401664266282604, | |
| "grad_norm": 2.296837568283081, | |
| "learning_rate": 3.275926619243729e-05, | |
| "loss": 5.0782, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.0409665546487439, | |
| "grad_norm": 4.3493146896362305, | |
| "learning_rate": 3.274589506337915e-05, | |
| "loss": 5.0574, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 1.0417666826692271, | |
| "grad_norm": 3.2167856693267822, | |
| "learning_rate": 3.2732523934321016e-05, | |
| "loss": 5.1219, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 1.0425668106897104, | |
| "grad_norm": 3.200861692428589, | |
| "learning_rate": 3.271915280526288e-05, | |
| "loss": 5.0674, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 1.0433669387101936, | |
| "grad_norm": 2.286841869354248, | |
| "learning_rate": 3.270578167620474e-05, | |
| "loss": 5.0125, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 1.0441670667306768, | |
| "grad_norm": 3.6788413524627686, | |
| "learning_rate": 3.2692410547146604e-05, | |
| "loss": 5.2975, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 1.0449671947511603, | |
| "grad_norm": 2.77284574508667, | |
| "learning_rate": 3.2679039418088466e-05, | |
| "loss": 5.0099, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 1.0457673227716435, | |
| "grad_norm": 4.33493185043335, | |
| "learning_rate": 3.266566828903033e-05, | |
| "loss": 5.0362, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 1.0465674507921268, | |
| "grad_norm": 3.2839553356170654, | |
| "learning_rate": 3.265229715997219e-05, | |
| "loss": 4.9569, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 1.04736757881261, | |
| "grad_norm": 2.9086809158325195, | |
| "learning_rate": 3.2638926030914054e-05, | |
| "loss": 5.0341, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 1.0481677068330932, | |
| "grad_norm": 2.565225124359131, | |
| "learning_rate": 3.262555490185592e-05, | |
| "loss": 5.0601, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 1.0489678348535765, | |
| "grad_norm": 2.8457388877868652, | |
| "learning_rate": 3.261218377279778e-05, | |
| "loss": 4.9952, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 1.04976796287406, | |
| "grad_norm": 2.5370593070983887, | |
| "learning_rate": 3.259881264373964e-05, | |
| "loss": 5.1425, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 1.0505680908945432, | |
| "grad_norm": 2.504817008972168, | |
| "learning_rate": 3.2585441514681505e-05, | |
| "loss": 4.9605, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 1.0513682189150264, | |
| "grad_norm": 2.9582226276397705, | |
| "learning_rate": 3.257207038562337e-05, | |
| "loss": 5.1436, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 1.0521683469355096, | |
| "grad_norm": 3.7598915100097656, | |
| "learning_rate": 3.255869925656523e-05, | |
| "loss": 5.0743, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 1.0529684749559929, | |
| "grad_norm": 3.2642862796783447, | |
| "learning_rate": 3.254532812750709e-05, | |
| "loss": 5.139, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 1.0537686029764763, | |
| "grad_norm": 3.4917502403259277, | |
| "learning_rate": 3.253195699844895e-05, | |
| "loss": 5.0566, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 1.0545687309969596, | |
| "grad_norm": 2.9878995418548584, | |
| "learning_rate": 3.251858586939081e-05, | |
| "loss": 5.2385, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 1.0553688590174428, | |
| "grad_norm": 2.9996213912963867, | |
| "learning_rate": 3.2505214740332674e-05, | |
| "loss": 5.1138, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 1.056168987037926, | |
| "grad_norm": 5.470676422119141, | |
| "learning_rate": 3.249184361127454e-05, | |
| "loss": 5.0921, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.0569691150584093, | |
| "grad_norm": 2.9724602699279785, | |
| "learning_rate": 3.24784724822164e-05, | |
| "loss": 4.9315, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 1.0577692430788925, | |
| "grad_norm": 3.191342353820801, | |
| "learning_rate": 3.246510135315826e-05, | |
| "loss": 5.1095, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 1.058569371099376, | |
| "grad_norm": 4.010619163513184, | |
| "learning_rate": 3.2451730224100125e-05, | |
| "loss": 5.1697, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 1.0593694991198592, | |
| "grad_norm": 2.828768253326416, | |
| "learning_rate": 3.243835909504199e-05, | |
| "loss": 5.1114, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 1.0601696271403425, | |
| "grad_norm": 4.081239223480225, | |
| "learning_rate": 3.242498796598385e-05, | |
| "loss": 5.0629, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 1.0609697551608257, | |
| "grad_norm": 3.347407817840576, | |
| "learning_rate": 3.241161683692571e-05, | |
| "loss": 5.0355, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 1.061769883181309, | |
| "grad_norm": 2.902289390563965, | |
| "learning_rate": 3.2398245707867576e-05, | |
| "loss": 5.1561, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 1.0625700112017924, | |
| "grad_norm": 15.202888488769531, | |
| "learning_rate": 3.238487457880944e-05, | |
| "loss": 5.2149, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 1.0633701392222756, | |
| "grad_norm": 3.353285551071167, | |
| "learning_rate": 3.23715034497513e-05, | |
| "loss": 4.8566, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 1.0641702672427589, | |
| "grad_norm": 4.258049011230469, | |
| "learning_rate": 3.2358132320693164e-05, | |
| "loss": 5.0358, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 1.064970395263242, | |
| "grad_norm": 2.727367639541626, | |
| "learning_rate": 3.234476119163503e-05, | |
| "loss": 4.9733, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 1.0657705232837253, | |
| "grad_norm": 4.626856803894043, | |
| "learning_rate": 3.233139006257688e-05, | |
| "loss": 5.162, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 1.0665706513042086, | |
| "grad_norm": 3.074949264526367, | |
| "learning_rate": 3.2318018933518745e-05, | |
| "loss": 5.1322, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 1.067370779324692, | |
| "grad_norm": 4.150319576263428, | |
| "learning_rate": 3.230464780446061e-05, | |
| "loss": 5.0567, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 1.0681709073451753, | |
| "grad_norm": 5.132182598114014, | |
| "learning_rate": 3.229127667540247e-05, | |
| "loss": 5.1743, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 1.0689710353656585, | |
| "grad_norm": 4.4582839012146, | |
| "learning_rate": 3.2277905546344333e-05, | |
| "loss": 5.2236, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 1.0697711633861418, | |
| "grad_norm": 2.9640562534332275, | |
| "learning_rate": 3.2264534417286196e-05, | |
| "loss": 5.0974, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 1.070571291406625, | |
| "grad_norm": 2.8978335857391357, | |
| "learning_rate": 3.225116328822806e-05, | |
| "loss": 5.1591, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 1.0713714194271082, | |
| "grad_norm": 2.773488759994507, | |
| "learning_rate": 3.223779215916992e-05, | |
| "loss": 5.0814, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 1.0721715474475917, | |
| "grad_norm": 2.719374656677246, | |
| "learning_rate": 3.2224421030111784e-05, | |
| "loss": 5.0352, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.072971675468075, | |
| "grad_norm": 2.918991804122925, | |
| "learning_rate": 3.221104990105365e-05, | |
| "loss": 5.0955, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 1.0737718034885582, | |
| "grad_norm": 3.3438122272491455, | |
| "learning_rate": 3.219767877199551e-05, | |
| "loss": 5.0205, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 1.0745719315090414, | |
| "grad_norm": 2.915687322616577, | |
| "learning_rate": 3.218430764293737e-05, | |
| "loss": 5.0708, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 1.0753720595295246, | |
| "grad_norm": 2.3897652626037598, | |
| "learning_rate": 3.2170936513879235e-05, | |
| "loss": 5.0898, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 1.076172187550008, | |
| "grad_norm": 2.5261075496673584, | |
| "learning_rate": 3.21575653848211e-05, | |
| "loss": 5.0002, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 1.0769723155704913, | |
| "grad_norm": 4.839473247528076, | |
| "learning_rate": 3.214419425576296e-05, | |
| "loss": 5.1853, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 1.0777724435909746, | |
| "grad_norm": 2.396831512451172, | |
| "learning_rate": 3.213082312670482e-05, | |
| "loss": 5.0397, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 1.0785725716114578, | |
| "grad_norm": 4.165911674499512, | |
| "learning_rate": 3.211745199764668e-05, | |
| "loss": 5.2065, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 1.079372699631941, | |
| "grad_norm": 2.74873423576355, | |
| "learning_rate": 3.210408086858854e-05, | |
| "loss": 5.2217, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 1.0801728276524245, | |
| "grad_norm": 3.480703353881836, | |
| "learning_rate": 3.2090709739530404e-05, | |
| "loss": 4.9929, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.0809729556729077, | |
| "grad_norm": 3.747199773788452, | |
| "learning_rate": 3.207733861047227e-05, | |
| "loss": 5.1235, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 1.081773083693391, | |
| "grad_norm": 3.634990692138672, | |
| "learning_rate": 3.206396748141413e-05, | |
| "loss": 4.9466, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 1.0825732117138742, | |
| "grad_norm": 3.6419565677642822, | |
| "learning_rate": 3.205059635235599e-05, | |
| "loss": 5.1791, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 1.0833733397343575, | |
| "grad_norm": 3.413770914077759, | |
| "learning_rate": 3.2037225223297855e-05, | |
| "loss": 5.1777, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 1.0841734677548407, | |
| "grad_norm": 5.771011829376221, | |
| "learning_rate": 3.202385409423972e-05, | |
| "loss": 5.0543, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 1.0849735957753242, | |
| "grad_norm": 2.9491965770721436, | |
| "learning_rate": 3.201048296518158e-05, | |
| "loss": 4.9719, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 1.0857737237958074, | |
| "grad_norm": 3.3095767498016357, | |
| "learning_rate": 3.199711183612344e-05, | |
| "loss": 5.2155, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 1.0865738518162906, | |
| "grad_norm": 4.941197395324707, | |
| "learning_rate": 3.1983740707065306e-05, | |
| "loss": 5.073, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 1.0873739798367739, | |
| "grad_norm": 2.3605270385742188, | |
| "learning_rate": 3.197036957800717e-05, | |
| "loss": 5.1746, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 1.088174107857257, | |
| "grad_norm": 2.9810526371002197, | |
| "learning_rate": 3.195699844894903e-05, | |
| "loss": 5.157, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.0889742358777403, | |
| "grad_norm": 2.767223358154297, | |
| "learning_rate": 3.1943627319890894e-05, | |
| "loss": 5.0831, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 1.0897743638982238, | |
| "grad_norm": 6.959831714630127, | |
| "learning_rate": 3.193025619083276e-05, | |
| "loss": 4.883, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 1.090574491918707, | |
| "grad_norm": 6.120983123779297, | |
| "learning_rate": 3.191688506177461e-05, | |
| "loss": 5.0368, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 1.0913746199391903, | |
| "grad_norm": 2.680748462677002, | |
| "learning_rate": 3.1903513932716475e-05, | |
| "loss": 5.1996, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 1.0921747479596735, | |
| "grad_norm": 4.287043571472168, | |
| "learning_rate": 3.189014280365834e-05, | |
| "loss": 4.9824, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 1.0929748759801567, | |
| "grad_norm": 2.647005319595337, | |
| "learning_rate": 3.18767716746002e-05, | |
| "loss": 4.9845, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 1.0937750040006402, | |
| "grad_norm": 2.9568288326263428, | |
| "learning_rate": 3.186340054554206e-05, | |
| "loss": 5.0804, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 1.0945751320211234, | |
| "grad_norm": 4.118317127227783, | |
| "learning_rate": 3.1850029416483926e-05, | |
| "loss": 5.0375, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 1.0953752600416067, | |
| "grad_norm": 3.7457168102264404, | |
| "learning_rate": 3.183665828742579e-05, | |
| "loss": 5.0193, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 1.09617538806209, | |
| "grad_norm": 2.829274892807007, | |
| "learning_rate": 3.182328715836765e-05, | |
| "loss": 5.1896, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 1.0969755160825732, | |
| "grad_norm": 3.568166971206665, | |
| "learning_rate": 3.1809916029309514e-05, | |
| "loss": 5.0527, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 1.0977756441030564, | |
| "grad_norm": 2.8555142879486084, | |
| "learning_rate": 3.179654490025138e-05, | |
| "loss": 5.0873, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 1.0985757721235399, | |
| "grad_norm": 2.9258460998535156, | |
| "learning_rate": 3.178317377119324e-05, | |
| "loss": 4.9293, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 1.099375900144023, | |
| "grad_norm": 3.3614535331726074, | |
| "learning_rate": 3.17698026421351e-05, | |
| "loss": 4.992, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 1.1001760281645063, | |
| "grad_norm": 3.859238624572754, | |
| "learning_rate": 3.1756431513076965e-05, | |
| "loss": 4.9695, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 1.1009761561849896, | |
| "grad_norm": 2.9869918823242188, | |
| "learning_rate": 3.174306038401883e-05, | |
| "loss": 5.0833, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 1.1017762842054728, | |
| "grad_norm": 2.874736785888672, | |
| "learning_rate": 3.172968925496069e-05, | |
| "loss": 5.0329, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 1.102576412225956, | |
| "grad_norm": 3.2926857471466064, | |
| "learning_rate": 3.171631812590255e-05, | |
| "loss": 5.0431, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 1.1033765402464395, | |
| "grad_norm": 3.0349912643432617, | |
| "learning_rate": 3.1702946996844416e-05, | |
| "loss": 5.0485, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 1.1041766682669227, | |
| "grad_norm": 3.0139970779418945, | |
| "learning_rate": 3.168957586778628e-05, | |
| "loss": 5.0519, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.104976796287406, | |
| "grad_norm": 3.5662894248962402, | |
| "learning_rate": 3.167620473872814e-05, | |
| "loss": 5.2053, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 1.1057769243078892, | |
| "grad_norm": 3.348515033721924, | |
| "learning_rate": 3.1662833609670004e-05, | |
| "loss": 5.0588, | |
| "step": 13820 | |
| }, | |
| { | |
| "epoch": 1.1065770523283724, | |
| "grad_norm": 2.439892292022705, | |
| "learning_rate": 3.1649462480611866e-05, | |
| "loss": 5.0894, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 1.107377180348856, | |
| "grad_norm": 3.85776948928833, | |
| "learning_rate": 3.163609135155373e-05, | |
| "loss": 5.0345, | |
| "step": 13840 | |
| }, | |
| { | |
| "epoch": 1.1081773083693391, | |
| "grad_norm": 2.6576287746429443, | |
| "learning_rate": 3.162272022249559e-05, | |
| "loss": 5.0607, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 1.1089774363898224, | |
| "grad_norm": 2.6049861907958984, | |
| "learning_rate": 3.1609349093437454e-05, | |
| "loss": 5.0033, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 1.1097775644103056, | |
| "grad_norm": 2.5496983528137207, | |
| "learning_rate": 3.159597796437932e-05, | |
| "loss": 5.2102, | |
| "step": 13870 | |
| }, | |
| { | |
| "epoch": 1.1105776924307889, | |
| "grad_norm": 4.300173282623291, | |
| "learning_rate": 3.158260683532118e-05, | |
| "loss": 5.1137, | |
| "step": 13880 | |
| }, | |
| { | |
| "epoch": 1.1113778204512723, | |
| "grad_norm": 2.4413559436798096, | |
| "learning_rate": 3.156923570626304e-05, | |
| "loss": 4.9222, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 1.1121779484717556, | |
| "grad_norm": 2.4938573837280273, | |
| "learning_rate": 3.1555864577204905e-05, | |
| "loss": 5.1414, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 1.1129780764922388, | |
| "grad_norm": 3.333294153213501, | |
| "learning_rate": 3.154249344814677e-05, | |
| "loss": 5.1243, | |
| "step": 13910 | |
| }, | |
| { | |
| "epoch": 1.113778204512722, | |
| "grad_norm": 3.8718490600585938, | |
| "learning_rate": 3.152912231908863e-05, | |
| "loss": 5.2178, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 1.1145783325332053, | |
| "grad_norm": 4.667349338531494, | |
| "learning_rate": 3.151575119003049e-05, | |
| "loss": 5.185, | |
| "step": 13930 | |
| }, | |
| { | |
| "epoch": 1.1153784605536885, | |
| "grad_norm": 3.7269580364227295, | |
| "learning_rate": 3.150238006097235e-05, | |
| "loss": 4.9231, | |
| "step": 13940 | |
| }, | |
| { | |
| "epoch": 1.116178588574172, | |
| "grad_norm": 3.8037633895874023, | |
| "learning_rate": 3.148900893191421e-05, | |
| "loss": 5.0166, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 1.1169787165946552, | |
| "grad_norm": 3.2636613845825195, | |
| "learning_rate": 3.1475637802856075e-05, | |
| "loss": 5.0339, | |
| "step": 13960 | |
| }, | |
| { | |
| "epoch": 1.1177788446151384, | |
| "grad_norm": 4.069303035736084, | |
| "learning_rate": 3.146226667379794e-05, | |
| "loss": 5.1558, | |
| "step": 13970 | |
| }, | |
| { | |
| "epoch": 1.1185789726356217, | |
| "grad_norm": 3.160214424133301, | |
| "learning_rate": 3.14488955447398e-05, | |
| "loss": 5.0048, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 1.119379100656105, | |
| "grad_norm": 2.7678611278533936, | |
| "learning_rate": 3.143552441568166e-05, | |
| "loss": 5.0992, | |
| "step": 13990 | |
| }, | |
| { | |
| "epoch": 1.1201792286765881, | |
| "grad_norm": 3.162316083908081, | |
| "learning_rate": 3.1422153286623525e-05, | |
| "loss": 5.0398, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.1201792286765881, | |
| "eval_loss": 5.684463977813721, | |
| "eval_runtime": 11.9219, | |
| "eval_samples_per_second": 3.355, | |
| "eval_steps_per_second": 0.419, | |
| "step": 14000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 37494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 7000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |