| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5600896143382941, | |
| "eval_steps": 7000, | |
| "global_step": 7000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 9.306169509887695, | |
| "eval_runtime": 10.9126, | |
| "eval_samples_per_second": 3.665, | |
| "eval_steps_per_second": 0.458, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0008001280204832773, | |
| "grad_norm": 8.51533031463623, | |
| "learning_rate": 3.5000000000000004e-06, | |
| "loss": 8.786, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0016002560409665546, | |
| "grad_norm": 10.90935230255127, | |
| "learning_rate": 8.500000000000002e-06, | |
| "loss": 8.3433, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002400384061449832, | |
| "grad_norm": 7.269016265869141, | |
| "learning_rate": 1.3500000000000001e-05, | |
| "loss": 7.549, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.003200512081933109, | |
| "grad_norm": 8.790578842163086, | |
| "learning_rate": 1.85e-05, | |
| "loss": 7.2574, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004000640102416387, | |
| "grad_norm": 6.52068567276001, | |
| "learning_rate": 2.35e-05, | |
| "loss": 7.0024, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.004800768122899664, | |
| "grad_norm": 6.902959823608398, | |
| "learning_rate": 2.8499999999999998e-05, | |
| "loss": 6.9074, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005600896143382941, | |
| "grad_norm": 5.350945949554443, | |
| "learning_rate": 3.35e-05, | |
| "loss": 6.8765, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006401024163866218, | |
| "grad_norm": 5.928489685058594, | |
| "learning_rate": 3.85e-05, | |
| "loss": 6.5663, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.007201152184349496, | |
| "grad_norm": 9.222543716430664, | |
| "learning_rate": 4.35e-05, | |
| "loss": 6.6131, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008001280204832774, | |
| "grad_norm": 6.57027006149292, | |
| "learning_rate": 4.85e-05, | |
| "loss": 6.5829, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00880140822531605, | |
| "grad_norm": 5.280848503112793, | |
| "learning_rate": 4.999064020965931e-05, | |
| "loss": 6.5996, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009601536245799328, | |
| "grad_norm": 5.950971603393555, | |
| "learning_rate": 4.997726908060117e-05, | |
| "loss": 6.6075, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.010401664266282605, | |
| "grad_norm": 4.300549507141113, | |
| "learning_rate": 4.996389795154303e-05, | |
| "loss": 6.5074, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011201792286765882, | |
| "grad_norm": 4.824333190917969, | |
| "learning_rate": 4.9950526822484896e-05, | |
| "loss": 6.6072, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01200192030724916, | |
| "grad_norm": 5.4324116706848145, | |
| "learning_rate": 4.993715569342676e-05, | |
| "loss": 6.6183, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.012802048327732437, | |
| "grad_norm": 4.087579250335693, | |
| "learning_rate": 4.992378456436862e-05, | |
| "loss": 6.4806, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.013602176348215714, | |
| "grad_norm": 7.260207653045654, | |
| "learning_rate": 4.9910413435310484e-05, | |
| "loss": 6.3709, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.014402304368698993, | |
| "grad_norm": 4.145061016082764, | |
| "learning_rate": 4.9897042306252346e-05, | |
| "loss": 6.2951, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01520243238918227, | |
| "grad_norm": 3.2026450634002686, | |
| "learning_rate": 4.98836711771942e-05, | |
| "loss": 6.3255, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.016002560409665547, | |
| "grad_norm": 3.443145751953125, | |
| "learning_rate": 4.9870300048136065e-05, | |
| "loss": 6.4894, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.016802688430148822, | |
| "grad_norm": 5.324231147766113, | |
| "learning_rate": 4.985692891907793e-05, | |
| "loss": 6.4312, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.0176028164506321, | |
| "grad_norm": 3.2833452224731445, | |
| "learning_rate": 4.984355779001979e-05, | |
| "loss": 6.513, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.018402944471115377, | |
| "grad_norm": 3.8984358310699463, | |
| "learning_rate": 4.983018666096165e-05, | |
| "loss": 6.1683, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.019203072491598656, | |
| "grad_norm": 4.183676719665527, | |
| "learning_rate": 4.9816815531903516e-05, | |
| "loss": 6.329, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.020003200512081935, | |
| "grad_norm": 3.136693239212036, | |
| "learning_rate": 4.980344440284538e-05, | |
| "loss": 6.466, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02080332853256521, | |
| "grad_norm": 4.185967445373535, | |
| "learning_rate": 4.979007327378724e-05, | |
| "loss": 6.4613, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02160345655304849, | |
| "grad_norm": 3.105653762817383, | |
| "learning_rate": 4.9776702144729104e-05, | |
| "loss": 6.3596, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.022403584573531764, | |
| "grad_norm": 3.927561044692993, | |
| "learning_rate": 4.9763331015670967e-05, | |
| "loss": 6.2604, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.023203712594015043, | |
| "grad_norm": 3.513439178466797, | |
| "learning_rate": 4.974995988661283e-05, | |
| "loss": 6.2747, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.02400384061449832, | |
| "grad_norm": 3.07377290725708, | |
| "learning_rate": 4.973658875755469e-05, | |
| "loss": 6.202, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.024803968634981598, | |
| "grad_norm": 3.045619249343872, | |
| "learning_rate": 4.9723217628496555e-05, | |
| "loss": 6.1022, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.025604096655464873, | |
| "grad_norm": 3.330648183822632, | |
| "learning_rate": 4.970984649943842e-05, | |
| "loss": 6.1544, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.026404224675948152, | |
| "grad_norm": 3.0299668312072754, | |
| "learning_rate": 4.969647537038028e-05, | |
| "loss": 6.3119, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.027204352696431428, | |
| "grad_norm": 3.687938928604126, | |
| "learning_rate": 4.9683104241322136e-05, | |
| "loss": 6.333, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.028004480716914706, | |
| "grad_norm": 4.0919413566589355, | |
| "learning_rate": 4.9669733112264e-05, | |
| "loss": 6.1711, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.028804608737397985, | |
| "grad_norm": 3.1327242851257324, | |
| "learning_rate": 4.965636198320586e-05, | |
| "loss": 6.3365, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02960473675788126, | |
| "grad_norm": 4.531859874725342, | |
| "learning_rate": 4.9642990854147724e-05, | |
| "loss": 6.2121, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03040486477836454, | |
| "grad_norm": 2.522672414779663, | |
| "learning_rate": 4.962961972508959e-05, | |
| "loss": 6.2388, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.031204992798847815, | |
| "grad_norm": 5.62153959274292, | |
| "learning_rate": 4.961624859603145e-05, | |
| "loss": 6.168, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.032005120819331094, | |
| "grad_norm": 3.522804021835327, | |
| "learning_rate": 4.960287746697331e-05, | |
| "loss": 6.1207, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03280524883981437, | |
| "grad_norm": 7.260324478149414, | |
| "learning_rate": 4.9589506337915175e-05, | |
| "loss": 6.31, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.033605376860297645, | |
| "grad_norm": 4.309441566467285, | |
| "learning_rate": 4.957613520885704e-05, | |
| "loss": 6.1107, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.034405504880780924, | |
| "grad_norm": 3.2409913539886475, | |
| "learning_rate": 4.95627640797989e-05, | |
| "loss": 6.2082, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.0352056329012642, | |
| "grad_norm": 3.9414610862731934, | |
| "learning_rate": 4.954939295074076e-05, | |
| "loss": 6.2102, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.03600576092174748, | |
| "grad_norm": 2.441235303878784, | |
| "learning_rate": 4.9536021821682626e-05, | |
| "loss": 6.1023, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.036805888942230754, | |
| "grad_norm": 2.997591972351074, | |
| "learning_rate": 4.952265069262449e-05, | |
| "loss": 6.1147, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03760601696271403, | |
| "grad_norm": 3.950436592102051, | |
| "learning_rate": 4.950927956356635e-05, | |
| "loss": 6.0725, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03840614498319731, | |
| "grad_norm": 3.4340896606445312, | |
| "learning_rate": 4.9495908434508214e-05, | |
| "loss": 6.1336, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03920627300368059, | |
| "grad_norm": 3.28839373588562, | |
| "learning_rate": 4.948253730545007e-05, | |
| "loss": 6.1709, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.04000640102416387, | |
| "grad_norm": 2.976365566253662, | |
| "learning_rate": 4.946916617639193e-05, | |
| "loss": 6.2074, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04080652904464714, | |
| "grad_norm": 4.156027793884277, | |
| "learning_rate": 4.9455795047333795e-05, | |
| "loss": 6.1694, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.04160665706513042, | |
| "grad_norm": 3.4855797290802, | |
| "learning_rate": 4.944242391827566e-05, | |
| "loss": 6.1218, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.0424067850856137, | |
| "grad_norm": 4.489185333251953, | |
| "learning_rate": 4.942905278921752e-05, | |
| "loss": 6.1507, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04320691310609698, | |
| "grad_norm": 3.2751166820526123, | |
| "learning_rate": 4.941568166015938e-05, | |
| "loss": 6.1055, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.04400704112658025, | |
| "grad_norm": 2.4234585762023926, | |
| "learning_rate": 4.9402310531101246e-05, | |
| "loss": 6.1755, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04480716914706353, | |
| "grad_norm": 3.4436991214752197, | |
| "learning_rate": 4.938893940204311e-05, | |
| "loss": 6.1882, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04560729716754681, | |
| "grad_norm": 3.3731908798217773, | |
| "learning_rate": 4.937556827298497e-05, | |
| "loss": 6.0648, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.04640742518803009, | |
| "grad_norm": 3.8733670711517334, | |
| "learning_rate": 4.9362197143926834e-05, | |
| "loss": 6.0621, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04720755320851336, | |
| "grad_norm": 4.126636505126953, | |
| "learning_rate": 4.9348826014868696e-05, | |
| "loss": 6.122, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.04800768122899664, | |
| "grad_norm": 3.8605775833129883, | |
| "learning_rate": 4.933545488581056e-05, | |
| "loss": 5.9788, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.048807809249479917, | |
| "grad_norm": 2.9509966373443604, | |
| "learning_rate": 4.932208375675242e-05, | |
| "loss": 6.2045, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.049607937269963195, | |
| "grad_norm": 4.4266510009765625, | |
| "learning_rate": 4.9308712627694285e-05, | |
| "loss": 5.9981, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.050408065290446474, | |
| "grad_norm": 2.79042649269104, | |
| "learning_rate": 4.929534149863615e-05, | |
| "loss": 6.1882, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.051208193310929746, | |
| "grad_norm": 2.8986568450927734, | |
| "learning_rate": 4.928197036957801e-05, | |
| "loss": 6.1739, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.052008321331413025, | |
| "grad_norm": 4.294217586517334, | |
| "learning_rate": 4.926859924051987e-05, | |
| "loss": 6.0566, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.052808449351896304, | |
| "grad_norm": 8.848836898803711, | |
| "learning_rate": 4.9255228111461735e-05, | |
| "loss": 6.2994, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.05360857737237958, | |
| "grad_norm": 3.2204337120056152, | |
| "learning_rate": 4.92418569824036e-05, | |
| "loss": 6.0573, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.054408705392862855, | |
| "grad_norm": 4.775251865386963, | |
| "learning_rate": 4.922848585334546e-05, | |
| "loss": 5.9764, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.055208833413346134, | |
| "grad_norm": 3.5426905155181885, | |
| "learning_rate": 4.921511472428732e-05, | |
| "loss": 6.0402, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.05600896143382941, | |
| "grad_norm": 10.72481632232666, | |
| "learning_rate": 4.9201743595229186e-05, | |
| "loss": 6.0024, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05680908945431269, | |
| "grad_norm": 2.441681385040283, | |
| "learning_rate": 4.918837246617105e-05, | |
| "loss": 6.1122, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.05760921747479597, | |
| "grad_norm": 3.375319480895996, | |
| "learning_rate": 4.917500133711291e-05, | |
| "loss": 6.058, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.05840934549527924, | |
| "grad_norm": 2.821507453918457, | |
| "learning_rate": 4.9161630208054774e-05, | |
| "loss": 6.0586, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.05920947351576252, | |
| "grad_norm": 2.8658957481384277, | |
| "learning_rate": 4.914825907899664e-05, | |
| "loss": 6.0115, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.0600096015362458, | |
| "grad_norm": 2.239774227142334, | |
| "learning_rate": 4.91348879499385e-05, | |
| "loss": 6.0669, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06080972955672908, | |
| "grad_norm": 3.5249900817871094, | |
| "learning_rate": 4.912151682088036e-05, | |
| "loss": 6.1013, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06160985757721235, | |
| "grad_norm": 2.790356159210205, | |
| "learning_rate": 4.9108145691822225e-05, | |
| "loss": 6.0099, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06240998559769563, | |
| "grad_norm": 3.0729963779449463, | |
| "learning_rate": 4.909477456276409e-05, | |
| "loss": 6.1376, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.06321011361817891, | |
| "grad_norm": 2.9490275382995605, | |
| "learning_rate": 4.908140343370595e-05, | |
| "loss": 6.1457, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.06401024163866219, | |
| "grad_norm": 2.7475438117980957, | |
| "learning_rate": 4.9068032304647806e-05, | |
| "loss": 6.0041, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06481036965914547, | |
| "grad_norm": 2.755703926086426, | |
| "learning_rate": 4.905466117558967e-05, | |
| "loss": 6.0242, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.06561049767962875, | |
| "grad_norm": 2.724515676498413, | |
| "learning_rate": 4.904129004653153e-05, | |
| "loss": 6.1827, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06641062570011202, | |
| "grad_norm": 4.498260974884033, | |
| "learning_rate": 4.9027918917473394e-05, | |
| "loss": 6.0892, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.06721075372059529, | |
| "grad_norm": 2.4399070739746094, | |
| "learning_rate": 4.901454778841526e-05, | |
| "loss": 6.0197, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.06801088174107857, | |
| "grad_norm": 2.7584304809570312, | |
| "learning_rate": 4.900117665935712e-05, | |
| "loss": 5.9056, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.06881100976156185, | |
| "grad_norm": 2.8177144527435303, | |
| "learning_rate": 4.898780553029898e-05, | |
| "loss": 6.1484, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.06961113778204513, | |
| "grad_norm": 4.181133270263672, | |
| "learning_rate": 4.8974434401240845e-05, | |
| "loss": 5.9376, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.0704112658025284, | |
| "grad_norm": 3.677849769592285, | |
| "learning_rate": 4.896106327218271e-05, | |
| "loss": 6.0403, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07121139382301168, | |
| "grad_norm": 3.1553192138671875, | |
| "learning_rate": 4.894769214312457e-05, | |
| "loss": 6.0488, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.07201152184349496, | |
| "grad_norm": 3.2580947875976562, | |
| "learning_rate": 4.893432101406643e-05, | |
| "loss": 6.1002, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.07281164986397824, | |
| "grad_norm": 6.328150749206543, | |
| "learning_rate": 4.8920949885008296e-05, | |
| "loss": 6.0225, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.07361177788446151, | |
| "grad_norm": 2.7467615604400635, | |
| "learning_rate": 4.890757875595016e-05, | |
| "loss": 5.9622, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.07441190590494479, | |
| "grad_norm": 2.86570405960083, | |
| "learning_rate": 4.889420762689202e-05, | |
| "loss": 5.9718, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.07521203392542807, | |
| "grad_norm": 2.544917106628418, | |
| "learning_rate": 4.8880836497833884e-05, | |
| "loss": 5.8697, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.07601216194591134, | |
| "grad_norm": 2.5245840549468994, | |
| "learning_rate": 4.8867465368775746e-05, | |
| "loss": 5.9973, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.07681228996639462, | |
| "grad_norm": 3.6830902099609375, | |
| "learning_rate": 4.88540942397176e-05, | |
| "loss": 5.943, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.0776124179868779, | |
| "grad_norm": 2.6643354892730713, | |
| "learning_rate": 4.8840723110659465e-05, | |
| "loss": 5.8958, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.07841254600736118, | |
| "grad_norm": 6.4623565673828125, | |
| "learning_rate": 4.882735198160133e-05, | |
| "loss": 6.0236, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.07921267402784446, | |
| "grad_norm": 2.186974048614502, | |
| "learning_rate": 4.881398085254319e-05, | |
| "loss": 6.0481, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08001280204832774, | |
| "grad_norm": 2.4983859062194824, | |
| "learning_rate": 4.880060972348505e-05, | |
| "loss": 6.075, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.080812930068811, | |
| "grad_norm": 2.778280258178711, | |
| "learning_rate": 4.8787238594426916e-05, | |
| "loss": 6.0757, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.08161305808929428, | |
| "grad_norm": 2.706965923309326, | |
| "learning_rate": 4.877386746536878e-05, | |
| "loss": 6.1504, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.08241318610977756, | |
| "grad_norm": 3.4069600105285645, | |
| "learning_rate": 4.876049633631064e-05, | |
| "loss": 6.0889, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.08321331413026084, | |
| "grad_norm": 3.179551124572754, | |
| "learning_rate": 4.8747125207252504e-05, | |
| "loss": 6.0057, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.08401344215074412, | |
| "grad_norm": 2.924018383026123, | |
| "learning_rate": 4.873375407819437e-05, | |
| "loss": 5.8406, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0848135701712274, | |
| "grad_norm": 3.103912115097046, | |
| "learning_rate": 4.872038294913623e-05, | |
| "loss": 6.0351, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.08561369819171068, | |
| "grad_norm": 2.8037219047546387, | |
| "learning_rate": 4.870701182007809e-05, | |
| "loss": 6.0272, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.08641382621219396, | |
| "grad_norm": 2.477062940597534, | |
| "learning_rate": 4.8693640691019955e-05, | |
| "loss": 5.9269, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.08721395423267723, | |
| "grad_norm": 2.748488187789917, | |
| "learning_rate": 4.868026956196182e-05, | |
| "loss": 5.943, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.0880140822531605, | |
| "grad_norm": 3.3991920948028564, | |
| "learning_rate": 4.866689843290368e-05, | |
| "loss": 6.1455, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.08881421027364378, | |
| "grad_norm": 3.208509683609009, | |
| "learning_rate": 4.8653527303845536e-05, | |
| "loss": 5.9746, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.08961433829412706, | |
| "grad_norm": 3.3378469944000244, | |
| "learning_rate": 4.86401561747874e-05, | |
| "loss": 5.9185, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.09041446631461034, | |
| "grad_norm": 2.269606113433838, | |
| "learning_rate": 4.862678504572926e-05, | |
| "loss": 5.9369, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.09121459433509362, | |
| "grad_norm": 2.749335765838623, | |
| "learning_rate": 4.8613413916671124e-05, | |
| "loss": 6.0648, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.0920147223555769, | |
| "grad_norm": 2.821913480758667, | |
| "learning_rate": 4.860004278761299e-05, | |
| "loss": 5.952, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.09281485037606017, | |
| "grad_norm": 2.640990734100342, | |
| "learning_rate": 4.858667165855485e-05, | |
| "loss": 6.0537, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.09361497839654345, | |
| "grad_norm": 3.570896625518799, | |
| "learning_rate": 4.857330052949671e-05, | |
| "loss": 5.7721, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.09441510641702672, | |
| "grad_norm": 3.245318651199341, | |
| "learning_rate": 4.8559929400438575e-05, | |
| "loss": 5.7305, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.09521523443751, | |
| "grad_norm": 4.075076580047607, | |
| "learning_rate": 4.854655827138044e-05, | |
| "loss": 5.974, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.09601536245799328, | |
| "grad_norm": 2.429893732070923, | |
| "learning_rate": 4.85331871423223e-05, | |
| "loss": 5.7828, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.09681549047847655, | |
| "grad_norm": 2.7077040672302246, | |
| "learning_rate": 4.851981601326416e-05, | |
| "loss": 5.9143, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.09761561849895983, | |
| "grad_norm": 2.767918586730957, | |
| "learning_rate": 4.8506444884206026e-05, | |
| "loss": 5.9449, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.09841574651944311, | |
| "grad_norm": 2.4544034004211426, | |
| "learning_rate": 4.849307375514789e-05, | |
| "loss": 6.0034, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.09921587453992639, | |
| "grad_norm": 5.215607643127441, | |
| "learning_rate": 4.847970262608975e-05, | |
| "loss": 5.867, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.10001600256040967, | |
| "grad_norm": 2.7856080532073975, | |
| "learning_rate": 4.8466331497031614e-05, | |
| "loss": 6.0213, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.10081613058089295, | |
| "grad_norm": 2.5528719425201416, | |
| "learning_rate": 4.8452960367973476e-05, | |
| "loss": 5.9634, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.10161625860137621, | |
| "grad_norm": 2.4917409420013428, | |
| "learning_rate": 4.843958923891533e-05, | |
| "loss": 5.887, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.10241638662185949, | |
| "grad_norm": 6.125699520111084, | |
| "learning_rate": 4.8426218109857195e-05, | |
| "loss": 6.1189, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.10321651464234277, | |
| "grad_norm": 2.783156156539917, | |
| "learning_rate": 4.841284698079906e-05, | |
| "loss": 5.9064, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.10401664266282605, | |
| "grad_norm": 3.611070156097412, | |
| "learning_rate": 4.839947585174092e-05, | |
| "loss": 5.9405, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.10481677068330933, | |
| "grad_norm": 4.296909809112549, | |
| "learning_rate": 4.838610472268278e-05, | |
| "loss": 5.9067, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.10561689870379261, | |
| "grad_norm": 2.4273040294647217, | |
| "learning_rate": 4.8372733593624646e-05, | |
| "loss": 5.888, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.10641702672427589, | |
| "grad_norm": 2.6499924659729004, | |
| "learning_rate": 4.835936246456651e-05, | |
| "loss": 5.9683, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.10721715474475917, | |
| "grad_norm": 3.1474297046661377, | |
| "learning_rate": 4.834599133550837e-05, | |
| "loss": 5.8946, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.10801728276524244, | |
| "grad_norm": 3.5050199031829834, | |
| "learning_rate": 4.8332620206450234e-05, | |
| "loss": 5.9179, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.10881741078572571, | |
| "grad_norm": 2.693700075149536, | |
| "learning_rate": 4.8319249077392096e-05, | |
| "loss": 5.7965, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10961753880620899, | |
| "grad_norm": 2.8202953338623047, | |
| "learning_rate": 4.830587794833396e-05, | |
| "loss": 5.9526, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.11041766682669227, | |
| "grad_norm": 2.514862060546875, | |
| "learning_rate": 4.829250681927582e-05, | |
| "loss": 5.936, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.11121779484717555, | |
| "grad_norm": 3.18804931640625, | |
| "learning_rate": 4.8279135690217685e-05, | |
| "loss": 5.9246, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.11201792286765883, | |
| "grad_norm": 2.77697491645813, | |
| "learning_rate": 4.826576456115955e-05, | |
| "loss": 5.9576, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.1128180508881421, | |
| "grad_norm": 2.762524127960205, | |
| "learning_rate": 4.825239343210141e-05, | |
| "loss": 5.9085, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.11361817890862538, | |
| "grad_norm": 2.4407670497894287, | |
| "learning_rate": 4.8239022303043266e-05, | |
| "loss": 5.9518, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.11441830692910866, | |
| "grad_norm": 3.1036713123321533, | |
| "learning_rate": 4.822565117398513e-05, | |
| "loss": 5.8412, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.11521843494959194, | |
| "grad_norm": 3.319058418273926, | |
| "learning_rate": 4.821228004492699e-05, | |
| "loss": 5.9733, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.1160185629700752, | |
| "grad_norm": 2.13468599319458, | |
| "learning_rate": 4.8198908915868854e-05, | |
| "loss": 5.9193, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.11681869099055849, | |
| "grad_norm": 2.6057028770446777, | |
| "learning_rate": 4.8185537786810717e-05, | |
| "loss": 5.9807, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.11761881901104176, | |
| "grad_norm": 2.7509753704071045, | |
| "learning_rate": 4.817216665775258e-05, | |
| "loss": 5.9534, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.11841894703152504, | |
| "grad_norm": 2.111055850982666, | |
| "learning_rate": 4.815879552869444e-05, | |
| "loss": 5.9207, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.11921907505200832, | |
| "grad_norm": 2.5271990299224854, | |
| "learning_rate": 4.8145424399636305e-05, | |
| "loss": 5.7148, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.1200192030724916, | |
| "grad_norm": 2.814138174057007, | |
| "learning_rate": 4.813205327057817e-05, | |
| "loss": 5.9498, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.12081933109297488, | |
| "grad_norm": 3.449355363845825, | |
| "learning_rate": 4.811868214152003e-05, | |
| "loss": 5.7814, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.12161945911345816, | |
| "grad_norm": 2.813746213912964, | |
| "learning_rate": 4.810531101246189e-05, | |
| "loss": 5.9517, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.12241958713394142, | |
| "grad_norm": 2.529242753982544, | |
| "learning_rate": 4.8091939883403755e-05, | |
| "loss": 5.8227, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.1232197151544247, | |
| "grad_norm": 2.2425034046173096, | |
| "learning_rate": 4.807856875434562e-05, | |
| "loss": 6.1064, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.12401984317490798, | |
| "grad_norm": 2.7732784748077393, | |
| "learning_rate": 4.806519762528748e-05, | |
| "loss": 5.8888, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.12481997119539126, | |
| "grad_norm": 2.5558009147644043, | |
| "learning_rate": 4.8051826496229343e-05, | |
| "loss": 5.8185, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.12562009921587455, | |
| "grad_norm": 2.884411096572876, | |
| "learning_rate": 4.8038455367171206e-05, | |
| "loss": 6.0534, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.12642022723635782, | |
| "grad_norm": 2.5747668743133545, | |
| "learning_rate": 4.802508423811307e-05, | |
| "loss": 5.8186, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.12722035525684108, | |
| "grad_norm": 2.324767827987671, | |
| "learning_rate": 4.801171310905493e-05, | |
| "loss": 5.8642, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.12802048327732438, | |
| "grad_norm": 2.2255160808563232, | |
| "learning_rate": 4.7998341979996794e-05, | |
| "loss": 5.8559, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.12882061129780764, | |
| "grad_norm": 2.97525954246521, | |
| "learning_rate": 4.798497085093866e-05, | |
| "loss": 5.8744, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.12962073931829093, | |
| "grad_norm": 2.23962664604187, | |
| "learning_rate": 4.797159972188052e-05, | |
| "loss": 5.7545, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.1304208673387742, | |
| "grad_norm": 3.6182124614715576, | |
| "learning_rate": 4.795822859282238e-05, | |
| "loss": 5.8872, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.1312209953592575, | |
| "grad_norm": 4.068545341491699, | |
| "learning_rate": 4.7944857463764245e-05, | |
| "loss": 5.9008, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.13202112337974076, | |
| "grad_norm": 3.627082109451294, | |
| "learning_rate": 4.793148633470611e-05, | |
| "loss": 5.8215, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.13282125140022405, | |
| "grad_norm": 3.0080721378326416, | |
| "learning_rate": 4.791811520564797e-05, | |
| "loss": 5.9086, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.13362137942070731, | |
| "grad_norm": 2.5463860034942627, | |
| "learning_rate": 4.790474407658983e-05, | |
| "loss": 5.776, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.13442150744119058, | |
| "grad_norm": 2.212488889694214, | |
| "learning_rate": 4.7891372947531696e-05, | |
| "loss": 6.006, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.13522163546167387, | |
| "grad_norm": 4.147563934326172, | |
| "learning_rate": 4.787800181847356e-05, | |
| "loss": 5.886, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.13602176348215714, | |
| "grad_norm": 2.6021018028259277, | |
| "learning_rate": 4.786463068941542e-05, | |
| "loss": 5.9182, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.13682189150264043, | |
| "grad_norm": 2.3109893798828125, | |
| "learning_rate": 4.7851259560357284e-05, | |
| "loss": 5.8084, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.1376220195231237, | |
| "grad_norm": 2.8678529262542725, | |
| "learning_rate": 4.7837888431299147e-05, | |
| "loss": 6.0363, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.138422147543607, | |
| "grad_norm": 2.1921958923339844, | |
| "learning_rate": 4.7824517302241e-05, | |
| "loss": 5.7667, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.13922227556409025, | |
| "grad_norm": 2.6883316040039062, | |
| "learning_rate": 4.7811146173182865e-05, | |
| "loss": 5.7906, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.14002240358457352, | |
| "grad_norm": 2.4079957008361816, | |
| "learning_rate": 4.779777504412473e-05, | |
| "loss": 5.7698, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.1408225316050568, | |
| "grad_norm": 4.29390287399292, | |
| "learning_rate": 4.778440391506659e-05, | |
| "loss": 5.9639, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.14162265962554008, | |
| "grad_norm": 4.133132457733154, | |
| "learning_rate": 4.777103278600845e-05, | |
| "loss": 6.0901, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.14242278764602337, | |
| "grad_norm": 3.871561288833618, | |
| "learning_rate": 4.7757661656950316e-05, | |
| "loss": 5.7455, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.14322291566650663, | |
| "grad_norm": 4.266111850738525, | |
| "learning_rate": 4.774429052789218e-05, | |
| "loss": 5.9971, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.14402304368698993, | |
| "grad_norm": 2.9000513553619385, | |
| "learning_rate": 4.773091939883404e-05, | |
| "loss": 5.9025, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1448231717074732, | |
| "grad_norm": 2.549964189529419, | |
| "learning_rate": 4.7717548269775904e-05, | |
| "loss": 5.768, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.14562329972795648, | |
| "grad_norm": 2.2882704734802246, | |
| "learning_rate": 4.770417714071777e-05, | |
| "loss": 6.022, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.14642342774843975, | |
| "grad_norm": 2.6501784324645996, | |
| "learning_rate": 4.769080601165963e-05, | |
| "loss": 5.8539, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.14722355576892301, | |
| "grad_norm": 2.3417108058929443, | |
| "learning_rate": 4.767743488260149e-05, | |
| "loss": 5.7734, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.1480236837894063, | |
| "grad_norm": 2.2151668071746826, | |
| "learning_rate": 4.7664063753543355e-05, | |
| "loss": 5.84, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.14882381180988957, | |
| "grad_norm": 3.114260196685791, | |
| "learning_rate": 4.765069262448522e-05, | |
| "loss": 5.9409, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.14962393983037287, | |
| "grad_norm": 2.4931910037994385, | |
| "learning_rate": 4.763732149542708e-05, | |
| "loss": 5.9396, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.15042406785085613, | |
| "grad_norm": 3.736487865447998, | |
| "learning_rate": 4.7623950366368936e-05, | |
| "loss": 5.7427, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.15122419587133942, | |
| "grad_norm": 4.730785846710205, | |
| "learning_rate": 4.76105792373108e-05, | |
| "loss": 5.9181, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.1520243238918227, | |
| "grad_norm": 2.9264132976531982, | |
| "learning_rate": 4.759720810825266e-05, | |
| "loss": 5.8967, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.15282445191230598, | |
| "grad_norm": 3.2538132667541504, | |
| "learning_rate": 4.7583836979194524e-05, | |
| "loss": 5.8459, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.15362457993278925, | |
| "grad_norm": 2.7208549976348877, | |
| "learning_rate": 4.757046585013639e-05, | |
| "loss": 5.7038, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.1544247079532725, | |
| "grad_norm": 2.7510788440704346, | |
| "learning_rate": 4.755709472107825e-05, | |
| "loss": 5.8524, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.1552248359737558, | |
| "grad_norm": 2.6565892696380615, | |
| "learning_rate": 4.754372359202011e-05, | |
| "loss": 5.6324, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.15602496399423907, | |
| "grad_norm": 2.954798936843872, | |
| "learning_rate": 4.7530352462961975e-05, | |
| "loss": 5.8388, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.15682509201472236, | |
| "grad_norm": 2.291714668273926, | |
| "learning_rate": 4.751698133390384e-05, | |
| "loss": 5.7504, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.15762522003520563, | |
| "grad_norm": 2.1387598514556885, | |
| "learning_rate": 4.75036102048457e-05, | |
| "loss": 5.7556, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.15842534805568892, | |
| "grad_norm": 2.290407180786133, | |
| "learning_rate": 4.749023907578756e-05, | |
| "loss": 5.7089, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.15922547607617218, | |
| "grad_norm": 2.852696657180786, | |
| "learning_rate": 4.7476867946729426e-05, | |
| "loss": 5.8656, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.16002560409665548, | |
| "grad_norm": 2.8190526962280273, | |
| "learning_rate": 4.746349681767129e-05, | |
| "loss": 6.0134, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16082573211713874, | |
| "grad_norm": 2.705008029937744, | |
| "learning_rate": 4.745012568861315e-05, | |
| "loss": 5.8713, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.161625860137622, | |
| "grad_norm": 3.571394205093384, | |
| "learning_rate": 4.7436754559555014e-05, | |
| "loss": 5.8329, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.1624259881581053, | |
| "grad_norm": 2.687455177307129, | |
| "learning_rate": 4.7423383430496876e-05, | |
| "loss": 5.8355, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.16322611617858857, | |
| "grad_norm": 2.6158690452575684, | |
| "learning_rate": 4.741001230143873e-05, | |
| "loss": 5.6938, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.16402624419907186, | |
| "grad_norm": 2.9657154083251953, | |
| "learning_rate": 4.7396641172380595e-05, | |
| "loss": 5.7514, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.16482637221955512, | |
| "grad_norm": 2.310607433319092, | |
| "learning_rate": 4.738327004332246e-05, | |
| "loss": 5.7397, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.16562650024003842, | |
| "grad_norm": 2.855271339416504, | |
| "learning_rate": 4.736989891426432e-05, | |
| "loss": 5.7645, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.16642662826052168, | |
| "grad_norm": 2.778768301010132, | |
| "learning_rate": 4.735652778520618e-05, | |
| "loss": 5.9582, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.16722675628100497, | |
| "grad_norm": 3.069973945617676, | |
| "learning_rate": 4.7343156656148046e-05, | |
| "loss": 5.8205, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.16802688430148824, | |
| "grad_norm": 3.5799551010131836, | |
| "learning_rate": 4.732978552708991e-05, | |
| "loss": 5.9001, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.1688270123219715, | |
| "grad_norm": 2.556668758392334, | |
| "learning_rate": 4.731641439803177e-05, | |
| "loss": 5.7258, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.1696271403424548, | |
| "grad_norm": 2.7847707271575928, | |
| "learning_rate": 4.7303043268973634e-05, | |
| "loss": 5.9007, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.17042726836293806, | |
| "grad_norm": 4.071508407592773, | |
| "learning_rate": 4.7289672139915496e-05, | |
| "loss": 5.7035, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.17122739638342135, | |
| "grad_norm": 2.6188418865203857, | |
| "learning_rate": 4.727630101085736e-05, | |
| "loss": 5.651, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.17202752440390462, | |
| "grad_norm": 1.952249526977539, | |
| "learning_rate": 4.726292988179922e-05, | |
| "loss": 6.1107, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.1728276524243879, | |
| "grad_norm": 2.299018144607544, | |
| "learning_rate": 4.7249558752741085e-05, | |
| "loss": 5.7609, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.17362778044487118, | |
| "grad_norm": 2.5578439235687256, | |
| "learning_rate": 4.723618762368295e-05, | |
| "loss": 5.792, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.17442790846535447, | |
| "grad_norm": 3.9921529293060303, | |
| "learning_rate": 4.722281649462481e-05, | |
| "loss": 5.7233, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.17522803648583773, | |
| "grad_norm": 2.5521302223205566, | |
| "learning_rate": 4.7209445365566666e-05, | |
| "loss": 5.807, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.176028164506321, | |
| "grad_norm": 2.71401047706604, | |
| "learning_rate": 4.719607423650853e-05, | |
| "loss": 5.6689, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1768282925268043, | |
| "grad_norm": 3.782607316970825, | |
| "learning_rate": 4.718270310745039e-05, | |
| "loss": 5.734, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.17762842054728756, | |
| "grad_norm": 2.57356333732605, | |
| "learning_rate": 4.7169331978392254e-05, | |
| "loss": 5.8101, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.17842854856777085, | |
| "grad_norm": 2.7005815505981445, | |
| "learning_rate": 4.715596084933412e-05, | |
| "loss": 6.0603, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.17922867658825412, | |
| "grad_norm": 2.081550359725952, | |
| "learning_rate": 4.714258972027598e-05, | |
| "loss": 5.7677, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.1800288046087374, | |
| "grad_norm": 3.6565728187561035, | |
| "learning_rate": 4.712921859121784e-05, | |
| "loss": 5.9672, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.18082893262922067, | |
| "grad_norm": 2.4702320098876953, | |
| "learning_rate": 4.7115847462159705e-05, | |
| "loss": 5.8397, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.18162906064970397, | |
| "grad_norm": 3.335736036300659, | |
| "learning_rate": 4.710247633310157e-05, | |
| "loss": 5.7021, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.18242918867018723, | |
| "grad_norm": 3.3939075469970703, | |
| "learning_rate": 4.708910520404343e-05, | |
| "loss": 5.8464, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.1832293166906705, | |
| "grad_norm": 2.4869279861450195, | |
| "learning_rate": 4.707573407498529e-05, | |
| "loss": 5.6904, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.1840294447111538, | |
| "grad_norm": 2.4240360260009766, | |
| "learning_rate": 4.7062362945927155e-05, | |
| "loss": 5.7227, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.18482957273163705, | |
| "grad_norm": 2.428786039352417, | |
| "learning_rate": 4.704899181686902e-05, | |
| "loss": 5.8295, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.18562970075212035, | |
| "grad_norm": 3.3214187622070312, | |
| "learning_rate": 4.703562068781088e-05, | |
| "loss": 5.8341, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.1864298287726036, | |
| "grad_norm": 3.2146456241607666, | |
| "learning_rate": 4.7022249558752744e-05, | |
| "loss": 5.7217, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.1872299567930869, | |
| "grad_norm": 4.442914009094238, | |
| "learning_rate": 4.7008878429694606e-05, | |
| "loss": 5.9003, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.18803008481357017, | |
| "grad_norm": 1.9268267154693604, | |
| "learning_rate": 4.699550730063646e-05, | |
| "loss": 5.8292, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.18883021283405343, | |
| "grad_norm": 3.130021095275879, | |
| "learning_rate": 4.6982136171578325e-05, | |
| "loss": 5.6864, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.18963034085453673, | |
| "grad_norm": 2.8835690021514893, | |
| "learning_rate": 4.696876504252019e-05, | |
| "loss": 5.829, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.19043046887502, | |
| "grad_norm": 2.4171135425567627, | |
| "learning_rate": 4.695539391346205e-05, | |
| "loss": 5.7972, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.19123059689550329, | |
| "grad_norm": 3.782817840576172, | |
| "learning_rate": 4.694202278440391e-05, | |
| "loss": 5.8497, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.19203072491598655, | |
| "grad_norm": 2.475249767303467, | |
| "learning_rate": 4.6928651655345776e-05, | |
| "loss": 5.9237, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.19283085293646984, | |
| "grad_norm": 2.5809242725372314, | |
| "learning_rate": 4.691528052628764e-05, | |
| "loss": 5.7756, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.1936309809569531, | |
| "grad_norm": 2.6922059059143066, | |
| "learning_rate": 4.69019093972295e-05, | |
| "loss": 5.9326, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.1944311089774364, | |
| "grad_norm": 2.7542431354522705, | |
| "learning_rate": 4.6888538268171364e-05, | |
| "loss": 5.6279, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.19523123699791967, | |
| "grad_norm": 2.4063303470611572, | |
| "learning_rate": 4.6875167139113226e-05, | |
| "loss": 5.91, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.19603136501840293, | |
| "grad_norm": 4.855547904968262, | |
| "learning_rate": 4.686179601005509e-05, | |
| "loss": 5.7286, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.19683149303888622, | |
| "grad_norm": 2.9875595569610596, | |
| "learning_rate": 4.684842488099695e-05, | |
| "loss": 5.8299, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.1976316210593695, | |
| "grad_norm": 4.467639923095703, | |
| "learning_rate": 4.6835053751938814e-05, | |
| "loss": 5.8469, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.19843174907985278, | |
| "grad_norm": 2.2144124507904053, | |
| "learning_rate": 4.682168262288068e-05, | |
| "loss": 5.7871, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.19923187710033605, | |
| "grad_norm": 2.4507012367248535, | |
| "learning_rate": 4.680831149382254e-05, | |
| "loss": 5.7529, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.20003200512081934, | |
| "grad_norm": 2.208648681640625, | |
| "learning_rate": 4.67949403647644e-05, | |
| "loss": 5.7265, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2008321331413026, | |
| "grad_norm": 2.560302257537842, | |
| "learning_rate": 4.6781569235706265e-05, | |
| "loss": 5.7842, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.2016322611617859, | |
| "grad_norm": 2.354292154312134, | |
| "learning_rate": 4.676819810664813e-05, | |
| "loss": 5.8468, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.20243238918226916, | |
| "grad_norm": 2.9559860229492188, | |
| "learning_rate": 4.675482697758999e-05, | |
| "loss": 5.7003, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.20323251720275243, | |
| "grad_norm": 3.251077651977539, | |
| "learning_rate": 4.674145584853185e-05, | |
| "loss": 5.8129, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.20403264522323572, | |
| "grad_norm": 2.7863471508026123, | |
| "learning_rate": 4.6728084719473716e-05, | |
| "loss": 5.6814, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.20483277324371899, | |
| "grad_norm": 2.9006989002227783, | |
| "learning_rate": 4.671471359041558e-05, | |
| "loss": 5.8292, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.20563290126420228, | |
| "grad_norm": 2.930689573287964, | |
| "learning_rate": 4.670134246135744e-05, | |
| "loss": 5.8825, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.20643302928468554, | |
| "grad_norm": 2.3105032444000244, | |
| "learning_rate": 4.6687971332299304e-05, | |
| "loss": 5.7039, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.20723315730516884, | |
| "grad_norm": 3.1141879558563232, | |
| "learning_rate": 4.667460020324117e-05, | |
| "loss": 5.8692, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.2080332853256521, | |
| "grad_norm": 3.5017199516296387, | |
| "learning_rate": 4.666122907418303e-05, | |
| "loss": 5.7922, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.2088334133461354, | |
| "grad_norm": 2.657975912094116, | |
| "learning_rate": 4.664785794512489e-05, | |
| "loss": 5.7736, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.20963354136661866, | |
| "grad_norm": 3.246952772140503, | |
| "learning_rate": 4.6634486816066755e-05, | |
| "loss": 5.768, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.21043366938710192, | |
| "grad_norm": 6.832335948944092, | |
| "learning_rate": 4.662111568700862e-05, | |
| "loss": 5.6752, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.21123379740758522, | |
| "grad_norm": 3.2479753494262695, | |
| "learning_rate": 4.660774455795048e-05, | |
| "loss": 5.8015, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.21203392542806848, | |
| "grad_norm": 2.809082508087158, | |
| "learning_rate": 4.659437342889234e-05, | |
| "loss": 5.8663, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.21283405344855177, | |
| "grad_norm": 3.7948036193847656, | |
| "learning_rate": 4.65810022998342e-05, | |
| "loss": 5.889, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.21363418146903504, | |
| "grad_norm": 2.836090564727783, | |
| "learning_rate": 4.656763117077606e-05, | |
| "loss": 5.7516, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.21443430948951833, | |
| "grad_norm": 3.0940232276916504, | |
| "learning_rate": 4.6554260041717924e-05, | |
| "loss": 5.7033, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.2152344375100016, | |
| "grad_norm": 2.436757802963257, | |
| "learning_rate": 4.654088891265979e-05, | |
| "loss": 5.746, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.2160345655304849, | |
| "grad_norm": 2.4339609146118164, | |
| "learning_rate": 4.652751778360165e-05, | |
| "loss": 5.828, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.21683469355096816, | |
| "grad_norm": 2.379366874694824, | |
| "learning_rate": 4.651414665454351e-05, | |
| "loss": 5.719, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.21763482157145142, | |
| "grad_norm": 2.1722371578216553, | |
| "learning_rate": 4.6500775525485375e-05, | |
| "loss": 5.7875, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.2184349495919347, | |
| "grad_norm": 3.633279800415039, | |
| "learning_rate": 4.648740439642724e-05, | |
| "loss": 5.802, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.21923507761241798, | |
| "grad_norm": 2.4091219902038574, | |
| "learning_rate": 4.64740332673691e-05, | |
| "loss": 5.8197, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.22003520563290127, | |
| "grad_norm": 2.7289021015167236, | |
| "learning_rate": 4.646066213831096e-05, | |
| "loss": 5.9445, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.22083533365338454, | |
| "grad_norm": 2.376481294631958, | |
| "learning_rate": 4.6447291009252826e-05, | |
| "loss": 5.9943, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.22163546167386783, | |
| "grad_norm": 2.6542563438415527, | |
| "learning_rate": 4.643391988019469e-05, | |
| "loss": 5.6049, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.2224355896943511, | |
| "grad_norm": 2.320472240447998, | |
| "learning_rate": 4.642054875113655e-05, | |
| "loss": 5.7637, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.2232357177148344, | |
| "grad_norm": 2.8923239707946777, | |
| "learning_rate": 4.6407177622078414e-05, | |
| "loss": 5.9666, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.22403584573531765, | |
| "grad_norm": 4.277271270751953, | |
| "learning_rate": 4.6393806493020276e-05, | |
| "loss": 5.8393, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.22483597375580092, | |
| "grad_norm": 2.797428607940674, | |
| "learning_rate": 4.638043536396213e-05, | |
| "loss": 5.759, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.2256361017762842, | |
| "grad_norm": 2.1849517822265625, | |
| "learning_rate": 4.6367064234903995e-05, | |
| "loss": 5.7514, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.22643622979676747, | |
| "grad_norm": 2.8607492446899414, | |
| "learning_rate": 4.635369310584586e-05, | |
| "loss": 5.7545, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.22723635781725077, | |
| "grad_norm": 3.722041130065918, | |
| "learning_rate": 4.634032197678772e-05, | |
| "loss": 5.8011, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.22803648583773403, | |
| "grad_norm": 2.8563833236694336, | |
| "learning_rate": 4.632695084772958e-05, | |
| "loss": 5.8569, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.22883661385821732, | |
| "grad_norm": 3.5724806785583496, | |
| "learning_rate": 4.6313579718671446e-05, | |
| "loss": 5.9649, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.2296367418787006, | |
| "grad_norm": 2.380469560623169, | |
| "learning_rate": 4.630020858961331e-05, | |
| "loss": 5.7467, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.23043686989918388, | |
| "grad_norm": 3.1629838943481445, | |
| "learning_rate": 4.628683746055517e-05, | |
| "loss": 5.642, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.23123699791966715, | |
| "grad_norm": 2.1239373683929443, | |
| "learning_rate": 4.6273466331497034e-05, | |
| "loss": 5.6483, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.2320371259401504, | |
| "grad_norm": 3.049079418182373, | |
| "learning_rate": 4.6260095202438897e-05, | |
| "loss": 5.9736, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.2328372539606337, | |
| "grad_norm": 2.556830406188965, | |
| "learning_rate": 4.624672407338076e-05, | |
| "loss": 5.6037, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.23363738198111697, | |
| "grad_norm": 2.8762035369873047, | |
| "learning_rate": 4.623335294432262e-05, | |
| "loss": 5.6345, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.23443751000160026, | |
| "grad_norm": 2.11167573928833, | |
| "learning_rate": 4.6219981815264485e-05, | |
| "loss": 5.7822, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.23523763802208353, | |
| "grad_norm": 4.623869895935059, | |
| "learning_rate": 4.620661068620635e-05, | |
| "loss": 5.7063, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.23603776604256682, | |
| "grad_norm": 2.4420578479766846, | |
| "learning_rate": 4.619323955714821e-05, | |
| "loss": 5.686, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.2368378940630501, | |
| "grad_norm": 2.6543869972229004, | |
| "learning_rate": 4.617986842809007e-05, | |
| "loss": 5.7802, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.23763802208353338, | |
| "grad_norm": 2.6264312267303467, | |
| "learning_rate": 4.616649729903193e-05, | |
| "loss": 5.6667, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.23843815010401664, | |
| "grad_norm": 2.4579195976257324, | |
| "learning_rate": 4.615312616997379e-05, | |
| "loss": 5.6738, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.2392382781244999, | |
| "grad_norm": 2.299448251724243, | |
| "learning_rate": 4.6139755040915654e-05, | |
| "loss": 5.8622, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.2400384061449832, | |
| "grad_norm": 3.6527328491210938, | |
| "learning_rate": 4.612638391185752e-05, | |
| "loss": 5.6346, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24083853416546647, | |
| "grad_norm": 2.217876434326172, | |
| "learning_rate": 4.611301278279938e-05, | |
| "loss": 5.7892, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.24163866218594976, | |
| "grad_norm": 3.500544309616089, | |
| "learning_rate": 4.609964165374124e-05, | |
| "loss": 5.8026, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.24243879020643302, | |
| "grad_norm": 3.1694483757019043, | |
| "learning_rate": 4.6086270524683105e-05, | |
| "loss": 5.827, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.24323891822691632, | |
| "grad_norm": 2.899625778198242, | |
| "learning_rate": 4.607289939562497e-05, | |
| "loss": 5.7384, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.24403904624739958, | |
| "grad_norm": 2.8286776542663574, | |
| "learning_rate": 4.605952826656683e-05, | |
| "loss": 5.7629, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.24483917426788285, | |
| "grad_norm": 2.7585489749908447, | |
| "learning_rate": 4.604615713750869e-05, | |
| "loss": 5.7462, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.24563930228836614, | |
| "grad_norm": 2.2017667293548584, | |
| "learning_rate": 4.6032786008450555e-05, | |
| "loss": 5.844, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.2464394303088494, | |
| "grad_norm": 4.679725170135498, | |
| "learning_rate": 4.601941487939242e-05, | |
| "loss": 5.7254, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.2472395583293327, | |
| "grad_norm": 2.923884868621826, | |
| "learning_rate": 4.600604375033428e-05, | |
| "loss": 5.703, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.24803968634981596, | |
| "grad_norm": 2.2205090522766113, | |
| "learning_rate": 4.5992672621276144e-05, | |
| "loss": 5.7185, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.24883981437029926, | |
| "grad_norm": 2.852313280105591, | |
| "learning_rate": 4.5979301492218006e-05, | |
| "loss": 5.5653, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.24963994239078252, | |
| "grad_norm": 2.7683911323547363, | |
| "learning_rate": 4.596593036315986e-05, | |
| "loss": 5.7262, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.2504400704112658, | |
| "grad_norm": 3.1315665245056152, | |
| "learning_rate": 4.5952559234101725e-05, | |
| "loss": 5.7524, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.2512401984317491, | |
| "grad_norm": 2.5233592987060547, | |
| "learning_rate": 4.593918810504359e-05, | |
| "loss": 5.7443, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.25204032645223234, | |
| "grad_norm": 2.3802831172943115, | |
| "learning_rate": 4.592581697598545e-05, | |
| "loss": 5.8091, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.25284045447271564, | |
| "grad_norm": 2.378218412399292, | |
| "learning_rate": 4.591244584692731e-05, | |
| "loss": 5.7741, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.25364058249319893, | |
| "grad_norm": 4.712483882904053, | |
| "learning_rate": 4.5899074717869176e-05, | |
| "loss": 5.8643, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.25444071051368217, | |
| "grad_norm": 2.798752784729004, | |
| "learning_rate": 4.588570358881104e-05, | |
| "loss": 5.7984, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.25524083853416546, | |
| "grad_norm": 2.302037477493286, | |
| "learning_rate": 4.58723324597529e-05, | |
| "loss": 5.6548, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.25604096655464875, | |
| "grad_norm": 2.8621273040771484, | |
| "learning_rate": 4.5858961330694764e-05, | |
| "loss": 5.6875, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.25684109457513205, | |
| "grad_norm": 2.9079480171203613, | |
| "learning_rate": 4.5845590201636626e-05, | |
| "loss": 5.8801, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.2576412225956153, | |
| "grad_norm": 2.9576847553253174, | |
| "learning_rate": 4.583221907257849e-05, | |
| "loss": 5.6646, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.2584413506160986, | |
| "grad_norm": 4.085951805114746, | |
| "learning_rate": 4.581884794352035e-05, | |
| "loss": 5.9078, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.25924147863658187, | |
| "grad_norm": 2.622903347015381, | |
| "learning_rate": 4.5805476814462214e-05, | |
| "loss": 5.6821, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.2600416066570651, | |
| "grad_norm": 1.794255256652832, | |
| "learning_rate": 4.579210568540408e-05, | |
| "loss": 5.751, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.2608417346775484, | |
| "grad_norm": 3.074042558670044, | |
| "learning_rate": 4.577873455634594e-05, | |
| "loss": 5.7864, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.2616418626980317, | |
| "grad_norm": 2.3138844966888428, | |
| "learning_rate": 4.57653634272878e-05, | |
| "loss": 5.693, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.262441990718515, | |
| "grad_norm": 3.8877549171447754, | |
| "learning_rate": 4.5751992298229665e-05, | |
| "loss": 5.7154, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.2632421187389982, | |
| "grad_norm": 2.9623680114746094, | |
| "learning_rate": 4.573862116917153e-05, | |
| "loss": 5.7514, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.2640422467594815, | |
| "grad_norm": 2.840122938156128, | |
| "learning_rate": 4.572525004011339e-05, | |
| "loss": 5.7397, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.2648423747799648, | |
| "grad_norm": 2.9699277877807617, | |
| "learning_rate": 4.571187891105525e-05, | |
| "loss": 5.7626, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.2656425028004481, | |
| "grad_norm": 2.6493773460388184, | |
| "learning_rate": 4.5698507781997116e-05, | |
| "loss": 5.7619, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.26644263082093134, | |
| "grad_norm": 2.283259868621826, | |
| "learning_rate": 4.568513665293898e-05, | |
| "loss": 5.8409, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.26724275884141463, | |
| "grad_norm": 1.9254164695739746, | |
| "learning_rate": 4.567176552388084e-05, | |
| "loss": 5.8218, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.2680428868618979, | |
| "grad_norm": 2.382345676422119, | |
| "learning_rate": 4.5658394394822704e-05, | |
| "loss": 5.6865, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.26884301488238116, | |
| "grad_norm": 2.6039271354675293, | |
| "learning_rate": 4.564502326576457e-05, | |
| "loss": 5.7254, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.26964314290286445, | |
| "grad_norm": 2.0948996543884277, | |
| "learning_rate": 4.563165213670643e-05, | |
| "loss": 5.7589, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.27044327092334774, | |
| "grad_norm": 2.939955711364746, | |
| "learning_rate": 4.561828100764829e-05, | |
| "loss": 5.8298, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.27124339894383104, | |
| "grad_norm": 2.748307466506958, | |
| "learning_rate": 4.5604909878590155e-05, | |
| "loss": 5.8505, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.2720435269643143, | |
| "grad_norm": 2.7122459411621094, | |
| "learning_rate": 4.559153874953202e-05, | |
| "loss": 5.9027, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.27284365498479757, | |
| "grad_norm": 3.6053593158721924, | |
| "learning_rate": 4.557816762047388e-05, | |
| "loss": 5.6746, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.27364378300528086, | |
| "grad_norm": 4.433299541473389, | |
| "learning_rate": 4.556479649141574e-05, | |
| "loss": 5.7713, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.2744439110257641, | |
| "grad_norm": 2.5253539085388184, | |
| "learning_rate": 4.55514253623576e-05, | |
| "loss": 5.8219, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.2752440390462474, | |
| "grad_norm": 4.9358062744140625, | |
| "learning_rate": 4.553805423329946e-05, | |
| "loss": 5.7971, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.2760441670667307, | |
| "grad_norm": 2.6247594356536865, | |
| "learning_rate": 4.5524683104241324e-05, | |
| "loss": 5.1528, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.276844295087214, | |
| "grad_norm": 2.8152048587799072, | |
| "learning_rate": 4.551131197518319e-05, | |
| "loss": 5.7955, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.2776444231076972, | |
| "grad_norm": 2.143275499343872, | |
| "learning_rate": 4.549794084612505e-05, | |
| "loss": 5.6875, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.2784445511281805, | |
| "grad_norm": 2.9896023273468018, | |
| "learning_rate": 4.548456971706691e-05, | |
| "loss": 5.7981, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.2792446791486638, | |
| "grad_norm": 3.5231759548187256, | |
| "learning_rate": 4.5471198588008775e-05, | |
| "loss": 5.7343, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.28004480716914704, | |
| "grad_norm": 2.391721487045288, | |
| "learning_rate": 4.545782745895064e-05, | |
| "loss": 5.6821, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.28084493518963033, | |
| "grad_norm": 2.414992332458496, | |
| "learning_rate": 4.54444563298925e-05, | |
| "loss": 5.7357, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.2816450632101136, | |
| "grad_norm": 2.7502214908599854, | |
| "learning_rate": 4.543108520083436e-05, | |
| "loss": 5.6511, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.2824451912305969, | |
| "grad_norm": 2.1601436138153076, | |
| "learning_rate": 4.5417714071776226e-05, | |
| "loss": 5.6249, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.28324531925108015, | |
| "grad_norm": 2.89013671875, | |
| "learning_rate": 4.540434294271809e-05, | |
| "loss": 5.7583, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.28404544727156344, | |
| "grad_norm": 2.4915778636932373, | |
| "learning_rate": 4.539097181365995e-05, | |
| "loss": 5.6957, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.28484557529204674, | |
| "grad_norm": 5.053386688232422, | |
| "learning_rate": 4.5377600684601814e-05, | |
| "loss": 5.632, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.28564570331253003, | |
| "grad_norm": 2.6207687854766846, | |
| "learning_rate": 4.5364229555543676e-05, | |
| "loss": 5.8514, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.28644583133301327, | |
| "grad_norm": 4.157670497894287, | |
| "learning_rate": 4.535085842648553e-05, | |
| "loss": 5.7608, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.28724595935349656, | |
| "grad_norm": 3.4464797973632812, | |
| "learning_rate": 4.5337487297427395e-05, | |
| "loss": 5.6737, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.28804608737397985, | |
| "grad_norm": 4.255002498626709, | |
| "learning_rate": 4.532411616836926e-05, | |
| "loss": 5.7977, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.2888462153944631, | |
| "grad_norm": 2.7926547527313232, | |
| "learning_rate": 4.531074503931112e-05, | |
| "loss": 5.6891, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.2896463434149464, | |
| "grad_norm": 3.150400400161743, | |
| "learning_rate": 4.529737391025298e-05, | |
| "loss": 5.7931, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.2904464714354297, | |
| "grad_norm": 2.1223199367523193, | |
| "learning_rate": 4.5284002781194846e-05, | |
| "loss": 5.8646, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.29124659945591297, | |
| "grad_norm": 3.950665235519409, | |
| "learning_rate": 4.527063165213671e-05, | |
| "loss": 5.7008, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.2920467274763962, | |
| "grad_norm": 2.995692729949951, | |
| "learning_rate": 4.525726052307857e-05, | |
| "loss": 5.688, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.2928468554968795, | |
| "grad_norm": 2.041736125946045, | |
| "learning_rate": 4.5243889394020434e-05, | |
| "loss": 5.7301, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.2936469835173628, | |
| "grad_norm": 2.541757106781006, | |
| "learning_rate": 4.5230518264962297e-05, | |
| "loss": 5.5606, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.29444711153784603, | |
| "grad_norm": 2.140761613845825, | |
| "learning_rate": 4.521714713590416e-05, | |
| "loss": 5.7671, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.2952472395583293, | |
| "grad_norm": 2.6869146823883057, | |
| "learning_rate": 4.520377600684602e-05, | |
| "loss": 5.6452, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.2960473675788126, | |
| "grad_norm": 3.072376012802124, | |
| "learning_rate": 4.5190404877787885e-05, | |
| "loss": 5.6956, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.2968474955992959, | |
| "grad_norm": 2.5933837890625, | |
| "learning_rate": 4.517703374872975e-05, | |
| "loss": 5.6212, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.29764762361977914, | |
| "grad_norm": 3.0443103313446045, | |
| "learning_rate": 4.516366261967161e-05, | |
| "loss": 5.7849, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.29844775164026244, | |
| "grad_norm": 2.673583745956421, | |
| "learning_rate": 4.515029149061347e-05, | |
| "loss": 5.6186, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.29924787966074573, | |
| "grad_norm": 2.3276283740997314, | |
| "learning_rate": 4.513692036155533e-05, | |
| "loss": 5.9188, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.300048007681229, | |
| "grad_norm": 5.504491329193115, | |
| "learning_rate": 4.512354923249719e-05, | |
| "loss": 5.5676, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.30084813570171226, | |
| "grad_norm": 2.4181482791900635, | |
| "learning_rate": 4.5110178103439054e-05, | |
| "loss": 5.6852, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.30164826372219555, | |
| "grad_norm": 2.2489006519317627, | |
| "learning_rate": 4.509680697438092e-05, | |
| "loss": 5.7003, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.30244839174267885, | |
| "grad_norm": 2.6925253868103027, | |
| "learning_rate": 4.508343584532278e-05, | |
| "loss": 5.8176, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.3032485197631621, | |
| "grad_norm": 2.904318332672119, | |
| "learning_rate": 4.507006471626464e-05, | |
| "loss": 5.6912, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.3040486477836454, | |
| "grad_norm": 3.3189070224761963, | |
| "learning_rate": 4.5056693587206505e-05, | |
| "loss": 5.8706, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.30484877580412867, | |
| "grad_norm": 2.8324170112609863, | |
| "learning_rate": 4.504332245814837e-05, | |
| "loss": 5.8795, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.30564890382461196, | |
| "grad_norm": 3.113417148590088, | |
| "learning_rate": 4.502995132909023e-05, | |
| "loss": 5.8689, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.3064490318450952, | |
| "grad_norm": 2.469269275665283, | |
| "learning_rate": 4.501658020003209e-05, | |
| "loss": 5.7934, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.3072491598655785, | |
| "grad_norm": 2.778571128845215, | |
| "learning_rate": 4.5003209070973956e-05, | |
| "loss": 5.8577, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.3080492878860618, | |
| "grad_norm": 3.4269161224365234, | |
| "learning_rate": 4.498983794191582e-05, | |
| "loss": 5.8378, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.308849415906545, | |
| "grad_norm": 3.417850971221924, | |
| "learning_rate": 4.497646681285768e-05, | |
| "loss": 5.6532, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.3096495439270283, | |
| "grad_norm": 2.389784097671509, | |
| "learning_rate": 4.4963095683799544e-05, | |
| "loss": 5.5454, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.3104496719475116, | |
| "grad_norm": 2.384453296661377, | |
| "learning_rate": 4.4949724554741406e-05, | |
| "loss": 5.8014, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.3112497999679949, | |
| "grad_norm": 1.913668155670166, | |
| "learning_rate": 4.493635342568326e-05, | |
| "loss": 5.6033, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.31204992798847814, | |
| "grad_norm": 3.4930074214935303, | |
| "learning_rate": 4.4922982296625125e-05, | |
| "loss": 5.7649, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.31285005600896143, | |
| "grad_norm": 3.517458200454712, | |
| "learning_rate": 4.490961116756699e-05, | |
| "loss": 5.5635, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.3136501840294447, | |
| "grad_norm": 2.611274480819702, | |
| "learning_rate": 4.489624003850885e-05, | |
| "loss": 5.8121, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.314450312049928, | |
| "grad_norm": 2.373997926712036, | |
| "learning_rate": 4.488286890945071e-05, | |
| "loss": 5.6002, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.31525044007041125, | |
| "grad_norm": 2.554847002029419, | |
| "learning_rate": 4.4869497780392576e-05, | |
| "loss": 5.6432, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.31605056809089455, | |
| "grad_norm": 3.3720595836639404, | |
| "learning_rate": 4.485612665133444e-05, | |
| "loss": 5.5794, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.31685069611137784, | |
| "grad_norm": 2.2308788299560547, | |
| "learning_rate": 4.48427555222763e-05, | |
| "loss": 5.794, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.3176508241318611, | |
| "grad_norm": 2.0659661293029785, | |
| "learning_rate": 4.4829384393218164e-05, | |
| "loss": 5.5383, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.31845095215234437, | |
| "grad_norm": 3.2644894123077393, | |
| "learning_rate": 4.4816013264160026e-05, | |
| "loss": 5.6979, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.31925108017282766, | |
| "grad_norm": 2.3485729694366455, | |
| "learning_rate": 4.480264213510189e-05, | |
| "loss": 5.7214, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.32005120819331095, | |
| "grad_norm": 2.7470600605010986, | |
| "learning_rate": 4.478927100604375e-05, | |
| "loss": 5.6032, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3208513362137942, | |
| "grad_norm": 2.1622989177703857, | |
| "learning_rate": 4.4775899876985614e-05, | |
| "loss": 5.7976, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.3216514642342775, | |
| "grad_norm": 2.7463905811309814, | |
| "learning_rate": 4.476252874792748e-05, | |
| "loss": 5.7181, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.3224515922547608, | |
| "grad_norm": 3.503662109375, | |
| "learning_rate": 4.474915761886934e-05, | |
| "loss": 5.8092, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.323251720275244, | |
| "grad_norm": 2.6073853969573975, | |
| "learning_rate": 4.47357864898112e-05, | |
| "loss": 5.7876, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3240518482957273, | |
| "grad_norm": 3.354768991470337, | |
| "learning_rate": 4.472241536075306e-05, | |
| "loss": 5.7741, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.3248519763162106, | |
| "grad_norm": 2.648145914077759, | |
| "learning_rate": 4.470904423169492e-05, | |
| "loss": 5.7522, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3256521043366939, | |
| "grad_norm": 3.086655378341675, | |
| "learning_rate": 4.4695673102636784e-05, | |
| "loss": 5.81, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.32645223235717713, | |
| "grad_norm": 2.230905771255493, | |
| "learning_rate": 4.4682301973578647e-05, | |
| "loss": 5.8839, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.3272523603776604, | |
| "grad_norm": 2.5391674041748047, | |
| "learning_rate": 4.466893084452051e-05, | |
| "loss": 5.5535, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.3280524883981437, | |
| "grad_norm": 2.7574117183685303, | |
| "learning_rate": 4.465555971546237e-05, | |
| "loss": 5.8275, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.32885261641862695, | |
| "grad_norm": 3.1114678382873535, | |
| "learning_rate": 4.4642188586404235e-05, | |
| "loss": 5.6876, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.32965274443911025, | |
| "grad_norm": 2.404892683029175, | |
| "learning_rate": 4.46288174573461e-05, | |
| "loss": 5.6876, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.33045287245959354, | |
| "grad_norm": 2.590759754180908, | |
| "learning_rate": 4.461544632828796e-05, | |
| "loss": 5.802, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.33125300048007683, | |
| "grad_norm": 2.4358649253845215, | |
| "learning_rate": 4.460207519922982e-05, | |
| "loss": 5.632, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.33205312850056007, | |
| "grad_norm": 3.9567458629608154, | |
| "learning_rate": 4.4588704070171685e-05, | |
| "loss": 5.8761, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.33285325652104336, | |
| "grad_norm": 2.3808743953704834, | |
| "learning_rate": 4.457533294111355e-05, | |
| "loss": 5.6815, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.33365338454152665, | |
| "grad_norm": 2.6527156829833984, | |
| "learning_rate": 4.456196181205541e-05, | |
| "loss": 5.805, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.33445351256200995, | |
| "grad_norm": 2.351062536239624, | |
| "learning_rate": 4.4548590682997273e-05, | |
| "loss": 5.6681, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.3352536405824932, | |
| "grad_norm": 2.3213460445404053, | |
| "learning_rate": 4.4535219553939136e-05, | |
| "loss": 5.6363, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.3360537686029765, | |
| "grad_norm": 1.9470767974853516, | |
| "learning_rate": 4.4521848424881e-05, | |
| "loss": 5.8772, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.33685389662345977, | |
| "grad_norm": 4.303500652313232, | |
| "learning_rate": 4.450847729582286e-05, | |
| "loss": 5.6185, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.337654024643943, | |
| "grad_norm": 2.713275909423828, | |
| "learning_rate": 4.4495106166764724e-05, | |
| "loss": 5.6754, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3384541526644263, | |
| "grad_norm": 2.34993314743042, | |
| "learning_rate": 4.448173503770659e-05, | |
| "loss": 5.7003, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.3392542806849096, | |
| "grad_norm": 2.276228666305542, | |
| "learning_rate": 4.446836390864845e-05, | |
| "loss": 5.6, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.3400544087053929, | |
| "grad_norm": 2.3635685443878174, | |
| "learning_rate": 4.445499277959031e-05, | |
| "loss": 5.7373, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.3408545367258761, | |
| "grad_norm": 3.100604772567749, | |
| "learning_rate": 4.4441621650532175e-05, | |
| "loss": 5.7354, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.3416546647463594, | |
| "grad_norm": 2.6743876934051514, | |
| "learning_rate": 4.442825052147404e-05, | |
| "loss": 5.7544, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.3424547927668427, | |
| "grad_norm": 2.5783612728118896, | |
| "learning_rate": 4.44148793924159e-05, | |
| "loss": 5.8826, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.34325492078732595, | |
| "grad_norm": 2.8976659774780273, | |
| "learning_rate": 4.440150826335776e-05, | |
| "loss": 5.5418, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.34405504880780924, | |
| "grad_norm": 2.1061089038848877, | |
| "learning_rate": 4.4388137134299626e-05, | |
| "loss": 5.6406, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.34485517682829253, | |
| "grad_norm": 2.1303789615631104, | |
| "learning_rate": 4.437476600524149e-05, | |
| "loss": 5.6491, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.3456553048487758, | |
| "grad_norm": 2.6240499019622803, | |
| "learning_rate": 4.436139487618335e-05, | |
| "loss": 5.7161, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.34645543286925906, | |
| "grad_norm": 2.325155019760132, | |
| "learning_rate": 4.4348023747125214e-05, | |
| "loss": 5.6172, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.34725556088974235, | |
| "grad_norm": 2.8844404220581055, | |
| "learning_rate": 4.4334652618067076e-05, | |
| "loss": 5.7438, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.34805568891022565, | |
| "grad_norm": 2.375324249267578, | |
| "learning_rate": 4.432128148900894e-05, | |
| "loss": 5.8335, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.34885581693070894, | |
| "grad_norm": 2.1572377681732178, | |
| "learning_rate": 4.4307910359950795e-05, | |
| "loss": 5.706, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.3496559449511922, | |
| "grad_norm": 2.5218889713287354, | |
| "learning_rate": 4.429453923089266e-05, | |
| "loss": 5.7487, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.35045607297167547, | |
| "grad_norm": 2.636223554611206, | |
| "learning_rate": 4.428116810183452e-05, | |
| "loss": 5.8327, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.35125620099215876, | |
| "grad_norm": 2.436155080795288, | |
| "learning_rate": 4.426779697277638e-05, | |
| "loss": 5.6895, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.352056329012642, | |
| "grad_norm": 3.4435484409332275, | |
| "learning_rate": 4.4254425843718246e-05, | |
| "loss": 5.6171, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.3528564570331253, | |
| "grad_norm": 2.3990628719329834, | |
| "learning_rate": 4.424105471466011e-05, | |
| "loss": 5.7574, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.3536565850536086, | |
| "grad_norm": 2.544774293899536, | |
| "learning_rate": 4.422768358560197e-05, | |
| "loss": 5.558, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.3544567130740919, | |
| "grad_norm": 2.389491081237793, | |
| "learning_rate": 4.4214312456543834e-05, | |
| "loss": 5.6628, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.3552568410945751, | |
| "grad_norm": 5.203212261199951, | |
| "learning_rate": 4.4200941327485697e-05, | |
| "loss": 5.5403, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.3560569691150584, | |
| "grad_norm": 2.0861873626708984, | |
| "learning_rate": 4.418757019842756e-05, | |
| "loss": 5.625, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.3568570971355417, | |
| "grad_norm": 2.2355470657348633, | |
| "learning_rate": 4.417419906936942e-05, | |
| "loss": 5.614, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.35765722515602494, | |
| "grad_norm": 2.2239274978637695, | |
| "learning_rate": 4.4160827940311285e-05, | |
| "loss": 5.6885, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.35845735317650823, | |
| "grad_norm": 4.571592807769775, | |
| "learning_rate": 4.414745681125315e-05, | |
| "loss": 5.8495, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.3592574811969915, | |
| "grad_norm": 2.6501150131225586, | |
| "learning_rate": 4.413408568219501e-05, | |
| "loss": 5.6158, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.3600576092174748, | |
| "grad_norm": 2.8568902015686035, | |
| "learning_rate": 4.412071455313687e-05, | |
| "loss": 5.6403, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.36085773723795805, | |
| "grad_norm": 2.4179179668426514, | |
| "learning_rate": 4.410734342407873e-05, | |
| "loss": 5.749, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.36165786525844135, | |
| "grad_norm": 2.950491189956665, | |
| "learning_rate": 4.409397229502059e-05, | |
| "loss": 5.7128, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.36245799327892464, | |
| "grad_norm": 3.731049060821533, | |
| "learning_rate": 4.4080601165962454e-05, | |
| "loss": 5.6397, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.36325812129940793, | |
| "grad_norm": 2.255730390548706, | |
| "learning_rate": 4.406723003690432e-05, | |
| "loss": 5.626, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.36405824931989117, | |
| "grad_norm": 2.623455047607422, | |
| "learning_rate": 4.405385890784618e-05, | |
| "loss": 5.6792, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.36485837734037446, | |
| "grad_norm": 2.366481065750122, | |
| "learning_rate": 4.404048777878804e-05, | |
| "loss": 5.5455, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.36565850536085776, | |
| "grad_norm": 2.56351375579834, | |
| "learning_rate": 4.4027116649729905e-05, | |
| "loss": 5.7982, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.366458633381341, | |
| "grad_norm": 2.3203811645507812, | |
| "learning_rate": 4.401374552067177e-05, | |
| "loss": 5.7969, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.3672587614018243, | |
| "grad_norm": 2.3838179111480713, | |
| "learning_rate": 4.400037439161363e-05, | |
| "loss": 5.7484, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.3680588894223076, | |
| "grad_norm": 2.0725440979003906, | |
| "learning_rate": 4.398700326255549e-05, | |
| "loss": 5.8405, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.36885901744279087, | |
| "grad_norm": 3.49495005607605, | |
| "learning_rate": 4.3973632133497356e-05, | |
| "loss": 5.7151, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.3696591454632741, | |
| "grad_norm": 2.643007755279541, | |
| "learning_rate": 4.396026100443922e-05, | |
| "loss": 5.6374, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.3704592734837574, | |
| "grad_norm": 2.282304286956787, | |
| "learning_rate": 4.394688987538108e-05, | |
| "loss": 5.589, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.3712594015042407, | |
| "grad_norm": 2.244058609008789, | |
| "learning_rate": 4.3933518746322944e-05, | |
| "loss": 5.7516, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.37205952952472393, | |
| "grad_norm": 2.44496488571167, | |
| "learning_rate": 4.3920147617264806e-05, | |
| "loss": 5.8393, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.3728596575452072, | |
| "grad_norm": 2.6613078117370605, | |
| "learning_rate": 4.390677648820667e-05, | |
| "loss": 5.6764, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.3736597855656905, | |
| "grad_norm": 3.99092173576355, | |
| "learning_rate": 4.3893405359148525e-05, | |
| "loss": 5.8658, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.3744599135861738, | |
| "grad_norm": 1.6338485479354858, | |
| "learning_rate": 4.388003423009039e-05, | |
| "loss": 5.7527, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.37526004160665705, | |
| "grad_norm": 2.3723371028900146, | |
| "learning_rate": 4.386666310103225e-05, | |
| "loss": 5.7482, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.37606016962714034, | |
| "grad_norm": 2.630424976348877, | |
| "learning_rate": 4.385329197197411e-05, | |
| "loss": 5.7539, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.37686029764762363, | |
| "grad_norm": 2.3873038291931152, | |
| "learning_rate": 4.3839920842915976e-05, | |
| "loss": 5.6729, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.37766042566810687, | |
| "grad_norm": 1.9391748905181885, | |
| "learning_rate": 4.382654971385784e-05, | |
| "loss": 5.6794, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.37846055368859016, | |
| "grad_norm": 2.103975296020508, | |
| "learning_rate": 4.38131785847997e-05, | |
| "loss": 5.5104, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.37926068170907346, | |
| "grad_norm": 3.731184959411621, | |
| "learning_rate": 4.3799807455741564e-05, | |
| "loss": 5.6699, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.38006080972955675, | |
| "grad_norm": 2.881068468093872, | |
| "learning_rate": 4.3786436326683426e-05, | |
| "loss": 5.6394, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.38086093775004, | |
| "grad_norm": 2.5963799953460693, | |
| "learning_rate": 4.377306519762529e-05, | |
| "loss": 5.784, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.3816610657705233, | |
| "grad_norm": 1.9520230293273926, | |
| "learning_rate": 4.375969406856715e-05, | |
| "loss": 5.7608, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.38246119379100657, | |
| "grad_norm": 2.386702537536621, | |
| "learning_rate": 4.374766005241483e-05, | |
| "loss": 5.5725, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.38326132181148986, | |
| "grad_norm": 2.3830511569976807, | |
| "learning_rate": 4.3734288923356694e-05, | |
| "loss": 5.5584, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.3840614498319731, | |
| "grad_norm": 2.1514739990234375, | |
| "learning_rate": 4.3720917794298556e-05, | |
| "loss": 5.6621, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.3848615778524564, | |
| "grad_norm": 2.5376317501068115, | |
| "learning_rate": 4.370754666524042e-05, | |
| "loss": 5.4138, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.3856617058729397, | |
| "grad_norm": 3.425899028778076, | |
| "learning_rate": 4.3694175536182275e-05, | |
| "loss": 5.6478, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.3864618338934229, | |
| "grad_norm": 2.7518632411956787, | |
| "learning_rate": 4.368080440712414e-05, | |
| "loss": 5.6556, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.3872619619139062, | |
| "grad_norm": 3.119227647781372, | |
| "learning_rate": 4.3667433278066e-05, | |
| "loss": 5.7925, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.3880620899343895, | |
| "grad_norm": 3.2664616107940674, | |
| "learning_rate": 4.365406214900786e-05, | |
| "loss": 5.7176, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.3888622179548728, | |
| "grad_norm": 2.5125045776367188, | |
| "learning_rate": 4.3640691019949726e-05, | |
| "loss": 5.6511, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.38966234597535604, | |
| "grad_norm": 2.992112874984741, | |
| "learning_rate": 4.362731989089159e-05, | |
| "loss": 5.6426, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.39046247399583933, | |
| "grad_norm": 4.46783971786499, | |
| "learning_rate": 4.361394876183345e-05, | |
| "loss": 5.736, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.3912626020163226, | |
| "grad_norm": 1.8372838497161865, | |
| "learning_rate": 4.3600577632775314e-05, | |
| "loss": 5.7603, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.39206273003680586, | |
| "grad_norm": 2.1635375022888184, | |
| "learning_rate": 4.3587206503717176e-05, | |
| "loss": 5.6019, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.39286285805728915, | |
| "grad_norm": 2.2425310611724854, | |
| "learning_rate": 4.357383537465904e-05, | |
| "loss": 5.6829, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.39366298607777245, | |
| "grad_norm": 2.408907413482666, | |
| "learning_rate": 4.35604642456009e-05, | |
| "loss": 5.6821, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.39446311409825574, | |
| "grad_norm": 3.012258291244507, | |
| "learning_rate": 4.3547093116542765e-05, | |
| "loss": 5.7503, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.395263242118739, | |
| "grad_norm": 3.187053680419922, | |
| "learning_rate": 4.353372198748463e-05, | |
| "loss": 5.6459, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.39606337013922227, | |
| "grad_norm": 2.7528955936431885, | |
| "learning_rate": 4.352035085842649e-05, | |
| "loss": 5.6386, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.39686349815970556, | |
| "grad_norm": 2.9744699001312256, | |
| "learning_rate": 4.350697972936835e-05, | |
| "loss": 5.5938, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.39766362618018886, | |
| "grad_norm": 2.779604196548462, | |
| "learning_rate": 4.3493608600310215e-05, | |
| "loss": 5.5459, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.3984637542006721, | |
| "grad_norm": 2.9092133045196533, | |
| "learning_rate": 4.348023747125207e-05, | |
| "loss": 5.7695, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.3992638822211554, | |
| "grad_norm": 2.800872802734375, | |
| "learning_rate": 4.3466866342193934e-05, | |
| "loss": 5.6943, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.4000640102416387, | |
| "grad_norm": 3.299595832824707, | |
| "learning_rate": 4.3453495213135797e-05, | |
| "loss": 5.4432, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4008641382621219, | |
| "grad_norm": 2.2425456047058105, | |
| "learning_rate": 4.344012408407766e-05, | |
| "loss": 5.6688, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.4016642662826052, | |
| "grad_norm": 2.269378423690796, | |
| "learning_rate": 4.342675295501952e-05, | |
| "loss": 5.7713, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.4024643943030885, | |
| "grad_norm": 2.3903868198394775, | |
| "learning_rate": 4.3413381825961385e-05, | |
| "loss": 5.5926, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.4032645223235718, | |
| "grad_norm": 3.267918109893799, | |
| "learning_rate": 4.340001069690325e-05, | |
| "loss": 5.6806, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.40406465034405503, | |
| "grad_norm": 3.2075066566467285, | |
| "learning_rate": 4.338663956784511e-05, | |
| "loss": 5.6582, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.4048647783645383, | |
| "grad_norm": 2.5458226203918457, | |
| "learning_rate": 4.337326843878697e-05, | |
| "loss": 5.6576, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.4056649063850216, | |
| "grad_norm": 2.0331077575683594, | |
| "learning_rate": 4.3359897309728835e-05, | |
| "loss": 5.6725, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.40646503440550485, | |
| "grad_norm": 2.406907796859741, | |
| "learning_rate": 4.33465261806707e-05, | |
| "loss": 5.5168, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.40726516242598815, | |
| "grad_norm": 2.661137580871582, | |
| "learning_rate": 4.333315505161256e-05, | |
| "loss": 5.5953, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.40806529044647144, | |
| "grad_norm": 2.857725143432617, | |
| "learning_rate": 4.3319783922554423e-05, | |
| "loss": 5.6702, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.40886541846695473, | |
| "grad_norm": 2.7894747257232666, | |
| "learning_rate": 4.3306412793496286e-05, | |
| "loss": 5.6228, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.40966554648743797, | |
| "grad_norm": 2.8865861892700195, | |
| "learning_rate": 4.329304166443815e-05, | |
| "loss": 5.6859, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.41046567450792126, | |
| "grad_norm": 2.1493608951568604, | |
| "learning_rate": 4.3279670535380005e-05, | |
| "loss": 5.5516, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.41126580252840456, | |
| "grad_norm": 3.112820863723755, | |
| "learning_rate": 4.326629940632187e-05, | |
| "loss": 5.6409, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.41206593054888785, | |
| "grad_norm": 2.778876543045044, | |
| "learning_rate": 4.325292827726373e-05, | |
| "loss": 5.6948, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.4128660585693711, | |
| "grad_norm": 2.0409047603607178, | |
| "learning_rate": 4.323955714820559e-05, | |
| "loss": 5.5458, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.4136661865898544, | |
| "grad_norm": 3.1058828830718994, | |
| "learning_rate": 4.3226186019147456e-05, | |
| "loss": 5.8437, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.41446631461033767, | |
| "grad_norm": 3.306704044342041, | |
| "learning_rate": 4.321281489008932e-05, | |
| "loss": 5.691, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.4152664426308209, | |
| "grad_norm": 2.9495625495910645, | |
| "learning_rate": 4.319944376103118e-05, | |
| "loss": 5.6364, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.4160665706513042, | |
| "grad_norm": 2.1773974895477295, | |
| "learning_rate": 4.3186072631973044e-05, | |
| "loss": 5.6713, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.4168666986717875, | |
| "grad_norm": 2.0897533893585205, | |
| "learning_rate": 4.3172701502914906e-05, | |
| "loss": 5.6022, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.4176668266922708, | |
| "grad_norm": 2.2131927013397217, | |
| "learning_rate": 4.315933037385677e-05, | |
| "loss": 5.5728, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.418466954712754, | |
| "grad_norm": 2.225728750228882, | |
| "learning_rate": 4.314595924479863e-05, | |
| "loss": 5.5374, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.4192670827332373, | |
| "grad_norm": 2.219791889190674, | |
| "learning_rate": 4.3132588115740494e-05, | |
| "loss": 5.6986, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.4200672107537206, | |
| "grad_norm": 2.720323085784912, | |
| "learning_rate": 4.311921698668236e-05, | |
| "loss": 5.6046, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.42086733877420385, | |
| "grad_norm": 2.4254257678985596, | |
| "learning_rate": 4.310584585762422e-05, | |
| "loss": 5.5566, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.42166746679468714, | |
| "grad_norm": 2.2297472953796387, | |
| "learning_rate": 4.309247472856608e-05, | |
| "loss": 5.7431, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.42246759481517043, | |
| "grad_norm": 2.2767512798309326, | |
| "learning_rate": 4.3079103599507945e-05, | |
| "loss": 5.6661, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.4232677228356537, | |
| "grad_norm": 2.8959579467773438, | |
| "learning_rate": 4.30657324704498e-05, | |
| "loss": 5.6584, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.42406785085613696, | |
| "grad_norm": 2.49867844581604, | |
| "learning_rate": 4.3052361341391664e-05, | |
| "loss": 5.7564, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.42486797887662026, | |
| "grad_norm": 2.1820337772369385, | |
| "learning_rate": 4.3038990212333526e-05, | |
| "loss": 5.6288, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.42566810689710355, | |
| "grad_norm": 2.7174227237701416, | |
| "learning_rate": 4.302561908327539e-05, | |
| "loss": 5.6496, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.42646823491758684, | |
| "grad_norm": 2.7261149883270264, | |
| "learning_rate": 4.301224795421725e-05, | |
| "loss": 5.6557, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.4272683629380701, | |
| "grad_norm": 2.581760883331299, | |
| "learning_rate": 4.2998876825159114e-05, | |
| "loss": 5.604, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.42806849095855337, | |
| "grad_norm": 2.43254017829895, | |
| "learning_rate": 4.298550569610098e-05, | |
| "loss": 5.6041, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.42886861897903666, | |
| "grad_norm": 4.465782165527344, | |
| "learning_rate": 4.297213456704284e-05, | |
| "loss": 5.7158, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.4296687469995199, | |
| "grad_norm": 2.6434614658355713, | |
| "learning_rate": 4.29587634379847e-05, | |
| "loss": 5.6347, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.4304688750200032, | |
| "grad_norm": 2.344190835952759, | |
| "learning_rate": 4.2945392308926565e-05, | |
| "loss": 5.6062, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.4312690030404865, | |
| "grad_norm": 4.311372756958008, | |
| "learning_rate": 4.293202117986843e-05, | |
| "loss": 5.7356, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.4320691310609698, | |
| "grad_norm": 2.8204123973846436, | |
| "learning_rate": 4.291865005081029e-05, | |
| "loss": 5.63, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.432869259081453, | |
| "grad_norm": 3.333059072494507, | |
| "learning_rate": 4.290527892175215e-05, | |
| "loss": 5.5992, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.4336693871019363, | |
| "grad_norm": 2.0647048950195312, | |
| "learning_rate": 4.2891907792694016e-05, | |
| "loss": 5.691, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.4344695151224196, | |
| "grad_norm": 2.5100045204162598, | |
| "learning_rate": 4.287853666363588e-05, | |
| "loss": 5.615, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.43526964314290284, | |
| "grad_norm": 2.6120762825012207, | |
| "learning_rate": 4.286516553457774e-05, | |
| "loss": 5.746, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.43606977116338613, | |
| "grad_norm": 2.2886853218078613, | |
| "learning_rate": 4.2851794405519604e-05, | |
| "loss": 5.6783, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.4368698991838694, | |
| "grad_norm": 2.6724119186401367, | |
| "learning_rate": 4.283842327646147e-05, | |
| "loss": 5.6526, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.4376700272043527, | |
| "grad_norm": 2.2408151626586914, | |
| "learning_rate": 4.282505214740333e-05, | |
| "loss": 5.6314, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.43847015522483596, | |
| "grad_norm": 3.0294084548950195, | |
| "learning_rate": 4.281168101834519e-05, | |
| "loss": 5.6669, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.43927028324531925, | |
| "grad_norm": 2.1664011478424072, | |
| "learning_rate": 4.2798309889287055e-05, | |
| "loss": 5.4856, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.44007041126580254, | |
| "grad_norm": 3.4465417861938477, | |
| "learning_rate": 4.278493876022892e-05, | |
| "loss": 5.5859, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.4408705392862858, | |
| "grad_norm": 2.0116310119628906, | |
| "learning_rate": 4.277156763117078e-05, | |
| "loss": 5.5982, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.44167066730676907, | |
| "grad_norm": 2.578658103942871, | |
| "learning_rate": 4.275819650211264e-05, | |
| "loss": 5.4026, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.44247079532725236, | |
| "grad_norm": 3.1201677322387695, | |
| "learning_rate": 4.2744825373054506e-05, | |
| "loss": 5.7024, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.44327092334773566, | |
| "grad_norm": 2.2246837615966797, | |
| "learning_rate": 4.273145424399637e-05, | |
| "loss": 5.5842, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4440710513682189, | |
| "grad_norm": 2.1593568325042725, | |
| "learning_rate": 4.271808311493823e-05, | |
| "loss": 5.5099, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.4448711793887022, | |
| "grad_norm": 3.082218885421753, | |
| "learning_rate": 4.2704711985880094e-05, | |
| "loss": 5.5539, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.4456713074091855, | |
| "grad_norm": 3.2272634506225586, | |
| "learning_rate": 4.2691340856821956e-05, | |
| "loss": 5.73, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.4464714354296688, | |
| "grad_norm": 2.301713466644287, | |
| "learning_rate": 4.267796972776382e-05, | |
| "loss": 5.5444, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.447271563450152, | |
| "grad_norm": 3.2985429763793945, | |
| "learning_rate": 4.2664598598705675e-05, | |
| "loss": 5.7499, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.4480716914706353, | |
| "grad_norm": 2.103994607925415, | |
| "learning_rate": 4.265122746964754e-05, | |
| "loss": 5.5627, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.4488718194911186, | |
| "grad_norm": 3.260099172592163, | |
| "learning_rate": 4.26378563405894e-05, | |
| "loss": 5.5692, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.44967194751160183, | |
| "grad_norm": 2.740907907485962, | |
| "learning_rate": 4.262448521153126e-05, | |
| "loss": 5.4984, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.4504720755320851, | |
| "grad_norm": 5.314218997955322, | |
| "learning_rate": 4.2611114082473126e-05, | |
| "loss": 5.5641, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.4512722035525684, | |
| "grad_norm": 3.0524938106536865, | |
| "learning_rate": 4.259774295341499e-05, | |
| "loss": 5.6375, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.4520723315730517, | |
| "grad_norm": 3.57781982421875, | |
| "learning_rate": 4.258437182435685e-05, | |
| "loss": 5.6726, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.45287245959353495, | |
| "grad_norm": 3.094510793685913, | |
| "learning_rate": 4.2571000695298714e-05, | |
| "loss": 5.7328, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.45367258761401824, | |
| "grad_norm": 2.731092929840088, | |
| "learning_rate": 4.2557629566240576e-05, | |
| "loss": 5.6667, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.45447271563450153, | |
| "grad_norm": 3.6701395511627197, | |
| "learning_rate": 4.254425843718244e-05, | |
| "loss": 5.641, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.45527284365498477, | |
| "grad_norm": 1.9017853736877441, | |
| "learning_rate": 4.25308873081243e-05, | |
| "loss": 5.6521, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.45607297167546806, | |
| "grad_norm": 3.2658119201660156, | |
| "learning_rate": 4.2517516179066165e-05, | |
| "loss": 5.6431, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.45687309969595136, | |
| "grad_norm": 2.227353572845459, | |
| "learning_rate": 4.250414505000803e-05, | |
| "loss": 5.6198, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.45767322771643465, | |
| "grad_norm": 1.7804296016693115, | |
| "learning_rate": 4.249077392094989e-05, | |
| "loss": 5.618, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.4584733557369179, | |
| "grad_norm": 2.9357879161834717, | |
| "learning_rate": 4.247740279189175e-05, | |
| "loss": 5.5222, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.4592734837574012, | |
| "grad_norm": 5.074959754943848, | |
| "learning_rate": 4.2464031662833615e-05, | |
| "loss": 5.7604, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.4600736117778845, | |
| "grad_norm": 2.4961061477661133, | |
| "learning_rate": 4.245066053377547e-05, | |
| "loss": 5.5699, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.46087373979836777, | |
| "grad_norm": 2.636403799057007, | |
| "learning_rate": 4.2437289404717334e-05, | |
| "loss": 5.745, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.461673867818851, | |
| "grad_norm": 2.4829630851745605, | |
| "learning_rate": 4.2423918275659197e-05, | |
| "loss": 5.9779, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.4624739958393343, | |
| "grad_norm": 2.389112710952759, | |
| "learning_rate": 4.241054714660106e-05, | |
| "loss": 5.696, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.4632741238598176, | |
| "grad_norm": 2.3053462505340576, | |
| "learning_rate": 4.239717601754292e-05, | |
| "loss": 5.6567, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.4640742518803008, | |
| "grad_norm": 2.9635446071624756, | |
| "learning_rate": 4.2383804888484785e-05, | |
| "loss": 5.7643, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.4648743799007841, | |
| "grad_norm": 3.3227570056915283, | |
| "learning_rate": 4.237043375942665e-05, | |
| "loss": 5.5425, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.4656745079212674, | |
| "grad_norm": 3.2959067821502686, | |
| "learning_rate": 4.235706263036851e-05, | |
| "loss": 5.5886, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.4664746359417507, | |
| "grad_norm": 2.497953176498413, | |
| "learning_rate": 4.234369150131037e-05, | |
| "loss": 5.6248, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.46727476396223394, | |
| "grad_norm": 3.5957205295562744, | |
| "learning_rate": 4.2330320372252235e-05, | |
| "loss": 5.5345, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.46807489198271723, | |
| "grad_norm": 2.9113316535949707, | |
| "learning_rate": 4.23169492431941e-05, | |
| "loss": 5.7358, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.4688750200032005, | |
| "grad_norm": 3.8617255687713623, | |
| "learning_rate": 4.230357811413596e-05, | |
| "loss": 5.7451, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.46967514802368376, | |
| "grad_norm": 2.5546538829803467, | |
| "learning_rate": 4.2290206985077824e-05, | |
| "loss": 5.5874, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.47047527604416706, | |
| "grad_norm": 3.7215869426727295, | |
| "learning_rate": 4.2276835856019686e-05, | |
| "loss": 5.5462, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.47127540406465035, | |
| "grad_norm": 3.3122622966766357, | |
| "learning_rate": 4.226346472696155e-05, | |
| "loss": 5.7368, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.47207553208513364, | |
| "grad_norm": 2.3962459564208984, | |
| "learning_rate": 4.2250093597903405e-05, | |
| "loss": 5.7328, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.4728756601056169, | |
| "grad_norm": 2.497668504714966, | |
| "learning_rate": 4.223672246884527e-05, | |
| "loss": 5.7063, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.4736757881261002, | |
| "grad_norm": 2.301725387573242, | |
| "learning_rate": 4.222335133978713e-05, | |
| "loss": 5.6029, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.47447591614658347, | |
| "grad_norm": 3.840155839920044, | |
| "learning_rate": 4.220998021072899e-05, | |
| "loss": 5.825, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.47527604416706676, | |
| "grad_norm": 3.1776278018951416, | |
| "learning_rate": 4.2196609081670856e-05, | |
| "loss": 5.6421, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.47607617218755, | |
| "grad_norm": 2.1823127269744873, | |
| "learning_rate": 4.218323795261272e-05, | |
| "loss": 5.7154, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.4768763002080333, | |
| "grad_norm": 2.944390058517456, | |
| "learning_rate": 4.216986682355458e-05, | |
| "loss": 5.5429, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.4776764282285166, | |
| "grad_norm": 2.035430431365967, | |
| "learning_rate": 4.2156495694496444e-05, | |
| "loss": 5.8187, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.4784765562489998, | |
| "grad_norm": 3.167098045349121, | |
| "learning_rate": 4.2143124565438306e-05, | |
| "loss": 5.5891, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.4792766842694831, | |
| "grad_norm": 1.9377233982086182, | |
| "learning_rate": 4.212975343638017e-05, | |
| "loss": 5.7428, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.4800768122899664, | |
| "grad_norm": 2.759096622467041, | |
| "learning_rate": 4.211638230732203e-05, | |
| "loss": 5.5572, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4808769403104497, | |
| "grad_norm": 2.074033498764038, | |
| "learning_rate": 4.2103011178263894e-05, | |
| "loss": 5.517, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.48167706833093293, | |
| "grad_norm": 2.2866854667663574, | |
| "learning_rate": 4.208964004920576e-05, | |
| "loss": 5.6539, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.4824771963514162, | |
| "grad_norm": 1.9909095764160156, | |
| "learning_rate": 4.207626892014762e-05, | |
| "loss": 5.5532, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.4832773243718995, | |
| "grad_norm": 3.245906114578247, | |
| "learning_rate": 4.206289779108948e-05, | |
| "loss": 5.6797, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.48407745239238276, | |
| "grad_norm": 2.013009786605835, | |
| "learning_rate": 4.2049526662031345e-05, | |
| "loss": 5.6378, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.48487758041286605, | |
| "grad_norm": 2.5478925704956055, | |
| "learning_rate": 4.20361555329732e-05, | |
| "loss": 5.555, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.48567770843334934, | |
| "grad_norm": 3.079225778579712, | |
| "learning_rate": 4.2022784403915064e-05, | |
| "loss": 5.7618, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.48647783645383263, | |
| "grad_norm": 2.2639927864074707, | |
| "learning_rate": 4.2009413274856926e-05, | |
| "loss": 5.8063, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.48727796447431587, | |
| "grad_norm": 4.630524158477783, | |
| "learning_rate": 4.199604214579879e-05, | |
| "loss": 5.6403, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.48807809249479917, | |
| "grad_norm": 3.11018967628479, | |
| "learning_rate": 4.198267101674065e-05, | |
| "loss": 5.7517, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.48887822051528246, | |
| "grad_norm": 8.462982177734375, | |
| "learning_rate": 4.1969299887682515e-05, | |
| "loss": 5.7311, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.4896783485357657, | |
| "grad_norm": 2.418065071105957, | |
| "learning_rate": 4.195592875862438e-05, | |
| "loss": 5.6239, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.490478476556249, | |
| "grad_norm": 2.5452466011047363, | |
| "learning_rate": 4.194255762956624e-05, | |
| "loss": 5.7417, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.4912786045767323, | |
| "grad_norm": 2.986041307449341, | |
| "learning_rate": 4.19291865005081e-05, | |
| "loss": 5.663, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.4920787325972156, | |
| "grad_norm": 2.7642807960510254, | |
| "learning_rate": 4.1915815371449965e-05, | |
| "loss": 5.5379, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.4928788606176988, | |
| "grad_norm": 4.326907157897949, | |
| "learning_rate": 4.190244424239183e-05, | |
| "loss": 5.8058, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.4936789886381821, | |
| "grad_norm": 1.9514706134796143, | |
| "learning_rate": 4.188907311333369e-05, | |
| "loss": 5.7004, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.4944791166586654, | |
| "grad_norm": 2.5721428394317627, | |
| "learning_rate": 4.187570198427555e-05, | |
| "loss": 5.6959, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.4952792446791487, | |
| "grad_norm": 2.6619083881378174, | |
| "learning_rate": 4.1862330855217416e-05, | |
| "loss": 5.7196, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.4960793726996319, | |
| "grad_norm": 2.322341203689575, | |
| "learning_rate": 4.184895972615928e-05, | |
| "loss": 5.5998, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.4968795007201152, | |
| "grad_norm": 2.280777931213379, | |
| "learning_rate": 4.183558859710114e-05, | |
| "loss": 5.5171, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.4976796287405985, | |
| "grad_norm": 1.9774320125579834, | |
| "learning_rate": 4.1822217468043004e-05, | |
| "loss": 5.6368, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.49847975676108175, | |
| "grad_norm": 2.199708938598633, | |
| "learning_rate": 4.180884633898487e-05, | |
| "loss": 5.4638, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.49927988478156504, | |
| "grad_norm": 2.0054879188537598, | |
| "learning_rate": 4.179547520992673e-05, | |
| "loss": 5.4624, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.5000800128020483, | |
| "grad_norm": 2.0623903274536133, | |
| "learning_rate": 4.178210408086859e-05, | |
| "loss": 5.6554, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.5008801408225316, | |
| "grad_norm": 2.5907487869262695, | |
| "learning_rate": 4.1768732951810455e-05, | |
| "loss": 5.4989, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.5016802688430149, | |
| "grad_norm": 2.181987762451172, | |
| "learning_rate": 4.175536182275232e-05, | |
| "loss": 5.624, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.5024803968634982, | |
| "grad_norm": 2.9678001403808594, | |
| "learning_rate": 4.174199069369418e-05, | |
| "loss": 5.6545, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.5032805248839815, | |
| "grad_norm": 5.213638782501221, | |
| "learning_rate": 4.172861956463604e-05, | |
| "loss": 5.7048, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.5040806529044647, | |
| "grad_norm": 2.465900182723999, | |
| "learning_rate": 4.1715248435577906e-05, | |
| "loss": 5.646, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.504880780924948, | |
| "grad_norm": 2.94570255279541, | |
| "learning_rate": 4.170187730651977e-05, | |
| "loss": 5.6274, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.5056809089454313, | |
| "grad_norm": 3.5255651473999023, | |
| "learning_rate": 4.168850617746163e-05, | |
| "loss": 5.5336, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.5064810369659145, | |
| "grad_norm": 2.3499608039855957, | |
| "learning_rate": 4.1675135048403494e-05, | |
| "loss": 5.7768, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.5072811649863979, | |
| "grad_norm": 2.0476951599121094, | |
| "learning_rate": 4.1661763919345356e-05, | |
| "loss": 5.5927, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.5080812930068811, | |
| "grad_norm": 2.4708118438720703, | |
| "learning_rate": 4.164839279028722e-05, | |
| "loss": 5.6458, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.5088814210273643, | |
| "grad_norm": 2.465075731277466, | |
| "learning_rate": 4.163502166122908e-05, | |
| "loss": 5.5744, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.5096815490478477, | |
| "grad_norm": 2.9378490447998047, | |
| "learning_rate": 4.162165053217094e-05, | |
| "loss": 5.6963, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.5104816770683309, | |
| "grad_norm": 2.201359987258911, | |
| "learning_rate": 4.16082794031128e-05, | |
| "loss": 5.613, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.5112818050888142, | |
| "grad_norm": 1.8427401781082153, | |
| "learning_rate": 4.159490827405466e-05, | |
| "loss": 5.5494, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.5120819331092975, | |
| "grad_norm": 1.9969813823699951, | |
| "learning_rate": 4.1581537144996526e-05, | |
| "loss": 5.5783, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.5128820611297807, | |
| "grad_norm": 2.9670321941375732, | |
| "learning_rate": 4.156816601593839e-05, | |
| "loss": 5.7176, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.5136821891502641, | |
| "grad_norm": 2.76875901222229, | |
| "learning_rate": 4.155479488688025e-05, | |
| "loss": 5.5584, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.5144823171707473, | |
| "grad_norm": 3.2874600887298584, | |
| "learning_rate": 4.1541423757822114e-05, | |
| "loss": 5.8726, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5152824451912306, | |
| "grad_norm": 2.4672482013702393, | |
| "learning_rate": 4.1528052628763977e-05, | |
| "loss": 5.764, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.5160825732117139, | |
| "grad_norm": 3.5424506664276123, | |
| "learning_rate": 4.151468149970584e-05, | |
| "loss": 5.6612, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.5168827012321972, | |
| "grad_norm": 2.7947871685028076, | |
| "learning_rate": 4.15013103706477e-05, | |
| "loss": 5.668, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5176828292526804, | |
| "grad_norm": 2.624370574951172, | |
| "learning_rate": 4.1487939241589565e-05, | |
| "loss": 5.577, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5184829572731637, | |
| "grad_norm": 2.276289701461792, | |
| "learning_rate": 4.147456811253143e-05, | |
| "loss": 5.7592, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.519283085293647, | |
| "grad_norm": 2.751945972442627, | |
| "learning_rate": 4.146119698347329e-05, | |
| "loss": 5.6251, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.5200832133141302, | |
| "grad_norm": 2.1990444660186768, | |
| "learning_rate": 4.144782585441515e-05, | |
| "loss": 5.5141, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.5208833413346136, | |
| "grad_norm": 2.732024908065796, | |
| "learning_rate": 4.1434454725357015e-05, | |
| "loss": 5.5938, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.5216834693550968, | |
| "grad_norm": 2.6876533031463623, | |
| "learning_rate": 4.142108359629887e-05, | |
| "loss": 5.7126, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.5224835973755801, | |
| "grad_norm": 2.660323143005371, | |
| "learning_rate": 4.1407712467240734e-05, | |
| "loss": 5.6261, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.5232837253960634, | |
| "grad_norm": 2.567084550857544, | |
| "learning_rate": 4.13943413381826e-05, | |
| "loss": 5.5248, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.5240838534165466, | |
| "grad_norm": 4.317018032073975, | |
| "learning_rate": 4.138097020912446e-05, | |
| "loss": 5.4444, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.52488398143703, | |
| "grad_norm": 2.0361647605895996, | |
| "learning_rate": 4.136759908006632e-05, | |
| "loss": 5.7532, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.5256841094575132, | |
| "grad_norm": 2.0946271419525146, | |
| "learning_rate": 4.1354227951008185e-05, | |
| "loss": 5.6343, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.5264842374779964, | |
| "grad_norm": 3.3724842071533203, | |
| "learning_rate": 4.134085682195005e-05, | |
| "loss": 5.6455, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.5272843654984798, | |
| "grad_norm": 4.078947067260742, | |
| "learning_rate": 4.132748569289191e-05, | |
| "loss": 5.6681, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.528084493518963, | |
| "grad_norm": 4.288105010986328, | |
| "learning_rate": 4.131411456383377e-05, | |
| "loss": 5.7152, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.5288846215394463, | |
| "grad_norm": 2.5208754539489746, | |
| "learning_rate": 4.1300743434775635e-05, | |
| "loss": 5.5715, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.5296847495599296, | |
| "grad_norm": 2.6902217864990234, | |
| "learning_rate": 4.12873723057175e-05, | |
| "loss": 5.4997, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.5304848775804129, | |
| "grad_norm": 2.4580068588256836, | |
| "learning_rate": 4.127400117665936e-05, | |
| "loss": 5.7656, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.5312850056008962, | |
| "grad_norm": 2.5117955207824707, | |
| "learning_rate": 4.1260630047601224e-05, | |
| "loss": 5.6373, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.5320851336213794, | |
| "grad_norm": 2.660921096801758, | |
| "learning_rate": 4.1247258918543086e-05, | |
| "loss": 5.6829, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.5328852616418627, | |
| "grad_norm": 2.4601287841796875, | |
| "learning_rate": 4.123388778948495e-05, | |
| "loss": 5.7702, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.533685389662346, | |
| "grad_norm": 2.9025120735168457, | |
| "learning_rate": 4.122051666042681e-05, | |
| "loss": 5.6374, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.5344855176828293, | |
| "grad_norm": 2.8221569061279297, | |
| "learning_rate": 4.120714553136867e-05, | |
| "loss": 5.5568, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.5352856457033125, | |
| "grad_norm": 2.3035178184509277, | |
| "learning_rate": 4.119377440231053e-05, | |
| "loss": 5.5845, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.5360857737237958, | |
| "grad_norm": 2.0955657958984375, | |
| "learning_rate": 4.118040327325239e-05, | |
| "loss": 5.687, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.5368859017442791, | |
| "grad_norm": 2.530156135559082, | |
| "learning_rate": 4.1167032144194256e-05, | |
| "loss": 5.5772, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.5376860297647623, | |
| "grad_norm": 2.2060387134552, | |
| "learning_rate": 4.115366101513612e-05, | |
| "loss": 5.5964, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.5384861577852457, | |
| "grad_norm": 2.720702886581421, | |
| "learning_rate": 4.114028988607798e-05, | |
| "loss": 5.5432, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.5392862858057289, | |
| "grad_norm": 2.2585232257843018, | |
| "learning_rate": 4.1126918757019844e-05, | |
| "loss": 5.77, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.5400864138262121, | |
| "grad_norm": 2.052316904067993, | |
| "learning_rate": 4.1113547627961706e-05, | |
| "loss": 5.5679, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.5408865418466955, | |
| "grad_norm": 2.772500991821289, | |
| "learning_rate": 4.110017649890357e-05, | |
| "loss": 5.5608, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.5416866698671787, | |
| "grad_norm": 2.158129930496216, | |
| "learning_rate": 4.108680536984543e-05, | |
| "loss": 5.6612, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.5424867978876621, | |
| "grad_norm": 2.874685287475586, | |
| "learning_rate": 4.1073434240787294e-05, | |
| "loss": 5.5999, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5432869259081453, | |
| "grad_norm": 2.2797632217407227, | |
| "learning_rate": 4.106006311172916e-05, | |
| "loss": 5.7243, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.5440870539286286, | |
| "grad_norm": 2.998309850692749, | |
| "learning_rate": 4.1048029095576836e-05, | |
| "loss": 5.5031, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5448871819491119, | |
| "grad_norm": 2.8155364990234375, | |
| "learning_rate": 4.10346579665187e-05, | |
| "loss": 5.7631, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.5456873099695951, | |
| "grad_norm": 2.327279806137085, | |
| "learning_rate": 4.102128683746056e-05, | |
| "loss": 5.6293, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.5464874379900784, | |
| "grad_norm": 3.3200621604919434, | |
| "learning_rate": 4.100791570840242e-05, | |
| "loss": 5.717, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.5472875660105617, | |
| "grad_norm": 2.521144390106201, | |
| "learning_rate": 4.099454457934428e-05, | |
| "loss": 5.5705, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.548087694031045, | |
| "grad_norm": 2.7198219299316406, | |
| "learning_rate": 4.098117345028614e-05, | |
| "loss": 5.5931, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.5488878220515282, | |
| "grad_norm": 2.701251268386841, | |
| "learning_rate": 4.0967802321228006e-05, | |
| "loss": 5.4706, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5496879500720115, | |
| "grad_norm": 2.2789149284362793, | |
| "learning_rate": 4.095443119216987e-05, | |
| "loss": 5.5883, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.5504880780924948, | |
| "grad_norm": 2.8821568489074707, | |
| "learning_rate": 4.094106006311173e-05, | |
| "loss": 5.7525, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5512882061129781, | |
| "grad_norm": 2.3450064659118652, | |
| "learning_rate": 4.0927688934053594e-05, | |
| "loss": 5.5166, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.5520883341334614, | |
| "grad_norm": 2.639960527420044, | |
| "learning_rate": 4.0914317804995456e-05, | |
| "loss": 5.7001, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5528884621539446, | |
| "grad_norm": 2.6743710041046143, | |
| "learning_rate": 4.090094667593732e-05, | |
| "loss": 5.7049, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.553688590174428, | |
| "grad_norm": 2.7540199756622314, | |
| "learning_rate": 4.088757554687918e-05, | |
| "loss": 5.5705, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.5544887181949112, | |
| "grad_norm": 3.2703442573547363, | |
| "learning_rate": 4.0874204417821044e-05, | |
| "loss": 5.5585, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.5552888462153944, | |
| "grad_norm": 3.684135913848877, | |
| "learning_rate": 4.086083328876291e-05, | |
| "loss": 5.6561, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.5560889742358778, | |
| "grad_norm": 2.918989896774292, | |
| "learning_rate": 4.084746215970477e-05, | |
| "loss": 5.5171, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.556889102256361, | |
| "grad_norm": 2.5902323722839355, | |
| "learning_rate": 4.083409103064663e-05, | |
| "loss": 5.6703, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5576892302768442, | |
| "grad_norm": 2.23820161819458, | |
| "learning_rate": 4.0820719901588495e-05, | |
| "loss": 5.7048, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.5584893582973276, | |
| "grad_norm": 2.4339401721954346, | |
| "learning_rate": 4.080734877253036e-05, | |
| "loss": 5.4264, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5592894863178108, | |
| "grad_norm": 3.3097031116485596, | |
| "learning_rate": 4.0793977643472214e-05, | |
| "loss": 5.5931, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.5600896143382941, | |
| "grad_norm": 2.6903202533721924, | |
| "learning_rate": 4.0780606514414077e-05, | |
| "loss": 5.5349, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5600896143382941, | |
| "eval_loss": 5.870830535888672, | |
| "eval_runtime": 13.3044, | |
| "eval_samples_per_second": 3.007, | |
| "eval_steps_per_second": 0.376, | |
| "step": 7000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 37494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 7000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |