{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 62980,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.015878056525881232, "grad_norm": 69456.203125, "learning_rate": 4.992219752302319e-05, "loss": 3.3489, "step": 50},
    {"epoch": 0.031756113051762465, "grad_norm": 28315.216796875, "learning_rate": 4.984280724039378e-05, "loss": 2.1825, "step": 100},
    {"epoch": 0.047634169577643694, "grad_norm": 22939.28515625, "learning_rate": 4.976341695776437e-05, "loss": 1.8984, "step": 150},
    {"epoch": 0.06351222610352493, "grad_norm": 23446.751953125, "learning_rate": 4.968402667513497e-05, "loss": 1.8356, "step": 200},
    {"epoch": 0.07939028262940616, "grad_norm": 15620.2763671875, "learning_rate": 4.960463639250556e-05, "loss": 1.8063, "step": 250},
    {"epoch": 0.09526833915528739, "grad_norm": 17849.13671875, "learning_rate": 4.9525246109876156e-05, "loss": 1.7904, "step": 300},
    {"epoch": 0.11114639568116863, "grad_norm": 16811.81640625, "learning_rate": 4.944585582724674e-05, "loss": 1.7763, "step": 350},
    {"epoch": 0.12702445220704986, "grad_norm": 15963.05859375, "learning_rate": 4.936646554461734e-05, "loss": 1.7694, "step": 400},
    {"epoch": 0.1429025087329311, "grad_norm": 13125.708984375, "learning_rate": 4.928707526198794e-05, "loss": 1.7613, "step": 450},
    {"epoch": 0.15878056525881232, "grad_norm": 13808.8876953125, "learning_rate": 4.920768497935853e-05, "loss": 1.7565, "step": 500},
    {"epoch": 0.17465862178469355, "grad_norm": 15005.7353515625, "learning_rate": 4.912829469672912e-05, "loss": 1.7512, "step": 550},
    {"epoch": 0.19053667831057478, "grad_norm": 15644.3916015625, "learning_rate": 4.904890441409972e-05, "loss": 1.7457, "step": 600},
    {"epoch": 0.206414734836456, "grad_norm": 13973.876953125, "learning_rate": 4.896951413147031e-05, "loss": 1.7395, "step": 650},
    {"epoch": 0.22229279136233726, "grad_norm": 16523.75390625, "learning_rate": 4.88901238488409e-05, "loss": 1.7285, "step": 700},
    {"epoch": 0.2381708478882185, "grad_norm": 25229.689453125, "learning_rate": 4.88107335662115e-05, "loss": 1.7046, "step": 750},
    {"epoch": 0.2540489044140997, "grad_norm": 48292.5078125, "learning_rate": 4.8731343283582094e-05, "loss": 1.664, "step": 800},
    {"epoch": 0.2699269609399809, "grad_norm": 38503.4296875, "learning_rate": 4.865195300095269e-05, "loss": 1.6121, "step": 850},
    {"epoch": 0.2858050174658622, "grad_norm": 48101.1484375, "learning_rate": 4.8572562718323275e-05, "loss": 1.5622, "step": 900},
    {"epoch": 0.30168307399174343, "grad_norm": 63038.48046875, "learning_rate": 4.8493172435693875e-05, "loss": 1.5095, "step": 950},
    {"epoch": 0.31756113051762463, "grad_norm": 80867.75, "learning_rate": 4.841378215306447e-05, "loss": 1.4587, "step": 1000},
    {"epoch": 0.3334391870435059, "grad_norm": 42233.703125, "learning_rate": 4.8334391870435056e-05, "loss": 1.4149, "step": 1050},
    {"epoch": 0.3493172435693871, "grad_norm": 53614.875, "learning_rate": 4.825500158780566e-05, "loss": 1.375, "step": 1100},
    {"epoch": 0.36519530009526835, "grad_norm": 43879.546875, "learning_rate": 4.817561130517625e-05, "loss": 1.3372, "step": 1150},
    {"epoch": 0.38107335662114955, "grad_norm": 43690.40625, "learning_rate": 4.8096221022546844e-05, "loss": 1.3026, "step": 1200},
    {"epoch": 0.3969514131470308, "grad_norm": 46655.68359375, "learning_rate": 4.801683073991743e-05, "loss": 1.2798, "step": 1250},
    {"epoch": 0.412829469672912, "grad_norm": 39245.97265625, "learning_rate": 4.793744045728803e-05, "loss": 1.2521, "step": 1300},
    {"epoch": 0.42870752619879327, "grad_norm": 52106.55859375, "learning_rate": 4.7858050174658626e-05, "loss": 1.2299, "step": 1350},
    {"epoch": 0.4445855827246745, "grad_norm": 41391.16796875, "learning_rate": 4.777865989202922e-05, "loss": 1.2141, "step": 1400},
    {"epoch": 0.4604636392505557, "grad_norm": 41273.65625, "learning_rate": 4.769926960939981e-05, "loss": 1.1954, "step": 1450},
    {"epoch": 0.476341695776437, "grad_norm": 48099.3203125, "learning_rate": 4.761987932677041e-05, "loss": 1.1785, "step": 1500},
    {"epoch": 0.4922197523023182, "grad_norm": 41167.43359375, "learning_rate": 4.7540489044141e-05, "loss": 1.1642, "step": 1550},
    {"epoch": 0.5080978088281994, "grad_norm": 40066.16796875, "learning_rate": 4.746109876151159e-05, "loss": 1.15, "step": 1600},
    {"epoch": 0.5239758653540807, "grad_norm": 57272.24609375, "learning_rate": 4.738170847888219e-05, "loss": 1.1426, "step": 1650},
    {"epoch": 0.5398539218799618, "grad_norm": 38700.19921875, "learning_rate": 4.730231819625278e-05, "loss": 1.1286, "step": 1700},
    {"epoch": 0.5557319784058431, "grad_norm": 39080.23828125, "learning_rate": 4.7222927913623376e-05, "loss": 1.116, "step": 1750},
    {"epoch": 0.5716100349317244, "grad_norm": 40904.78515625, "learning_rate": 4.714353763099397e-05, "loss": 1.1037, "step": 1800},
    {"epoch": 0.5874880914576056, "grad_norm": 46008.6328125, "learning_rate": 4.7064147348364564e-05, "loss": 1.1006, "step": 1850},
    {"epoch": 0.6033661479834869, "grad_norm": 51325.14453125, "learning_rate": 4.698475706573516e-05, "loss": 1.0899, "step": 1900},
    {"epoch": 0.619244204509368, "grad_norm": 38173.07421875, "learning_rate": 4.6905366783105745e-05, "loss": 1.0767, "step": 1950},
    {"epoch": 0.6351222610352493, "grad_norm": 36432.33203125, "learning_rate": 4.6825976500476345e-05, "loss": 1.0737, "step": 2000},
    {"epoch": 0.6510003175611305, "grad_norm": 38897.46484375, "learning_rate": 4.674658621784694e-05, "loss": 1.0631, "step": 2050},
    {"epoch": 0.6668783740870118, "grad_norm": 36565.90234375, "learning_rate": 4.666719593521753e-05, "loss": 1.0611, "step": 2100},
    {"epoch": 0.6827564306128929, "grad_norm": 38040.09765625, "learning_rate": 4.658780565258813e-05, "loss": 1.049, "step": 2150},
    {"epoch": 0.6986344871387742, "grad_norm": 40119.8984375, "learning_rate": 4.650841536995872e-05, "loss": 1.0401, "step": 2200},
    {"epoch": 0.7145125436646554, "grad_norm": 38403.37890625, "learning_rate": 4.6429025087329314e-05, "loss": 1.0373, "step": 2250},
    {"epoch": 0.7303906001905367, "grad_norm": 35558.62109375, "learning_rate": 4.634963480469991e-05, "loss": 1.0327, "step": 2300},
    {"epoch": 0.746268656716418, "grad_norm": 41901.265625, "learning_rate": 4.62702445220705e-05, "loss": 1.0258, "step": 2350},
    {"epoch": 0.7621467132422991, "grad_norm": 35863.7109375, "learning_rate": 4.6190854239441096e-05, "loss": 1.0181, "step": 2400},
    {"epoch": 0.7780247697681804, "grad_norm": 34477.07421875, "learning_rate": 4.611146395681169e-05, "loss": 1.0156, "step": 2450},
    {"epoch": 0.7939028262940616, "grad_norm": 36488.30078125, "learning_rate": 4.603207367418228e-05, "loss": 1.0109, "step": 2500},
    {"epoch": 0.8097808828199429, "grad_norm": 37530.74609375, "learning_rate": 4.595268339155288e-05, "loss": 1.0063, "step": 2550},
    {"epoch": 0.825658939345824, "grad_norm": 38817.25390625, "learning_rate": 4.587329310892347e-05, "loss": 1.0007, "step": 2600},
    {"epoch": 0.8415369958717053, "grad_norm": 36987.03515625, "learning_rate": 4.5793902826294065e-05, "loss": 0.9925, "step": 2650},
    {"epoch": 0.8574150523975865, "grad_norm": 36874.86328125, "learning_rate": 4.571451254366466e-05, "loss": 0.9931, "step": 2700},
    {"epoch": 0.8732931089234678, "grad_norm": 38325.61328125, "learning_rate": 4.563512226103525e-05, "loss": 0.984, "step": 2750},
    {"epoch": 0.889171165449349, "grad_norm": 35897.0859375, "learning_rate": 4.5555731978405846e-05, "loss": 0.982, "step": 2800},
    {"epoch": 0.9050492219752302, "grad_norm": 38165.0, "learning_rate": 4.547634169577643e-05, "loss": 0.9786, "step": 2850},
    {"epoch": 0.9209272785011114, "grad_norm": 36260.8671875, "learning_rate": 4.5396951413147034e-05, "loss": 0.9736, "step": 2900},
    {"epoch": 0.9368053350269927, "grad_norm": 35681.60546875, "learning_rate": 4.531756113051763e-05, "loss": 0.9657, "step": 2950},
    {"epoch": 0.952683391552874, "grad_norm": 33949.65234375, "learning_rate": 4.523817084788822e-05, "loss": 0.9691, "step": 3000},
    {"epoch": 0.9685614480787551, "grad_norm": 34695.6796875, "learning_rate": 4.5158780565258815e-05, "loss": 0.9621, "step": 3050},
    {"epoch": 0.9844395046046364, "grad_norm": 42460.8359375, "learning_rate": 4.507939028262941e-05, "loss": 0.9627, "step": 3100},
    {"epoch": 1.0003175611305177, "grad_norm": 37083.6796875, "learning_rate": 4.5e-05, "loss": 0.9538, "step": 3150},
    {"epoch": 1.0161956176563989, "grad_norm": 35333.8359375, "learning_rate": 4.49206097173706e-05, "loss": 0.951, "step": 3200},
    {"epoch": 1.03207367418228, "grad_norm": 34177.54296875, "learning_rate": 4.484121943474119e-05, "loss": 0.9496, "step": 3250},
    {"epoch": 1.0479517307081614, "grad_norm": 33729.51953125, "learning_rate": 4.4761829152111784e-05, "loss": 0.9431, "step": 3300},
    {"epoch": 1.0638297872340425, "grad_norm": 35274.5625, "learning_rate": 4.468243886948238e-05, "loss": 0.9405, "step": 3350},
    {"epoch": 1.0797078437599237, "grad_norm": 35014.6796875, "learning_rate": 4.460304858685297e-05, "loss": 0.9404, "step": 3400},
    {"epoch": 1.095585900285805, "grad_norm": 34381.96484375, "learning_rate": 4.4523658304223566e-05, "loss": 0.9367, "step": 3450},
    {"epoch": 1.1114639568116862, "grad_norm": 36695.0625, "learning_rate": 4.444426802159416e-05, "loss": 0.9309, "step": 3500},
    {"epoch": 1.1273420133375676, "grad_norm": 37935.7734375, "learning_rate": 4.436487773896475e-05, "loss": 0.9331, "step": 3550},
    {"epoch": 1.1432200698634487, "grad_norm": 34156.03125, "learning_rate": 4.428548745633535e-05, "loss": 0.9251, "step": 3600},
    {"epoch": 1.1590981263893299, "grad_norm": 34130.3046875, "learning_rate": 4.420609717370594e-05, "loss": 0.9263, "step": 3650},
    {"epoch": 1.1749761829152112, "grad_norm": 35052.70703125, "learning_rate": 4.4126706891076535e-05, "loss": 0.9217, "step": 3700},
    {"epoch": 1.1908542394410924, "grad_norm": 36061.64453125, "learning_rate": 4.404731660844713e-05, "loss": 0.9197, "step": 3750},
    {"epoch": 1.2067322959669737, "grad_norm": 32517.8984375, "learning_rate": 4.396792632581772e-05, "loss": 0.9208, "step": 3800},
    {"epoch": 1.2226103524928549, "grad_norm": 35002.6328125, "learning_rate": 4.3888536043188316e-05, "loss": 0.9197, "step": 3850},
    {"epoch": 1.238488409018736, "grad_norm": 32863.40234375, "learning_rate": 4.380914576055891e-05, "loss": 0.9089, "step": 3900},
    {"epoch": 1.2543664655446174, "grad_norm": 34469.52734375, "learning_rate": 4.3729755477929504e-05, "loss": 0.9127, "step": 3950},
    {"epoch": 1.2702445220704985, "grad_norm": 32288.01171875, "learning_rate": 4.36503651953001e-05, "loss": 0.9062, "step": 4000},
    {"epoch": 1.28612257859638, "grad_norm": 34935.56640625, "learning_rate": 4.357097491267069e-05, "loss": 0.9076, "step": 4050},
    {"epoch": 1.302000635122261, "grad_norm": 32266.908203125, "learning_rate": 4.3491584630041285e-05, "loss": 0.9079, "step": 4100},
    {"epoch": 1.3178786916481422, "grad_norm": 33226.34375, "learning_rate": 4.341219434741188e-05, "loss": 0.9044, "step": 4150},
    {"epoch": 1.3337567481740236, "grad_norm": 34449.453125, "learning_rate": 4.333280406478247e-05, "loss": 0.9038, "step": 4200},
    {"epoch": 1.3496348046999047, "grad_norm": 32593.271484375, "learning_rate": 4.325341378215307e-05, "loss": 0.8969, "step": 4250},
    {"epoch": 1.365512861225786, "grad_norm": 33997.08984375, "learning_rate": 4.317402349952366e-05, "loss": 0.8955, "step": 4300},
    {"epoch": 1.3813909177516672, "grad_norm": 34988.44921875, "learning_rate": 4.3094633216894254e-05, "loss": 0.8925, "step": 4350},
    {"epoch": 1.3972689742775484, "grad_norm": 31535.099609375, "learning_rate": 4.301524293426485e-05, "loss": 0.8931, "step": 4400},
    {"epoch": 1.4131470308034297, "grad_norm": 32298.515625, "learning_rate": 4.293585265163544e-05, "loss": 0.8914, "step": 4450},
    {"epoch": 1.4290250873293109, "grad_norm": 33798.9140625, "learning_rate": 4.2856462369006036e-05, "loss": 0.889, "step": 4500},
    {"epoch": 1.4449031438551923, "grad_norm": 33727.1484375, "learning_rate": 4.277707208637663e-05, "loss": 0.8891, "step": 4550},
    {"epoch": 1.4607812003810734, "grad_norm": 39095.109375, "learning_rate": 4.269768180374722e-05, "loss": 0.8932, "step": 4600},
    {"epoch": 1.4766592569069545, "grad_norm": 33266.66796875, "learning_rate": 4.261829152111782e-05, "loss": 0.8843, "step": 4650},
    {"epoch": 1.4925373134328357, "grad_norm": 33800.22265625, "learning_rate": 4.253890123848841e-05, "loss": 0.882, "step": 4700},
    {"epoch": 1.508415369958717, "grad_norm": 31127.240234375, "learning_rate": 4.2459510955859005e-05, "loss": 0.8828, "step": 4750},
    {"epoch": 1.5242934264845984, "grad_norm": 33765.43359375, "learning_rate": 4.23801206732296e-05, "loss": 0.8802, "step": 4800},
    {"epoch": 1.5401714830104796, "grad_norm": 32892.484375, "learning_rate": 4.230073039060019e-05, "loss": 0.882, "step": 4850},
    {"epoch": 1.5560495395363607, "grad_norm": 32505.76953125, "learning_rate": 4.2221340107970786e-05, "loss": 0.8772, "step": 4900},
    {"epoch": 1.5719275960622419, "grad_norm": 32439.31640625, "learning_rate": 4.214194982534138e-05, "loss": 0.8742, "step": 4950},
    {"epoch": 1.5878056525881232, "grad_norm": 32518.017578125, "learning_rate": 4.2062559542711974e-05, "loss": 0.873, "step": 5000},
    {"epoch": 1.6036837091140046, "grad_norm": 34613.1640625, "learning_rate": 4.198316926008257e-05, "loss": 0.873, "step": 5050},
    {"epoch": 1.6195617656398857, "grad_norm": 32058.451171875, "learning_rate": 4.190377897745316e-05, "loss": 0.873, "step": 5100},
    {"epoch": 1.6354398221657669, "grad_norm": 31566.736328125, "learning_rate": 4.1824388694823755e-05, "loss": 0.8675, "step": 5150},
    {"epoch": 1.651317878691648, "grad_norm": 33988.2734375, "learning_rate": 4.174499841219435e-05, "loss": 0.8667, "step": 5200},
    {"epoch": 1.6671959352175294, "grad_norm": 33580.97265625, "learning_rate": 4.166560812956494e-05, "loss": 0.8638, "step": 5250},
    {"epoch": 1.6830739917434108, "grad_norm": 33622.66015625, "learning_rate": 4.158621784693554e-05, "loss": 0.8668, "step": 5300},
    {"epoch": 1.698952048269292, "grad_norm": 32790.08203125, "learning_rate": 4.150682756430613e-05, "loss": 0.8616, "step": 5350},
    {"epoch": 1.714830104795173, "grad_norm": 31265.09765625, "learning_rate": 4.1427437281676724e-05, "loss": 0.8578, "step": 5400},
    {"epoch": 1.7307081613210542, "grad_norm": 31559.154296875, "learning_rate": 4.134804699904732e-05, "loss": 0.86, "step": 5450},
    {"epoch": 1.7465862178469356, "grad_norm": 31868.90234375, "learning_rate": 4.126865671641791e-05, "loss": 0.8612, "step": 5500},
    {"epoch": 1.7624642743728167, "grad_norm": 32017.955078125, "learning_rate": 4.1189266433788506e-05, "loss": 0.8599, "step": 5550},
    {"epoch": 1.778342330898698, "grad_norm": 32002.77734375, "learning_rate": 4.11098761511591e-05, "loss": 0.8592, "step": 5600},
    {"epoch": 1.7942203874245792, "grad_norm": 30244.373046875, "learning_rate": 4.103048586852969e-05, "loss": 0.855, "step": 5650},
    {"epoch": 1.8100984439504604, "grad_norm": 32472.248046875, "learning_rate": 4.095109558590029e-05, "loss": 0.857, "step": 5700},
    {"epoch": 1.8259765004763417, "grad_norm": 32474.484375, "learning_rate": 4.087170530327088e-05, "loss": 0.8526, "step": 5750},
    {"epoch": 1.841854557002223, "grad_norm": 31490.275390625, "learning_rate": 4.0792315020641475e-05, "loss": 0.8559, "step": 5800},
    {"epoch": 1.8577326135281043, "grad_norm": 31051.58203125, "learning_rate": 4.071292473801207e-05, "loss": 0.8522, "step": 5850},
    {"epoch": 1.8736106700539854, "grad_norm": 31409.7578125, "learning_rate": 4.063353445538267e-05, "loss": 0.8486, "step": 5900},
    {"epoch": 1.8894887265798666, "grad_norm": 32382.998046875, "learning_rate": 4.0554144172753256e-05, "loss": 0.8515, "step": 5950},
    {"epoch": 1.9053667831057477, "grad_norm": 31483.380859375, "learning_rate": 4.047475389012385e-05, "loss": 0.8477, "step": 6000},
    {"epoch": 1.921244839631629, "grad_norm": 32504.404296875, "learning_rate": 4.0395363607494444e-05, "loss": 0.8506, "step": 6050},
    {"epoch": 1.9371228961575104, "grad_norm": 31748.40234375, "learning_rate": 4.031597332486504e-05, "loss": 0.8461, "step": 6100},
    {"epoch": 1.9530009526833916, "grad_norm": 31397.05078125, "learning_rate": 4.023658304223563e-05, "loss": 0.8459, "step": 6150},
    {"epoch": 1.9688790092092727, "grad_norm": 32932.44140625, "learning_rate": 4.0157192759606225e-05, "loss": 0.8431, "step": 6200},
    {"epoch": 1.9847570657351539, "grad_norm": 32290.40234375, "learning_rate": 4.007780247697682e-05, "loss": 0.8435, "step": 6250},
    {"epoch": 2.0006351222610355, "grad_norm": 31642.935546875, "learning_rate": 3.999841219434741e-05, "loss": 0.8403, "step": 6300},
    {"epoch": 2.0165131787869166, "grad_norm": 31722.009765625, "learning_rate": 3.991902191171801e-05, "loss": 0.8393, "step": 6350},
    {"epoch": 2.0323912353127978, "grad_norm": 30855.62890625, "learning_rate": 3.98396316290886e-05, "loss": 0.8356, "step": 6400},
    {"epoch": 2.048269291838679, "grad_norm": 31825.283203125, "learning_rate": 3.9760241346459194e-05, "loss": 0.8342, "step": 6450},
    {"epoch": 2.06414734836456, "grad_norm": 32278.908203125, "learning_rate": 3.968085106382979e-05, "loss": 0.8337, "step": 6500},
    {"epoch": 2.080025404890441, "grad_norm": 31714.283203125, "learning_rate": 3.960146078120038e-05, "loss": 0.8348, "step": 6550},
    {"epoch": 2.0959034614163228, "grad_norm": 31567.390625, "learning_rate": 3.9522070498570976e-05, "loss": 0.8281, "step": 6600},
    {"epoch": 2.111781517942204, "grad_norm": 32440.802734375, "learning_rate": 3.944268021594157e-05, "loss": 0.831, "step": 6650},
    {"epoch": 2.127659574468085, "grad_norm": 32060.837890625, "learning_rate": 3.936328993331216e-05, "loss": 0.831, "step": 6700},
    {"epoch": 2.143537630993966, "grad_norm": 31656.82421875, "learning_rate": 3.928389965068276e-05, "loss": 0.829, "step": 6750},
    {"epoch": 2.1594156875198474, "grad_norm": 31442.107421875, "learning_rate": 3.920450936805336e-05, "loss": 0.8266, "step": 6800},
    {"epoch": 2.175293744045729, "grad_norm": 31876.103515625, "learning_rate": 3.9125119085423945e-05, "loss": 0.8256, "step": 6850},
    {"epoch": 2.19117180057161, "grad_norm": 31034.79296875, "learning_rate": 3.904572880279454e-05, "loss": 0.8217, "step": 6900},
    {"epoch": 2.2070498570974912, "grad_norm": 32682.744140625, "learning_rate": 3.896633852016513e-05, "loss": 0.8301, "step": 6950},
    {"epoch": 2.2229279136233724, "grad_norm": 32618.359375, "learning_rate": 3.8886948237535726e-05, "loss": 0.8211, "step": 7000},
    {"epoch": 2.2388059701492535, "grad_norm": 30968.298828125, "learning_rate": 3.880755795490632e-05, "loss": 0.825, "step": 7050},
    {"epoch": 2.254684026675135, "grad_norm": 30861.9921875, "learning_rate": 3.8728167672276914e-05, "loss": 0.8201, "step": 7100},
    {"epoch": 2.2705620832010163, "grad_norm": 30823.109375, "learning_rate": 3.8648777389647514e-05, "loss": 0.8197, "step": 7150},
    {"epoch": 2.2864401397268974, "grad_norm": 30141.990234375, "learning_rate": 3.85693871070181e-05, "loss": 0.8224, "step": 7200},
    {"epoch": 2.3023181962527786, "grad_norm": 34504.34375, "learning_rate": 3.8489996824388695e-05, "loss": 0.821, "step": 7250},
    {"epoch": 2.3181962527786597, "grad_norm": 30797.966796875, "learning_rate": 3.841060654175929e-05, "loss": 0.8203, "step": 7300},
    {"epoch": 2.3340743093045413, "grad_norm": 30129.06640625, "learning_rate": 3.833121625912988e-05, "loss": 0.8178, "step": 7350},
    {"epoch": 2.3499523658304224, "grad_norm": 30087.42578125, "learning_rate": 3.825182597650048e-05, "loss": 0.8219, "step": 7400},
    {"epoch": 2.3658304223563036, "grad_norm": 30917.18359375, "learning_rate": 3.817243569387107e-05, "loss": 0.8163, "step": 7450},
    {"epoch": 2.3817084788821847, "grad_norm": 33235.78515625, "learning_rate": 3.809304541124167e-05, "loss": 0.8149, "step": 7500},
    {"epoch": 2.397586535408066, "grad_norm": 30538.533203125, "learning_rate": 3.801365512861226e-05, "loss": 0.818, "step": 7550},
    {"epoch": 2.4134645919339475, "grad_norm": 31500.251953125, "learning_rate": 3.793426484598285e-05, "loss": 0.8171, "step": 7600},
    {"epoch": 2.4293426484598286, "grad_norm": 30612.111328125, "learning_rate": 3.7854874563353446e-05, "loss": 0.8132, "step": 7650},
    {"epoch": 2.4452207049857098, "grad_norm": 30479.9140625, "learning_rate": 3.7775484280724046e-05, "loss": 0.8147, "step": 7700},
    {"epoch": 2.461098761511591, "grad_norm": 30102.48046875, "learning_rate": 3.769609399809463e-05, "loss": 0.8112, "step": 7750},
    {"epoch": 2.476976818037472, "grad_norm": 30969.11328125, "learning_rate": 3.761670371546523e-05, "loss": 0.8161, "step": 7800},
    {"epoch": 2.4928548745633536, "grad_norm": 30215.08203125, "learning_rate": 3.753731343283583e-05, "loss": 0.8118, "step": 7850},
    {"epoch": 2.508732931089235, "grad_norm": 30152.841796875, "learning_rate": 3.7457923150206415e-05, "loss": 0.813, "step": 7900},
    {"epoch": 2.524610987615116, "grad_norm": 30816.564453125, "learning_rate": 3.737853286757701e-05, "loss": 0.8124, "step": 7950},
    {"epoch": 2.540489044140997, "grad_norm": 31137.095703125, "learning_rate": 3.72991425849476e-05, "loss": 0.8105, "step": 8000},
    {"epoch": 2.5563671006668782, "grad_norm": 30815.490234375, "learning_rate": 3.72197523023182e-05, "loss": 0.8117, "step": 8050},
    {"epoch": 2.57224515719276, "grad_norm": 31679.986328125, "learning_rate": 3.714036201968879e-05, "loss": 0.808, "step": 8100},
    {"epoch": 2.588123213718641, "grad_norm": 30918.755859375, "learning_rate": 3.7060971737059384e-05, "loss": 0.807, "step": 8150},
    {"epoch": 2.604001270244522, "grad_norm": 30721.857421875, "learning_rate": 3.6981581454429984e-05, "loss": 0.8077, "step": 8200},
    {"epoch": 2.6198793267704032, "grad_norm": 30369.298828125, "learning_rate": 3.690219117180057e-05, "loss": 0.8067, "step": 8250},
    {"epoch": 2.6357573832962844, "grad_norm": 30493.7421875, "learning_rate": 3.6822800889171165e-05, "loss": 0.8063, "step": 8300},
    {"epoch": 2.651635439822166, "grad_norm": 30101.5859375, "learning_rate": 3.674341060654176e-05, "loss": 0.8065, "step": 8350},
    {"epoch": 2.667513496348047, "grad_norm": 30403.134765625, "learning_rate": 3.666402032391236e-05, "loss": 0.8023, "step": 8400},
    {"epoch": 2.6833915528739283, "grad_norm": 30230.22265625, "learning_rate": 3.658463004128295e-05, "loss": 0.8025, "step": 8450},
    {"epoch": 2.6992696093998094, "grad_norm": 30146.58203125, "learning_rate": 3.650523975865354e-05, "loss": 0.8035, "step": 8500},
    {"epoch": 2.7151476659256906, "grad_norm": 30205.19921875, "learning_rate": 3.6425849476024134e-05, "loss": 0.8023, "step": 8550},
    {"epoch": 2.731025722451572, "grad_norm": 29393.84375, "learning_rate": 3.6346459193394735e-05, "loss": 0.7985, "step": 8600},
    {"epoch": 2.7469037789774533, "grad_norm": 30105.896484375, "learning_rate": 3.626706891076532e-05, "loss": 0.798, "step": 8650},
    {"epoch": 2.7627818355033344, "grad_norm": 31707.62109375, "learning_rate": 3.6187678628135916e-05, "loss": 0.8019, "step": 8700},
    {"epoch": 2.7786598920292156, "grad_norm": 30057.185546875, "learning_rate": 3.6108288345506516e-05, "loss": 0.8019, "step": 8750},
    {"epoch": 2.7945379485550967, "grad_norm": 31893.216796875, "learning_rate": 3.60288980628771e-05, "loss": 0.8004, "step": 8800},
    {"epoch": 2.8104160050809783, "grad_norm": 29928.91015625, "learning_rate": 3.59495077802477e-05, "loss": 0.7989, "step": 8850},
    {"epoch": 2.8262940616068595, "grad_norm": 29859.1015625, "learning_rate": 3.587011749761829e-05, "loss": 0.7987, "step": 8900},
    {"epoch": 2.8421721181327406, "grad_norm": 29658.259765625, "learning_rate": 3.579072721498889e-05, "loss": 0.8001, "step": 8950},
    {"epoch": 2.8580501746586218, "grad_norm": 29646.7421875, "learning_rate": 3.571133693235948e-05, "loss": 0.7946, "step": 9000},
    {"epoch": 2.873928231184503, "grad_norm": 29755.7890625, "learning_rate": 3.563194664973007e-05, "loss": 0.7983, "step": 9050},
    {"epoch": 2.8898062877103845, "grad_norm": 29201.306640625, "learning_rate": 3.555255636710067e-05, "loss": 0.7966, "step": 9100},
    {"epoch": 2.905684344236265, "grad_norm": 30059.353515625, "learning_rate": 3.547316608447126e-05, "loss": 0.7968, "step": 9150},
    {"epoch": 2.921562400762147, "grad_norm": 29836.736328125, "learning_rate": 3.5393775801841854e-05, "loss": 0.7915, "step": 9200},
    {"epoch": 2.937440457288028, "grad_norm": 29474.76171875, "learning_rate": 3.531438551921245e-05, "loss": 0.7951, "step": 9250},
    {"epoch": 2.953318513813909, "grad_norm": 29720.64453125, "learning_rate": 3.523499523658305e-05, "loss": 0.7973, "step": 9300},
    {"epoch": 2.9691965703397907, "grad_norm": 30905.662109375, "learning_rate": 3.5155604953953635e-05, "loss": 0.796, "step": 9350},
    {"epoch": 2.9850746268656714, "grad_norm": 29326.87109375, "learning_rate": 3.507621467132423e-05, "loss": 0.7943, "step": 9400},
    {"epoch": 3.000952683391553, "grad_norm": 29983.439453125, "learning_rate": 3.499682438869483e-05, "loss": 0.7913, "step": 9450},
    {"epoch": 3.016830739917434, "grad_norm": 31416.537109375, "learning_rate": 3.4917434106065423e-05, "loss": 0.7875, "step": 9500},
    {"epoch": 3.0327087964433153, "grad_norm": 29326.58984375, "learning_rate": 3.483804382343601e-05, "loss": 0.788, "step": 9550},
    {"epoch": 3.0485868529691964, "grad_norm": 30804.06640625, "learning_rate": 3.4758653540806604e-05, "loss": 0.7874, "step": 9600},
    {"epoch": 3.064464909495078, "grad_norm": 30785.3125, "learning_rate": 3.4679263258177205e-05, "loss": 0.7889, "step": 9650},
    {"epoch": 3.080342966020959, "grad_norm": 29545.892578125, "learning_rate": 3.459987297554779e-05, "loss": 0.786, "step": 9700},
    {"epoch": 3.0962210225468403, "grad_norm": 29333.875, "learning_rate": 3.4520482692918386e-05, "loss": 0.7869, "step": 9750},
    {"epoch": 3.1120990790727214, "grad_norm": 29140.724609375, "learning_rate": 3.4441092410288986e-05, "loss": 0.7848, "step": 9800},
    {"epoch": 3.1279771355986026, "grad_norm": 29844.708984375, "learning_rate": 3.436170212765958e-05, "loss": 0.7833, "step": 9850},
    {"epoch": 3.143855192124484, "grad_norm": 29902.115234375, "learning_rate": 3.428231184503017e-05, "loss": 0.7868, "step": 9900},
    {"epoch": 3.1597332486503653, "grad_norm": 29875.98828125, "learning_rate": 3.420292156240076e-05, "loss": 0.7824, "step": 9950},
    {"epoch": 3.1756113051762465, "grad_norm": 29536.19921875, "learning_rate": 3.412353127977136e-05, "loss": 0.7819, "step": 10000},
    {"epoch": 3.1914893617021276, "grad_norm": 29816.875, "learning_rate": 3.404414099714195e-05, "loss": 0.7804, "step": 10050},
    {"epoch": 3.2073674182280087, "grad_norm": 30143.560546875, "learning_rate": 3.396475071451254e-05, "loss": 0.7819, "step": 10100},
    {"epoch": 3.22324547475389, "grad_norm": 29888.271484375, "learning_rate": 3.388536043188314e-05, "loss": 0.7817, "step": 10150},
    {"epoch": 3.2391235312797715, "grad_norm": 29966.201171875, "learning_rate": 3.380597014925374e-05, "loss": 0.7809, "step": 10200},
    {"epoch": 3.2550015878056526, "grad_norm": 30786.470703125, "learning_rate": 3.3726579866624324e-05, "loss": 0.7765, "step": 10250},
    {"epoch": 3.2708796443315338, "grad_norm": 30272.826171875, "learning_rate": 3.364718958399492e-05, "loss": 0.7806, "step": 10300},
    {"epoch": 3.286757700857415, "grad_norm": 29083.400390625, "learning_rate": 3.356779930136552e-05, "loss": 0.7799, "step": 10350},
    {"epoch": 3.302635757383296, "grad_norm": 29236.140625, "learning_rate": 3.348840901873611e-05, "loss": 0.7801, "step": 10400},
    {"epoch": 3.3185138139091777, "grad_norm": 29460.9296875, "learning_rate": 3.34090187361067e-05, "loss": 0.7779, "step": 10450},
    {"epoch": 3.334391870435059, "grad_norm": 29783.908203125, "learning_rate": 3.332962845347729e-05, "loss": 0.7782, "step": 10500},
    {"epoch": 3.35026992696094, "grad_norm": 30124.951171875, "learning_rate": 3.3250238170847893e-05, "loss": 0.7778, "step": 10550},
    {"epoch": 3.366147983486821, "grad_norm": 29516.72265625, "learning_rate": 3.317084788821848e-05, "loss": 0.7771, "step": 10600},
    {"epoch": 3.3820260400127022, "grad_norm": 29329.431640625, "learning_rate": 3.3091457605589074e-05, "loss": 0.7777, "step": 10650},
    {"epoch": 3.397904096538584, "grad_norm": 29930.46875, "learning_rate": 3.3012067322959675e-05, "loss": 0.7776, "step": 10700},
    {"epoch": 3.413782153064465, "grad_norm": 30200.29296875, "learning_rate": 3.293267704033027e-05, "loss": 0.7774, "step": 10750},
    {"epoch": 3.429660209590346, "grad_norm": 29434.279296875, "learning_rate": 3.2853286757700856e-05, "loss": 0.7794, "step": 10800},
    {"epoch": 3.4455382661162273, "grad_norm": 29533.7265625, "learning_rate": 3.277389647507145e-05, "loss": 0.78, "step": 10850},
    {"epoch": 3.4614163226421084, "grad_norm": 30062.75390625, "learning_rate": 3.269450619244205e-05, "loss": 0.7715, "step": 10900},
    {"epoch": 3.47729437916799, "grad_norm": 30203.49609375, "learning_rate": 3.261511590981264e-05, "loss": 0.7754, "step": 10950},
    {"epoch": 3.493172435693871, "grad_norm": 29971.337890625, "learning_rate": 3.253572562718323e-05, "loss": 0.7743, "step": 11000},
    {"epoch": 3.5090504922197523, "grad_norm": 30626.96875, "learning_rate": 3.245633534455383e-05, "loss": 0.7722, "step": 11050},
    {"epoch": 3.5249285487456334, "grad_norm": 29813.41796875, "learning_rate": 3.2376945061924425e-05, "loss": 0.7762, "step": 11100},
    {"epoch": 3.5408066052715146, "grad_norm": 30018.158203125, "learning_rate": 3.229755477929501e-05, "loss": 0.7708, "step": 11150},
    {"epoch": 3.556684661797396, "grad_norm": 28745.65625, "learning_rate": 3.2218164496665606e-05, "loss": 0.7703, "step": 11200},
    {"epoch": 3.5725627183232773, "grad_norm": 29827.396484375, "learning_rate": 3.213877421403621e-05, "loss": 0.7731, "step": 11250},
    {"epoch": 3.5884407748491585, "grad_norm": 29104.228515625, "learning_rate": 3.20593839314068e-05, "loss": 0.7697, "step": 11300},
    {"epoch": 3.6043188313750396, "grad_norm": 29951.318359375, "learning_rate": 3.197999364877739e-05, "loss": 0.7719, "step": 11350},
    {"epoch": 3.6201968879009208, "grad_norm": 29854.1171875, "learning_rate": 3.190060336614799e-05, "loss": 0.7707, "step": 11400},
    {"epoch": 3.6360749444268023, "grad_norm": 29064.115234375, "learning_rate": 3.182121308351858e-05, "loss": 0.7679, "step": 11450},
    {"epoch": 3.6519530009526835, "grad_norm": 29561.064453125, "learning_rate": 3.174182280088917e-05, "loss": 0.773, "step": 11500},
    {"epoch": 3.6678310574785646, "grad_norm": 30519.935546875, "learning_rate": 3.166243251825976e-05, "loss": 0.7715, "step": 11550},
    {"epoch": 3.683709114004446, "grad_norm": 29498.1796875, "learning_rate": 3.1583042235630363e-05, "loss": 0.7696, "step": 11600},
    {"epoch": 3.699587170530327, "grad_norm": 29892.5, "learning_rate": 3.150365195300096e-05, "loss": 0.7722, "step": 11650},
    {"epoch": 3.7154652270562085, "grad_norm": 29588.62890625, "learning_rate": 3.1424261670371544e-05, "loss": 0.7695, "step": 11700},
    {"epoch": 3.7313432835820897, "grad_norm": 28297.962890625, "learning_rate": 3.1344871387742145e-05, "loss": 0.767, "step": 11750},
    {"epoch": 3.747221340107971, "grad_norm": 29926.146484375, "learning_rate": 3.126548110511274e-05, "loss": 0.7686, "step": 11800},
    {"epoch": 3.763099396633852, "grad_norm": 29238.9375, "learning_rate": 3.1186090822483326e-05, "loss": 0.7657, "step": 11850},
    {"epoch": 3.778977453159733, "grad_norm": 29130.10546875, "learning_rate": 3.110670053985392e-05, "loss": 0.7664, "step": 11900},
    {"epoch": 3.7948555096856147, "grad_norm": 28701.7890625, "learning_rate": 3.102731025722452e-05, "loss": 0.7658, "step": 11950},
    {"epoch": 3.810733566211496, "grad_norm": 29051.44140625, "learning_rate": 3.0947919974595114e-05, "loss": 0.7693, "step": 12000},
    {"epoch": 3.826611622737377, "grad_norm": 29298.595703125, "learning_rate": 3.08685296919657e-05, "loss": 0.767, "step": 12050},
    {"epoch": 3.842489679263258, "grad_norm": 28832.44921875, "learning_rate": 3.07891394093363e-05, "loss": 0.7655, "step": 12100},
    {"epoch": 3.8583677357891393, "grad_norm": 28990.67578125, "learning_rate": 3.0709749126706895e-05, "loss": 0.7618, "step": 12150},
    {"epoch": 3.874245792315021, "grad_norm": 28622.34765625, "learning_rate": 3.063035884407749e-05, "loss": 0.7664, "step": 12200},
    {"epoch": 3.890123848840902, "grad_norm": 29283.064453125, "learning_rate": 3.0550968561448076e-05, "loss": 0.7678, "step": 12250},
    {"epoch": 3.906001905366783, "grad_norm": 29989.224609375, "learning_rate": 3.0471578278818673e-05, "loss": 0.7631, "step": 12300},
    {"epoch": 3.9218799618926643, "grad_norm": 30515.478515625, "learning_rate": 3.039218799618927e-05, "loss": 0.7634, "step": 12350},
    {"epoch": 3.9377580184185454, "grad_norm": 29057.341796875, "learning_rate": 3.031279771355986e-05, "loss": 0.7649, "step": 12400},
    {"epoch": 3.953636074944427, "grad_norm": 29256.357421875, "learning_rate": 3.0233407430930455e-05, "loss": 0.7617, "step": 12450},
    {"epoch": 3.969514131470308, "grad_norm": 29832.517578125, "learning_rate": 3.015401714830105e-05, "loss": 0.762, "step": 12500},
    {"epoch": 3.9853921879961893, "grad_norm": 30056.580078125, "learning_rate": 3.0074626865671646e-05, "loss": 0.7635, "step": 12550},
    {"epoch": 4.001270244522071, "grad_norm": 30290.673828125, "learning_rate": 2.9995236583042236e-05, "loss": 0.761, "step": 12600},
    {"epoch": 4.017148301047952, "grad_norm": 30393.2421875, "learning_rate": 2.991584630041283e-05, "loss": 0.7576, "step": 12650},
    {"epoch": 4.033026357573833, "grad_norm": 29898.7734375, "learning_rate": 2.9836456017783427e-05, "loss": 0.7592, "step": 12700},
    {"epoch": 4.048904414099714, "grad_norm": 29665.693359375, "learning_rate": 2.9757065735154018e-05, "loss": 0.7564, "step": 12750},
    {"epoch": 4.0647824706255955, "grad_norm": 29677.63671875, "learning_rate": 2.967767545252461e-05, "loss": 0.7564, "step": 12800},
    {"epoch": 4.080660527151477, "grad_norm": 29343.1328125, "learning_rate": 2.9598285169895205e-05, "loss": 0.7589, "step": 12850},
    {"epoch": 4.096538583677358, "grad_norm": 28720.55078125, "learning_rate": 2.9518894887265802e-05, "loss": 0.7543, "step": 12900},
    {"epoch": 4.112416640203239, "grad_norm": 28604.630859375, "learning_rate": 2.9439504604636393e-05, "loss": 0.7554, "step": 12950},
    {"epoch": 4.12829469672912, "grad_norm": 29508.154296875, "learning_rate": 2.9360114322006987e-05, "loss": 0.7555, "step": 13000},
    {"epoch": 4.144172753255002, "grad_norm": 28996.201171875, "learning_rate": 2.9280724039377584e-05, "loss": 0.7552, "step": 13050},
    {"epoch": 4.160050809780882, "grad_norm": 31208.623046875, "learning_rate": 2.9201333756748178e-05, "loss": 0.7584, "step": 13100},
    {"epoch": 4.175928866306764, "grad_norm": 29778.853515625, "learning_rate": 2.9121943474118768e-05, "loss": 0.7549, "step": 13150},
    {"epoch": 4.1918069228326456, "grad_norm": 29337.587890625, "learning_rate": 2.9042553191489362e-05, "loss": 0.7546, "step": 13200},
    {"epoch": 4.207684979358526, "grad_norm": 28682.619140625, "learning_rate": 2.896316290885996e-05, "loss": 0.749, "step": 13250},
    {"epoch": 4.223563035884408, "grad_norm": 29200.912109375, "learning_rate": 2.888377262623055e-05, "loss": 0.7545, "step": 13300},
    {"epoch": 4.2394410924102885, "grad_norm": 30967.3515625, "learning_rate": 2.8804382343601143e-05, "loss": 0.7558, "step": 13350},
    {"epoch": 4.25531914893617, "grad_norm": 30665.638671875, "learning_rate": 2.872499206097174e-05, "loss": 0.7532, "step": 13400},
    {"epoch": 4.271197205462052, "grad_norm": 28959.0625, "learning_rate": 2.8645601778342334e-05, "loss": 0.7553, "step": 13450},
    {"epoch": 4.287075261987932, "grad_norm": 29186.98046875, "learning_rate": 2.8566211495712925e-05, "loss": 0.7535, "step": 13500},
    {"epoch": 4.302953318513814, "grad_norm": 29713.626953125, "learning_rate": 2.848682121308352e-05, "loss": 0.7526, "step": 13550},
    {"epoch": 4.318831375039695, "grad_norm": 30507.126953125, "learning_rate": 2.8407430930454116e-05, "loss": 0.7557, "step": 13600},
    {"epoch": 4.334709431565576, "grad_norm": 28860.220703125, "learning_rate": 2.8328040647824706e-05, "loss": 0.7497, "step": 13650},
    {"epoch": 4.350587488091458, "grad_norm": 28531.17578125, "learning_rate": 2.82486503651953e-05, "loss": 0.7536, "step": 13700},
    {"epoch": 4.366465544617339, "grad_norm": 29637.4296875, "learning_rate": 2.8169260082565897e-05, "loss": 0.7536, "step": 13750},
    {"epoch": 4.38234360114322, "grad_norm": 29468.94140625, "learning_rate": 2.808986979993649e-05, "loss": 0.7517, "step": 13800},
    {"epoch": 4.398221657669101, "grad_norm": 29931.30859375, "learning_rate": 2.801047951730708e-05, "loss": 0.7514, "step": 13850},
    {"epoch": 4.4140997141949825, "grad_norm": 29154.953125, "learning_rate": 2.7931089234677675e-05, "loss": 0.7545, "step": 13900},
    {"epoch": 4.429977770720864, "grad_norm": 29288.595703125, "learning_rate": 2.7851698952048273e-05, "loss": 0.7535, "step": 13950},
    {"epoch": 4.445855827246745, "grad_norm": 29386.255859375, "learning_rate": 2.7772308669418866e-05, "loss": 0.7517, "step": 14000},
    {"epoch": 4.461733883772626, "grad_norm": 28722.88671875, "learning_rate": 2.7692918386789457e-05, "loss": 0.7482, "step": 14050},
    {"epoch": 4.477611940298507, "grad_norm": 29279.75, "learning_rate": 2.7613528104160054e-05, "loss": 0.7481, "step": 14100},
    {"epoch": 4.493489996824389, "grad_norm": 29140.587890625, "learning_rate": 2.7534137821530648e-05, "loss": 0.7462, "step": 14150},
    {"epoch": 4.50936805335027, "grad_norm": 28993.6484375, "learning_rate": 2.7454747538901238e-05, "loss": 0.7473, "step": 14200},
    {"epoch": 4.525246109876151, "grad_norm": 30390.90234375, "learning_rate": 2.7375357256271832e-05, "loss": 0.7515, "step": 14250},
    {"epoch": 4.5411241664020325, "grad_norm": 29218.51953125, "learning_rate": 2.729596697364243e-05, "loss": 0.7471, "step": 14300},
    {"epoch": 4.557002222927913, "grad_norm": 30701.79296875, "learning_rate": 2.7216576691013023e-05, "loss": 0.7487, "step": 14350},
    {"epoch": 4.572880279453795, "grad_norm": 29310.14453125, "learning_rate": 2.7137186408383613e-05, "loss": 0.7492, "step": 14400},
    {"epoch": 4.588758335979676, "grad_norm": 29325.59375, "learning_rate": 2.705779612575421e-05, "loss": 0.7479, "step": 14450},
    {"epoch": 4.604636392505557, "grad_norm": 29217.953125, "learning_rate": 2.6978405843124804e-05, "loss": 0.7512, "step": 14500},
    {"epoch": 4.620514449031439, "grad_norm": 29466.189453125, "learning_rate": 2.6899015560495395e-05, "loss": 0.7477, "step": 14550},
    {"epoch": 4.636392505557319, "grad_norm": 28055.72265625, "learning_rate": 2.681962527786599e-05, "loss": 0.749, "step": 14600},
    {"epoch": 4.652270562083201, "grad_norm": 28245.34375, "learning_rate": 2.6740234995236586e-05, "loss": 0.7479, "step": 14650},
    {"epoch": 4.668148618609083, "grad_norm": 29280.736328125, "learning_rate": 2.666084471260718e-05, "loss": 0.7484, "step": 14700},
    {"epoch": 4.684026675134963, "grad_norm": 29603.513671875, "learning_rate": 2.658145442997777e-05, "loss": 0.7499, "step": 14750},
    {"epoch": 4.699904731660845, "grad_norm": 28774.55078125, "learning_rate": 2.6502064147348364e-05, "loss": 0.7463, "step": 14800},
    {"epoch": 4.715782788186726, "grad_norm": 29580.22265625, "learning_rate": 2.642267386471896e-05, "loss": 0.7424, "step": 14850},
    {"epoch": 4.731660844712607, "grad_norm": 28998.916015625, "learning_rate": 2.6343283582089555e-05, "loss": 0.7457, "step": 14900},
    {"epoch": 4.747538901238489, "grad_norm": 28983.86328125, "learning_rate": 2.6263893299460145e-05, "loss": 0.7412, "step": 14950},
    {"epoch": 4.7634169577643695, "grad_norm": 30523.78125, "learning_rate": 2.6184503016830743e-05, "loss": 0.7468, "step": 15000},
    {"epoch": 4.779295014290251, "grad_norm": 29167.177734375, "learning_rate": 2.6105112734201336e-05, "loss": 0.7435, "step": 15050},
    {"epoch": 4.795173070816132, "grad_norm": 29224.529296875, "learning_rate": 2.6025722451571927e-05, "loss": 0.7409, "step": 15100},
    {"epoch": 4.811051127342013, "grad_norm": 28679.3046875, "learning_rate": 2.594633216894252e-05, "loss": 0.7394, "step": 15150},
    {"epoch": 4.826929183867895, "grad_norm": 29085.837890625, "learning_rate": 2.5866941886313118e-05, "loss": 0.745, "step": 15200},
    {"epoch": 4.842807240393776, "grad_norm": 28241.09765625, "learning_rate": 2.578755160368371e-05, "loss": 0.7456, "step": 15250},
    {"epoch": 4.858685296919657, "grad_norm": 29162.150390625, "learning_rate": 2.5708161321054302e-05, "loss": 0.7428, "step": 15300},
    {"epoch": 4.874563353445538, "grad_norm": 29517.7578125, "learning_rate": 2.56287710384249e-05, "loss": 0.7418, "step": 15350},
    {"epoch": 4.8904414099714195, "grad_norm": 28757.634765625, "learning_rate": 2.5549380755795493e-05, "loss": 0.7475, "step": 15400},
    {"epoch": 4.906319466497301, "grad_norm": 29112.775390625, "learning_rate": 2.5469990473166083e-05, "loss": 0.742, "step": 15450},
    {"epoch": 4.922197523023182, "grad_norm": 29536.763671875, "learning_rate": 2.5390600190536677e-05, "loss": 0.7423, "step": 15500},
    {"epoch": 4.938075579549063, "grad_norm": 29418.62890625, "learning_rate": 2.5311209907907274e-05, "loss": 0.7445, "step": 15550},
    {"epoch": 4.953953636074944, "grad_norm": 29107.703125, "learning_rate": 2.5231819625277868e-05, "loss": 0.7421, "step": 15600},
    {"epoch": 4.969831692600826, "grad_norm": 28584.513671875, "learning_rate": 2.515242934264846e-05, "loss": 0.7416, "step": 15650},
    {"epoch": 4.985709749126707, "grad_norm": 29202.330078125, "learning_rate": 2.5073039060019056e-05, "loss": 0.7404, "step": 15700},
    {"epoch": 5.001587805652588, "grad_norm": 28508.923828125, "learning_rate": 2.499364877738965e-05, "loss": 0.7401, "step": 15750},
    {"epoch": 5.01746586217847, "grad_norm": 29855.150390625, "learning_rate": 2.4914258494760243e-05, "loss": 0.7377, "step": 15800},
    {"epoch": 5.03334391870435, "grad_norm": 28641.822265625, "learning_rate": 2.4834868212130834e-05, "loss": 0.7373, "step": 15850},
    {"epoch": 5.049221975230232, "grad_norm": 29080.0234375, "learning_rate": 2.475547792950143e-05, "loss": 0.7387, "step": 15900},
    {"epoch": 5.0651000317561135, "grad_norm": 29752.501953125, "learning_rate": 2.467608764687202e-05, "loss": 0.7364, "step": 15950},
    {"epoch": 5.080978088281994, "grad_norm": 29244.943359375, "learning_rate": 2.459669736424262e-05, "loss": 0.7386, "step": 16000},
    {"epoch": 5.096856144807876, "grad_norm": 30756.474609375, "learning_rate": 2.4517307081613213e-05, "loss": 0.7365, "step": 16050},
    {"epoch": 5.1127342013337564, "grad_norm": 29783.80078125, "learning_rate": 2.4437916798983806e-05, "loss": 0.7345, "step": 16100},
    {"epoch": 5.128612257859638, "grad_norm": 28871.20703125, "learning_rate": 2.43585265163544e-05, "loss": 0.7385, "step": 16150},
    {"epoch": 5.14449031438552, "grad_norm": 29666.92578125, "learning_rate": 2.4279136233724994e-05, "loss": 0.7364, "step": 16200},
    {"epoch": 5.1603683709114, "grad_norm": 27965.49609375, "learning_rate": 2.4199745951095588e-05, "loss": 0.7349, "step": 16250},
    {"epoch": 5.176246427437282, "grad_norm": 29663.21484375, "learning_rate": 2.4120355668466178e-05, "loss": 0.7336, "step": 16300},
    {"epoch": 5.192124483963163, "grad_norm": 29286.236328125, "learning_rate": 2.4040965385836775e-05, "loss": 0.7353, "step": 16350},
    {"epoch": 5.208002540489044, "grad_norm": 28999.544921875, "learning_rate": 2.396157510320737e-05, "loss": 0.7364, "step": 16400},
    {"epoch": 5.223880597014926, "grad_norm": 29259.37890625, "learning_rate": 2.3882184820577963e-05, "loss": 0.7386, "step": 16450},
    {"epoch": 5.2397586535408065, "grad_norm": 29506.087890625, "learning_rate": 2.3802794537948557e-05, "loss": 0.734, "step": 16500},
    {"epoch": 5.255636710066688, "grad_norm": 29443.67578125, "learning_rate": 2.372340425531915e-05, "loss": 0.7408, "step": 16550},
    {"epoch": 5.271514766592569, "grad_norm": 29926.109375, "learning_rate": 2.3644013972689744e-05, "loss": 0.7368, "step": 16600},
    {"epoch": 5.28739282311845, "grad_norm": 30687.26953125, "learning_rate": 2.3564623690060338e-05, "loss": 0.7345, "step": 16650},
    {"epoch": 5.303270879644332, "grad_norm": 28573.580078125, "learning_rate": 2.3485233407430932e-05, "loss": 0.7348, "step": 16700},
    {"epoch": 5.319148936170213, "grad_norm": 30217.728515625, "learning_rate": 2.3405843124801526e-05, "loss": 0.7348, "step": 16750},
    {"epoch": 5.335026992696094, "grad_norm": 28830.787109375, "learning_rate": 2.332645284217212e-05, "loss": 0.7389, "step": 16800},
    {"epoch": 5.350905049221975, "grad_norm": 29589.51953125, "learning_rate": 2.3247062559542713e-05, "loss": 0.7364, "step": 16850},
    {"epoch": 5.3667831057478566, "grad_norm": 28568.32421875, "learning_rate": 2.3167672276913307e-05, "loss": 0.7348, "step": 16900},
    {"epoch": 5.382661162273738, "grad_norm": 30656.90234375, "learning_rate": 2.30882819942839e-05, "loss": 0.7346, "step": 16950},
    {"epoch": 5.398539218799619, "grad_norm": 29044.07421875, "learning_rate": 2.3008891711654495e-05, "loss": 0.7363, "step": 17000},
    {"epoch": 5.4144172753255, "grad_norm": 29246.9765625, "learning_rate": 2.292950142902509e-05, "loss": 0.7376, "step": 17050},
    {"epoch": 5.430295331851381, "grad_norm": 29861.26171875, "learning_rate": 2.2850111146395683e-05, "loss": 0.7322, "step": 17100},
    {"epoch": 5.446173388377263, "grad_norm": 29695.34765625, "learning_rate": 2.2770720863766276e-05, "loss": 0.7324, "step": 17150},
    {"epoch": 5.462051444903144, "grad_norm": 29459.607421875, "learning_rate": 2.269133058113687e-05, "loss": 0.733, "step": 17200},
    {"epoch": 5.477929501429025, "grad_norm": 28512.21484375, "learning_rate": 2.2611940298507464e-05, "loss": 0.7274, "step": 17250},
    {"epoch": 5.493807557954907, "grad_norm": 29732.572265625, "learning_rate": 2.2532550015878058e-05, "loss": 0.7333,
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 5.509685614480787, | |
| "grad_norm": 29110.30859375, | |
| "learning_rate": 2.245315973324865e-05, | |
| "loss": 0.7344, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 5.525563671006669, | |
| "grad_norm": 29294.5859375, | |
| "learning_rate": 2.2373769450619245e-05, | |
| "loss": 0.7292, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 5.5414417275325505, | |
| "grad_norm": 28999.29296875, | |
| "learning_rate": 2.229437916798984e-05, | |
| "loss": 0.7322, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 5.557319784058431, | |
| "grad_norm": 28789.3359375, | |
| "learning_rate": 2.2214988885360433e-05, | |
| "loss": 0.7334, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 5.573197840584313, | |
| "grad_norm": 29767.94140625, | |
| "learning_rate": 2.2135598602731027e-05, | |
| "loss": 0.7324, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 5.5890758971101935, | |
| "grad_norm": 29029.384765625, | |
| "learning_rate": 2.205620832010162e-05, | |
| "loss": 0.7318, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 5.604953953636075, | |
| "grad_norm": 29995.126953125, | |
| "learning_rate": 2.1976818037472214e-05, | |
| "loss": 0.7332, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 5.620832010161957, | |
| "grad_norm": 29895.734375, | |
| "learning_rate": 2.1897427754842808e-05, | |
| "loss": 0.7323, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 5.636710066687837, | |
| "grad_norm": 28998.80078125, | |
| "learning_rate": 2.1818037472213402e-05, | |
| "loss": 0.7308, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 5.652588123213719, | |
| "grad_norm": 29375.140625, | |
| "learning_rate": 2.1738647189583996e-05, | |
| "loss": 0.7307, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 5.6684661797396, | |
| "grad_norm": 30084.34375, | |
| "learning_rate": 2.165925690695459e-05, | |
| "loss": 0.7292, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 5.684344236265481, | |
| "grad_norm": 29009.86328125, | |
| "learning_rate": 2.1579866624325183e-05, | |
| "loss": 0.7311, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 5.700222292791363, | |
| "grad_norm": 28996.576171875, | |
| "learning_rate": 2.1500476341695777e-05, | |
| "loss": 0.731, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 5.7161003493172435, | |
| "grad_norm": 28874.802734375, | |
| "learning_rate": 2.142108605906637e-05, | |
| "loss": 0.7301, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.731978405843125, | |
| "grad_norm": 29940.734375, | |
| "learning_rate": 2.1341695776436965e-05, | |
| "loss": 0.73, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 5.747856462369006, | |
| "grad_norm": 29607.59765625, | |
| "learning_rate": 2.126230549380756e-05, | |
| "loss": 0.7311, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 5.763734518894887, | |
| "grad_norm": 29006.7890625, | |
| "learning_rate": 2.1182915211178153e-05, | |
| "loss": 0.7301, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 5.779612575420768, | |
| "grad_norm": 30157.11328125, | |
| "learning_rate": 2.1103524928548746e-05, | |
| "loss": 0.7281, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 5.79549063194665, | |
| "grad_norm": 29396.783203125, | |
| "learning_rate": 2.102413464591934e-05, | |
| "loss": 0.7315, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 5.811368688472531, | |
| "grad_norm": 29335.1015625, | |
| "learning_rate": 2.0944744363289934e-05, | |
| "loss": 0.7279, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 5.827246744998412, | |
| "grad_norm": 28650.416015625, | |
| "learning_rate": 2.0865354080660528e-05, | |
| "loss": 0.7347, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 5.843124801524294, | |
| "grad_norm": 29561.095703125, | |
| "learning_rate": 2.078596379803112e-05, | |
| "loss": 0.7263, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 5.859002858050174, | |
| "grad_norm": 28661.658203125, | |
| "learning_rate": 2.070657351540172e-05, | |
| "loss": 0.7314, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 5.874880914576056, | |
| "grad_norm": 28372.755859375, | |
| "learning_rate": 2.062718323277231e-05, | |
| "loss": 0.7265, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 5.8907589711019375, | |
| "grad_norm": 28641.00390625, | |
| "learning_rate": 2.0547792950142903e-05, | |
| "loss": 0.7296, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 5.906637027627818, | |
| "grad_norm": 29402.455078125, | |
| "learning_rate": 2.0468402667513497e-05, | |
| "loss": 0.7257, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 5.9225150841537, | |
| "grad_norm": 29028.78515625, | |
| "learning_rate": 2.038901238488409e-05, | |
| "loss": 0.7262, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 5.9383931406795805, | |
| "grad_norm": 29547.03125, | |
| "learning_rate": 2.0309622102254684e-05, | |
| "loss": 0.728, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 5.954271197205462, | |
| "grad_norm": 29396.15234375, | |
| "learning_rate": 2.0230231819625278e-05, | |
| "loss": 0.725, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 5.970149253731344, | |
| "grad_norm": 29648.634765625, | |
| "learning_rate": 2.0150841536995872e-05, | |
| "loss": 0.7264, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 5.986027310257224, | |
| "grad_norm": 29708.830078125, | |
| "learning_rate": 2.0071451254366466e-05, | |
| "loss": 0.7263, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 6.001905366783106, | |
| "grad_norm": 29569.521484375, | |
| "learning_rate": 1.9992060971737063e-05, | |
| "loss": 0.7249, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 6.017783423308987, | |
| "grad_norm": 29247.328125, | |
| "learning_rate": 1.9912670689107653e-05, | |
| "loss": 0.7248, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 6.033661479834868, | |
| "grad_norm": 29513.408203125, | |
| "learning_rate": 1.9833280406478247e-05, | |
| "loss": 0.7243, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 6.04953953636075, | |
| "grad_norm": 29599.560546875, | |
| "learning_rate": 1.975389012384884e-05, | |
| "loss": 0.7228, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 6.0654175928866305, | |
| "grad_norm": 28947.3515625, | |
| "learning_rate": 1.9674499841219435e-05, | |
| "loss": 0.7261, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 6.081295649412512, | |
| "grad_norm": 28986.390625, | |
| "learning_rate": 1.959510955859003e-05, | |
| "loss": 0.721, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 6.097173705938393, | |
| "grad_norm": 29116.353515625, | |
| "learning_rate": 1.9515719275960623e-05, | |
| "loss": 0.7193, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 6.113051762464274, | |
| "grad_norm": 29632.84375, | |
| "learning_rate": 1.943632899333122e-05, | |
| "loss": 0.7228, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 6.128929818990156, | |
| "grad_norm": 29931.40234375, | |
| "learning_rate": 1.935693871070181e-05, | |
| "loss": 0.7233, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 6.144807875516037, | |
| "grad_norm": 29751.517578125, | |
| "learning_rate": 1.9277548428072407e-05, | |
| "loss": 0.7249, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 6.160685932041918, | |
| "grad_norm": 30318.1796875, | |
| "learning_rate": 1.9198158145442998e-05, | |
| "loss": 0.7232, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 6.176563988567799, | |
| "grad_norm": 28459.033203125, | |
| "learning_rate": 1.911876786281359e-05, | |
| "loss": 0.7219, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 6.192442045093681, | |
| "grad_norm": 29604.791015625, | |
| "learning_rate": 1.9039377580184185e-05, | |
| "loss": 0.7232, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 6.208320101619562, | |
| "grad_norm": 29528.1484375, | |
| "learning_rate": 1.895998729755478e-05, | |
| "loss": 0.7192, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 6.224198158145443, | |
| "grad_norm": 28977.87109375, | |
| "learning_rate": 1.8880597014925376e-05, | |
| "loss": 0.7198, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 6.2400762146713245, | |
| "grad_norm": 29568.908203125, | |
| "learning_rate": 1.8801206732295967e-05, | |
| "loss": 0.7231, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 6.255954271197205, | |
| "grad_norm": 28843.431640625, | |
| "learning_rate": 1.8721816449666564e-05, | |
| "loss": 0.7188, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 6.271832327723087, | |
| "grad_norm": 29127.72265625, | |
| "learning_rate": 1.8642426167037154e-05, | |
| "loss": 0.7218, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 6.287710384248968, | |
| "grad_norm": 29642.048828125, | |
| "learning_rate": 1.856303588440775e-05, | |
| "loss": 0.7182, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 6.303588440774849, | |
| "grad_norm": 29874.93359375, | |
| "learning_rate": 1.8483645601778342e-05, | |
| "loss": 0.719, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 6.319466497300731, | |
| "grad_norm": 29941.87890625, | |
| "learning_rate": 1.8404255319148936e-05, | |
| "loss": 0.7198, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 6.335344553826611, | |
| "grad_norm": 29865.37890625, | |
| "learning_rate": 1.832486503651953e-05, | |
| "loss": 0.7249, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 6.351222610352493, | |
| "grad_norm": 30395.484375, | |
| "learning_rate": 1.8245474753890123e-05, | |
| "loss": 0.7195, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.367100666878374, | |
| "grad_norm": 30510.9765625, | |
| "learning_rate": 1.816608447126072e-05, | |
| "loss": 0.7206, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 6.382978723404255, | |
| "grad_norm": 30179.208984375, | |
| "learning_rate": 1.808669418863131e-05, | |
| "loss": 0.7208, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 6.398856779930137, | |
| "grad_norm": 30193.3125, | |
| "learning_rate": 1.8007303906001908e-05, | |
| "loss": 0.7227, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 6.4147348364560175, | |
| "grad_norm": 30033.546875, | |
| "learning_rate": 1.79279136233725e-05, | |
| "loss": 0.7172, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 6.430612892981899, | |
| "grad_norm": 30468.65625, | |
| "learning_rate": 1.7848523340743096e-05, | |
| "loss": 0.7215, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 6.44649094950778, | |
| "grad_norm": 29339.623046875, | |
| "learning_rate": 1.7769133058113686e-05, | |
| "loss": 0.7174, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 6.462369006033661, | |
| "grad_norm": 29816.017578125, | |
| "learning_rate": 1.768974277548428e-05, | |
| "loss": 0.7194, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 6.478247062559543, | |
| "grad_norm": 29318.12109375, | |
| "learning_rate": 1.7610352492854877e-05, | |
| "loss": 0.7196, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 6.494125119085424, | |
| "grad_norm": 29063.029296875, | |
| "learning_rate": 1.7530962210225468e-05, | |
| "loss": 0.7185, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 6.510003175611305, | |
| "grad_norm": 30219.3359375, | |
| "learning_rate": 1.7451571927596065e-05, | |
| "loss": 0.7168, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 6.525881232137186, | |
| "grad_norm": 29047.94140625, | |
| "learning_rate": 1.7372181644966655e-05, | |
| "loss": 0.721, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 6.5417592886630676, | |
| "grad_norm": 28919.94140625, | |
| "learning_rate": 1.7292791362337253e-05, | |
| "loss": 0.7172, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 6.557637345188949, | |
| "grad_norm": 30273.14453125, | |
| "learning_rate": 1.7213401079707843e-05, | |
| "loss": 0.7188, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 6.57351540171483, | |
| "grad_norm": 30020.40625, | |
| "learning_rate": 1.713401079707844e-05, | |
| "loss": 0.7184, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 6.589393458240711, | |
| "grad_norm": 29766.990234375, | |
| "learning_rate": 1.705462051444903e-05, | |
| "loss": 0.7162, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 6.605271514766592, | |
| "grad_norm": 29873.83203125, | |
| "learning_rate": 1.6975230231819624e-05, | |
| "loss": 0.7176, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 6.621149571292474, | |
| "grad_norm": 29591.2578125, | |
| "learning_rate": 1.689583994919022e-05, | |
| "loss": 0.715, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 6.637027627818355, | |
| "grad_norm": 30105.244140625, | |
| "learning_rate": 1.6816449666560812e-05, | |
| "loss": 0.716, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 6.652905684344236, | |
| "grad_norm": 29515.470703125, | |
| "learning_rate": 1.673705938393141e-05, | |
| "loss": 0.7165, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 6.668783740870118, | |
| "grad_norm": 29495.97265625, | |
| "learning_rate": 1.6657669101302e-05, | |
| "loss": 0.7157, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.684661797395998, | |
| "grad_norm": 29483.634765625, | |
| "learning_rate": 1.6578278818672597e-05, | |
| "loss": 0.7207, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 6.70053985392188, | |
| "grad_norm": 29576.375, | |
| "learning_rate": 1.6498888536043187e-05, | |
| "loss": 0.7158, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 6.7164179104477615, | |
| "grad_norm": 29991.36328125, | |
| "learning_rate": 1.6419498253413784e-05, | |
| "loss": 0.7142, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 6.732295966973642, | |
| "grad_norm": 30472.951171875, | |
| "learning_rate": 1.6340107970784378e-05, | |
| "loss": 0.7157, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 6.748174023499524, | |
| "grad_norm": 29432.845703125, | |
| "learning_rate": 1.626071768815497e-05, | |
| "loss": 0.7164, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 6.7640520800254045, | |
| "grad_norm": 30286.595703125, | |
| "learning_rate": 1.6181327405525566e-05, | |
| "loss": 0.7215, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 6.779930136551286, | |
| "grad_norm": 29090.42578125, | |
| "learning_rate": 1.6101937122896156e-05, | |
| "loss": 0.7189, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 6.795808193077168, | |
| "grad_norm": 29895.060546875, | |
| "learning_rate": 1.6022546840266754e-05, | |
| "loss": 0.7172, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 6.811686249603048, | |
| "grad_norm": 29853.85546875, | |
| "learning_rate": 1.5943156557637344e-05, | |
| "loss": 0.713, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 6.82756430612893, | |
| "grad_norm": 30071.662109375, | |
| "learning_rate": 1.586376627500794e-05, | |
| "loss": 0.712, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.843442362654811, | |
| "grad_norm": 28977.619140625, | |
| "learning_rate": 1.5784375992378535e-05, | |
| "loss": 0.7134, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 6.859320419180692, | |
| "grad_norm": 29966.154296875, | |
| "learning_rate": 1.570498570974913e-05, | |
| "loss": 0.7147, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 6.875198475706574, | |
| "grad_norm": 29842.240234375, | |
| "learning_rate": 1.5625595427119723e-05, | |
| "loss": 0.7201, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 6.8910765322324545, | |
| "grad_norm": 29326.939453125, | |
| "learning_rate": 1.5546205144490313e-05, | |
| "loss": 0.7159, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 6.906954588758336, | |
| "grad_norm": 30401.865234375, | |
| "learning_rate": 1.546681486186091e-05, | |
| "loss": 0.7154, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 6.922832645284217, | |
| "grad_norm": 30828.583984375, | |
| "learning_rate": 1.53874245792315e-05, | |
| "loss": 0.7167, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 6.938710701810098, | |
| "grad_norm": 30084.78125, | |
| "learning_rate": 1.5308034296602098e-05, | |
| "loss": 0.7147, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 6.95458875833598, | |
| "grad_norm": 29893.818359375, | |
| "learning_rate": 1.522864401397269e-05, | |
| "loss": 0.7174, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 6.970466814861861, | |
| "grad_norm": 29464.3125, | |
| "learning_rate": 1.5149253731343285e-05, | |
| "loss": 0.7158, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 6.986344871387742, | |
| "grad_norm": 28943.29296875, | |
| "learning_rate": 1.5069863448713878e-05, | |
| "loss": 0.7111, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 7.002222927913623, | |
| "grad_norm": 30447.416015625, | |
| "learning_rate": 1.4990473166084473e-05, | |
| "loss": 0.7167, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 7.018100984439505, | |
| "grad_norm": 30325.32421875, | |
| "learning_rate": 1.4911082883455065e-05, | |
| "loss": 0.7102, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 7.033979040965386, | |
| "grad_norm": 30525.169921875, | |
| "learning_rate": 1.4831692600825659e-05, | |
| "loss": 0.7102, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 7.049857097491267, | |
| "grad_norm": 30165.91015625, | |
| "learning_rate": 1.4752302318196254e-05, | |
| "loss": 0.7124, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 7.0657351540171485, | |
| "grad_norm": 29645.66796875, | |
| "learning_rate": 1.4672912035566847e-05, | |
| "loss": 0.709, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 7.081613210543029, | |
| "grad_norm": 29299.05859375, | |
| "learning_rate": 1.4593521752937442e-05, | |
| "loss": 0.7105, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 7.097491267068911, | |
| "grad_norm": 29655.083984375, | |
| "learning_rate": 1.4514131470308034e-05, | |
| "loss": 0.7106, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 7.113369323594792, | |
| "grad_norm": 30337.484375, | |
| "learning_rate": 1.443474118767863e-05, | |
| "loss": 0.7153, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 7.129247380120673, | |
| "grad_norm": 29374.36328125, | |
| "learning_rate": 1.4355350905049222e-05, | |
| "loss": 0.7101, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 7.145125436646555, | |
| "grad_norm": 29468.953125, | |
| "learning_rate": 1.4275960622419817e-05, | |
| "loss": 0.7074, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 7.161003493172435, | |
| "grad_norm": 30660.234375, | |
| "learning_rate": 1.4196570339790411e-05, | |
| "loss": 0.7107, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 7.176881549698317, | |
| "grad_norm": 29092.36328125, | |
| "learning_rate": 1.4117180057161003e-05, | |
| "loss": 0.7102, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 7.1927596062241985, | |
| "grad_norm": 29264.359375, | |
| "learning_rate": 1.4037789774531599e-05, | |
| "loss": 0.7075, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 7.208637662750079, | |
| "grad_norm": 30579.66015625, | |
| "learning_rate": 1.3958399491902191e-05, | |
| "loss": 0.7097, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 7.224515719275961, | |
| "grad_norm": 29726.57421875, | |
| "learning_rate": 1.3879009209272786e-05, | |
| "loss": 0.7095, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 7.2403937758018415, | |
| "grad_norm": 30103.4921875, | |
| "learning_rate": 1.3799618926643379e-05, | |
| "loss": 0.7108, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 7.256271832327723, | |
| "grad_norm": 29993.138671875, | |
| "learning_rate": 1.3720228644013974e-05, | |
| "loss": 0.709, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 7.272149888853605, | |
| "grad_norm": 30139.607421875, | |
| "learning_rate": 1.3640838361384566e-05, | |
| "loss": 0.7087, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 7.288027945379485, | |
| "grad_norm": 29735.54296875, | |
| "learning_rate": 1.3561448078755162e-05, | |
| "loss": 0.7094, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 7.303906001905367, | |
| "grad_norm": 29745.779296875, | |
| "learning_rate": 1.3482057796125755e-05, | |
| "loss": 0.7102, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 7.319784058431248, | |
| "grad_norm": 30882.021484375, | |
| "learning_rate": 1.3402667513496348e-05, | |
| "loss": 0.7108, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 7.335662114957129, | |
| "grad_norm": 29850.26171875, | |
| "learning_rate": 1.3323277230866943e-05, | |
| "loss": 0.7105, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 7.351540171483011, | |
| "grad_norm": 30374.646484375, | |
| "learning_rate": 1.3243886948237535e-05, | |
| "loss": 0.7119, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 7.367418228008892, | |
| "grad_norm": 29295.6640625, | |
| "learning_rate": 1.316449666560813e-05, | |
| "loss": 0.7082, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 7.383296284534773, | |
| "grad_norm": 30273.5703125, | |
| "learning_rate": 1.3085106382978723e-05, | |
| "loss": 0.7087, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 7.399174341060654, | |
| "grad_norm": 30317.859375, | |
| "learning_rate": 1.3005716100349318e-05, | |
| "loss": 0.7079, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 7.4150523975865354, | |
| "grad_norm": 29483.0, | |
| "learning_rate": 1.2926325817719912e-05, | |
| "loss": 0.7065, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 7.430930454112417, | |
| "grad_norm": 29725.1875, | |
| "learning_rate": 1.2846935535090508e-05, | |
| "loss": 0.7103, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 7.446808510638298, | |
| "grad_norm": 29491.5234375, | |
| "learning_rate": 1.27675452524611e-05, | |
| "loss": 0.7102, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 7.462686567164179, | |
| "grad_norm": 30156.08984375, | |
| "learning_rate": 1.2688154969831692e-05, | |
| "loss": 0.7093, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 7.47856462369006, | |
| "grad_norm": 29961.205078125, | |
| "learning_rate": 1.2608764687202287e-05, | |
| "loss": 0.7082, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 7.494442680215942, | |
| "grad_norm": 29865.1640625, | |
| "learning_rate": 1.252937440457288e-05, | |
| "loss": 0.7043, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 7.510320736741823, | |
| "grad_norm": 30199.93359375, | |
| "learning_rate": 1.2449984121943473e-05, | |
| "loss": 0.7076, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 7.526198793267704, | |
| "grad_norm": 30157.5, | |
| "learning_rate": 1.2370593839314069e-05, | |
| "loss": 0.7079, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 7.5420768497935855, | |
| "grad_norm": 29760.25390625, | |
| "learning_rate": 1.2291203556684663e-05, | |
| "loss": 0.7048, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 7.557954906319466, | |
| "grad_norm": 30412.80078125, | |
| "learning_rate": 1.2211813274055256e-05, | |
| "loss": 0.7067, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 7.573832962845348, | |
| "grad_norm": 29912.32421875, | |
| "learning_rate": 1.213242299142585e-05, | |
| "loss": 0.7092, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 7.589711019371229, | |
| "grad_norm": 30394.833984375, | |
| "learning_rate": 1.2053032708796444e-05, | |
| "loss": 0.7085, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 7.60558907589711, | |
| "grad_norm": 30330.65625, | |
| "learning_rate": 1.1973642426167038e-05, | |
| "loss": 0.7055, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 7.621467132422992, | |
| "grad_norm": 30122.162109375, | |
| "learning_rate": 1.1894252143537632e-05, | |
| "loss": 0.7078, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.637345188948872, | |
| "grad_norm": 29642.91796875, | |
| "learning_rate": 1.1814861860908225e-05, | |
| "loss": 0.7062, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 7.653223245474754, | |
| "grad_norm": 30002.765625, | |
| "learning_rate": 1.173547157827882e-05, | |
| "loss": 0.7054, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 7.669101302000636, | |
| "grad_norm": 29660.470703125, | |
| "learning_rate": 1.1656081295649413e-05, | |
| "loss": 0.7048, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 7.684979358526516, | |
| "grad_norm": 30191.8828125, | |
| "learning_rate": 1.1576691013020007e-05, | |
| "loss": 0.7085, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 7.700857415052398, | |
| "grad_norm": 29762.623046875, | |
| "learning_rate": 1.14973007303906e-05, | |
| "loss": 0.7102, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 7.7167354715782785, | |
| "grad_norm": 29867.5625, | |
| "learning_rate": 1.1417910447761194e-05, | |
| "loss": 0.704, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 7.73261352810416, | |
| "grad_norm": 30145.470703125, | |
| "learning_rate": 1.1338520165131788e-05, | |
| "loss": 0.7043, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 7.748491584630042, | |
| "grad_norm": 29339.95703125, | |
| "learning_rate": 1.1259129882502382e-05, | |
| "loss": 0.7092, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 7.764369641155922, | |
| "grad_norm": 30186.40234375, | |
| "learning_rate": 1.1179739599872976e-05, | |
| "loss": 0.7063, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 7.780247697681804, | |
| "grad_norm": 30043.974609375, | |
| "learning_rate": 1.110034931724357e-05, | |
| "loss": 0.7053, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.796125754207685, | |
| "grad_norm": 30453.666015625, | |
| "learning_rate": 1.1020959034614164e-05, | |
| "loss": 0.7066, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 7.812003810733566, | |
| "grad_norm": 30049.93359375, | |
| "learning_rate": 1.0941568751984757e-05, | |
| "loss": 0.71, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 7.827881867259448, | |
| "grad_norm": 30561.3203125, | |
| "learning_rate": 1.0862178469355351e-05, | |
| "loss": 0.7065, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 7.843759923785329, | |
| "grad_norm": 30853.423828125, | |
| "learning_rate": 1.0782788186725945e-05, | |
| "loss": 0.706, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 7.85963798031121, | |
| "grad_norm": 29456.958984375, | |
| "learning_rate": 1.0703397904096539e-05, | |
| "loss": 0.7093, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 7.875516036837091, | |
| "grad_norm": 31488.560546875, | |
| "learning_rate": 1.0624007621467133e-05, | |
| "loss": 0.7063, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 7.8913940933629725, | |
| "grad_norm": 30248.27734375, | |
| "learning_rate": 1.0544617338837726e-05, | |
| "loss": 0.7062, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 7.907272149888854, | |
| "grad_norm": 29547.48828125, | |
| "learning_rate": 1.046522705620832e-05, | |
| "loss": 0.7069, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 7.923150206414735, | |
| "grad_norm": 29567.212890625, | |
| "learning_rate": 1.0385836773578916e-05, | |
| "loss": 0.7059, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 7.939028262940616, | |
| "grad_norm": 30756.359375, | |
| "learning_rate": 1.0306446490949508e-05, | |
| "loss": 0.7056, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.954906319466497, | |
| "grad_norm": 29843.1953125, | |
| "learning_rate": 1.0227056208320102e-05, | |
| "loss": 0.7058, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 7.970784375992379, | |
| "grad_norm": 30317.7734375, | |
| "learning_rate": 1.0147665925690695e-05, | |
| "loss": 0.7062, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 7.98666243251826, | |
| "grad_norm": 29659.53125, | |
| "learning_rate": 1.006827564306129e-05, | |
| "loss": 0.703, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 8.002540489044142, | |
| "grad_norm": 30429.38671875, | |
| "learning_rate": 9.988885360431883e-06, | |
| "loss": 0.7054, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 8.018418545570022, | |
| "grad_norm": 31551.69140625, | |
| "learning_rate": 9.909495077802477e-06, | |
| "loss": 0.7015, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 8.034296602095903, | |
| "grad_norm": 30067.037109375, | |
| "learning_rate": 9.83010479517307e-06, | |
| "loss": 0.7018, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 8.050174658621785, | |
| "grad_norm": 30266.275390625, | |
| "learning_rate": 9.750714512543666e-06, | |
| "loss": 0.704, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 8.066052715147666, | |
| "grad_norm": 30958.685546875, | |
| "learning_rate": 9.67132422991426e-06, | |
| "loss": 0.7018, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 8.081930771673548, | |
| "grad_norm": 30467.9921875, | |
| "learning_rate": 9.591933947284852e-06, | |
| "loss": 0.7016, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 8.097808828199428, | |
| "grad_norm": 30163.07421875, | |
| "learning_rate": 9.512543664655446e-06, | |
| "loss": 0.7042, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 8.11368688472531, | |
| "grad_norm": 31141.931640625, | |
| "learning_rate": 9.43315338202604e-06, | |
| "loss": 0.6998, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 8.129564941251191, | |
| "grad_norm": 30357.98828125, | |
| "learning_rate": 9.353763099396634e-06, | |
| "loss": 0.7008, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 8.145442997777073, | |
| "grad_norm": 30259.6171875, | |
| "learning_rate": 9.274372816767227e-06, | |
| "loss": 0.7012, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 8.161321054302954, | |
| "grad_norm": 29641.35546875, | |
| "learning_rate": 9.194982534137823e-06, | |
| "loss": 0.6998, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 8.177199110828834, | |
| "grad_norm": 30559.46875, | |
| "learning_rate": 9.115592251508417e-06, | |
| "loss": 0.706, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 8.193077167354716, | |
| "grad_norm": 30127.62890625, | |
| "learning_rate": 9.03620196887901e-06, | |
| "loss": 0.7063, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 8.208955223880597, | |
| "grad_norm": 30027.09375, | |
| "learning_rate": 8.956811686249604e-06, | |
| "loss": 0.7025, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 8.224833280406479, | |
| "grad_norm": 30069.939453125, | |
| "learning_rate": 8.877421403620196e-06, | |
| "loss": 0.7002, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 8.24071133693236, | |
| "grad_norm": 30299.6171875, | |
| "learning_rate": 8.79803112099079e-06, | |
| "loss": 0.6996, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 8.25658939345824, | |
| "grad_norm": 30868.3671875, | |
| "learning_rate": 8.718640838361384e-06, | |
| "loss": 0.7012, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 8.272467449984122, | |
| "grad_norm": 30317.115234375, | |
| "learning_rate": 8.639250555731978e-06, | |
| "loss": 0.7003, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 8.288345506510003, | |
| "grad_norm": 30630.19921875, | |
| "learning_rate": 8.559860273102573e-06, | |
| "loss": 0.7023, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 8.304223563035885, | |
| "grad_norm": 31124.72265625, | |
| "learning_rate": 8.480469990473167e-06, | |
| "loss": 0.6989, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 8.320101619561765, | |
| "grad_norm": 30239.1875, | |
| "learning_rate": 8.401079707843761e-06, | |
| "loss": 0.7013, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 8.335979676087646, | |
| "grad_norm": 30741.263671875, | |
| "learning_rate": 8.321689425214355e-06, | |
| "loss": 0.7029, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 8.351857732613528, | |
| "grad_norm": 29795.580078125, | |
| "learning_rate": 8.242299142584949e-06, | |
| "loss": 0.6995, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 8.36773578913941, | |
| "grad_norm": 30709.451171875, | |
| "learning_rate": 8.16290885995554e-06, | |
| "loss": 0.7009, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 8.383613845665291, | |
| "grad_norm": 30076.46875, | |
| "learning_rate": 8.083518577326135e-06, | |
| "loss": 0.6988, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 8.399491902191173, | |
| "grad_norm": 29763.3828125, | |
| "learning_rate": 8.004128294696728e-06, | |
| "loss": 0.7028, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 8.415369958717053, | |
| "grad_norm": 30590.60546875, | |
| "learning_rate": 7.924738012067324e-06, | |
| "loss": 0.7067, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 8.431248015242934, | |
| "grad_norm": 30087.3671875, | |
| "learning_rate": 7.845347729437918e-06, | |
| "loss": 0.702, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 8.447126071768816, | |
| "grad_norm": 30778.0859375, | |
| "learning_rate": 7.765957446808511e-06, | |
| "loss": 0.7028, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 8.463004128294697, | |
| "grad_norm": 30542.35546875, | |
| "learning_rate": 7.686567164179105e-06, | |
| "loss": 0.6996, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 8.478882184820577, | |
| "grad_norm": 30533.51171875, | |
| "learning_rate": 7.607176881549699e-06, | |
| "loss": 0.7023, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 8.494760241346459, | |
| "grad_norm": 30710.169921875, | |
| "learning_rate": 7.527786598920293e-06, | |
| "loss": 0.6998, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 8.51063829787234, | |
| "grad_norm": 30740.416015625, | |
| "learning_rate": 7.448396316290886e-06, | |
| "loss": 0.7036, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 8.526516354398222, | |
| "grad_norm": 29898.5390625, | |
| "learning_rate": 7.36900603366148e-06, | |
| "loss": 0.7007, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 8.542394410924103, | |
| "grad_norm": 30001.548828125, | |
| "learning_rate": 7.2896157510320734e-06, | |
| "loss": 0.6958, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 8.558272467449985, | |
| "grad_norm": 30819.171875, | |
| "learning_rate": 7.210225468402667e-06, | |
| "loss": 0.6976, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 8.574150523975865, | |
| "grad_norm": 30720.3125, | |
| "learning_rate": 7.130835185773262e-06, | |
| "loss": 0.6985, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.590028580501746, | |
| "grad_norm": 31220.265625, | |
| "learning_rate": 7.051444903143856e-06, | |
| "loss": 0.7008, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 8.605906637027628, | |
| "grad_norm": 31095.111328125, | |
| "learning_rate": 6.9720546205144495e-06, | |
| "loss": 0.7011, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 8.62178469355351, | |
| "grad_norm": 30474.576171875, | |
| "learning_rate": 6.892664337885043e-06, | |
| "loss": 0.6994, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 8.63766275007939, | |
| "grad_norm": 30038.474609375, | |
| "learning_rate": 6.813274055255638e-06, | |
| "loss": 0.7002, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 8.653540806605271, | |
| "grad_norm": 30336.1953125, | |
| "learning_rate": 6.73388377262623e-06, | |
| "loss": 0.6994, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 8.669418863131153, | |
| "grad_norm": 30122.53125, | |
| "learning_rate": 6.654493489996824e-06, | |
| "loss": 0.7004, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 8.685296919657034, | |
| "grad_norm": 30615.431640625, | |
| "learning_rate": 6.575103207367419e-06, | |
| "loss": 0.7021, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 8.701174976182916, | |
| "grad_norm": 30339.583984375, | |
| "learning_rate": 6.495712924738012e-06, | |
| "loss": 0.7007, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 8.717053032708796, | |
| "grad_norm": 30189.884765625, | |
| "learning_rate": 6.416322642108606e-06, | |
| "loss": 0.6986, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 8.732931089234677, | |
| "grad_norm": 30549.412109375, | |
| "learning_rate": 6.3369323594792e-06, | |
| "loss": 0.6996, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 8.748809145760559, | |
| "grad_norm": 30457.353515625, | |
| "learning_rate": 6.257542076849794e-06, | |
| "loss": 0.6991, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 8.76468720228644, | |
| "grad_norm": 31163.732421875, | |
| "learning_rate": 6.178151794220388e-06, | |
| "loss": 0.702, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 8.780565258812322, | |
| "grad_norm": 29878.865234375, | |
| "learning_rate": 6.0987615115909814e-06, | |
| "loss": 0.6999, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 8.796443315338202, | |
| "grad_norm": 30267.07421875, | |
| "learning_rate": 6.019371228961575e-06, | |
| "loss": 0.7014, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 8.812321371864083, | |
| "grad_norm": 30354.353515625, | |
| "learning_rate": 5.939980946332169e-06, | |
| "loss": 0.6982, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 8.828199428389965, | |
| "grad_norm": 30582.365234375, | |
| "learning_rate": 5.860590663702763e-06, | |
| "loss": 0.7002, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 8.844077484915847, | |
| "grad_norm": 30702.970703125, | |
| "learning_rate": 5.781200381073357e-06, | |
| "loss": 0.6975, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 8.859955541441728, | |
| "grad_norm": 31331.51953125, | |
| "learning_rate": 5.7018100984439505e-06, | |
| "loss": 0.6966, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 8.875833597967608, | |
| "grad_norm": 31188.05859375, | |
| "learning_rate": 5.622419815814544e-06, | |
| "loss": 0.6976, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 8.89171165449349, | |
| "grad_norm": 30835.2578125, | |
| "learning_rate": 5.543029533185139e-06, | |
| "loss": 0.6985, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.907589711019371, | |
| "grad_norm": 30251.89453125, | |
| "learning_rate": 5.463639250555732e-06, | |
| "loss": 0.7002, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 8.923467767545253, | |
| "grad_norm": 30508.73828125, | |
| "learning_rate": 5.384248967926326e-06, | |
| "loss": 0.6986, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 8.939345824071134, | |
| "grad_norm": 30640.4375, | |
| "learning_rate": 5.3048586852969195e-06, | |
| "loss": 0.7019, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 8.955223880597014, | |
| "grad_norm": 31159.490234375, | |
| "learning_rate": 5.225468402667514e-06, | |
| "loss": 0.6987, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 8.971101937122896, | |
| "grad_norm": 30732.70703125, | |
| "learning_rate": 5.146078120038107e-06, | |
| "loss": 0.6996, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 8.986979993648777, | |
| "grad_norm": 30562.67578125, | |
| "learning_rate": 5.066687837408701e-06, | |
| "loss": 0.7028, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 9.002858050174659, | |
| "grad_norm": 31258.55078125, | |
| "learning_rate": 4.987297554779296e-06, | |
| "loss": 0.6952, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 9.01873610670054, | |
| "grad_norm": 30664.671875, | |
| "learning_rate": 4.9079072721498894e-06, | |
| "loss": 0.6945, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 9.03461416322642, | |
| "grad_norm": 31010.13671875, | |
| "learning_rate": 4.828516989520483e-06, | |
| "loss": 0.6963, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 9.050492219752302, | |
| "grad_norm": 31770.1484375, | |
| "learning_rate": 4.749126706891076e-06, | |
| "loss": 0.6951, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 9.066370276278183, | |
| "grad_norm": 30270.96484375, | |
| "learning_rate": 4.669736424261671e-06, | |
| "loss": 0.6962, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 9.082248332804065, | |
| "grad_norm": 31095.724609375, | |
| "learning_rate": 4.590346141632265e-06, | |
| "loss": 0.6951, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 9.098126389329947, | |
| "grad_norm": 30564.689453125, | |
| "learning_rate": 4.5109558590028585e-06, | |
| "loss": 0.6958, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 9.114004445855826, | |
| "grad_norm": 30729.578125, | |
| "learning_rate": 4.4315655763734514e-06, | |
| "loss": 0.6965, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 9.129882502381708, | |
| "grad_norm": 30804.77734375, | |
| "learning_rate": 4.352175293744046e-06, | |
| "loss": 0.6987, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 9.14576055890759, | |
| "grad_norm": 30098.283203125, | |
| "learning_rate": 4.27278501111464e-06, | |
| "loss": 0.693, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 9.161638615433471, | |
| "grad_norm": 31217.767578125, | |
| "learning_rate": 4.193394728485234e-06, | |
| "loss": 0.6952, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 9.177516671959353, | |
| "grad_norm": 30664.6953125, | |
| "learning_rate": 4.1140044458558275e-06, | |
| "loss": 0.6961, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 9.193394728485233, | |
| "grad_norm": 30483.142578125, | |
| "learning_rate": 4.034614163226421e-06, | |
| "loss": 0.6929, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 9.209272785011114, | |
| "grad_norm": 30538.34765625, | |
| "learning_rate": 3.955223880597015e-06, | |
| "loss": 0.6964, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 9.225150841536996, | |
| "grad_norm": 31039.91796875, | |
| "learning_rate": 3.875833597967609e-06, | |
| "loss": 0.6946, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 9.241028898062877, | |
| "grad_norm": 30256.009765625, | |
| "learning_rate": 3.796443315338203e-06, | |
| "loss": 0.6946, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 9.256906954588759, | |
| "grad_norm": 31257.78125, | |
| "learning_rate": 3.717053032708796e-06, | |
| "loss": 0.6942, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 9.272785011114639, | |
| "grad_norm": 30951.634765625, | |
| "learning_rate": 3.6376627500793904e-06, | |
| "loss": 0.6944, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 9.28866306764052, | |
| "grad_norm": 30461.291015625, | |
| "learning_rate": 3.558272467449984e-06, | |
| "loss": 0.6942, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 9.304541124166402, | |
| "grad_norm": 30459.7109375, | |
| "learning_rate": 3.4788821848205784e-06, | |
| "loss": 0.699, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 9.320419180692284, | |
| "grad_norm": 30297.685546875, | |
| "learning_rate": 3.3994919021911722e-06, | |
| "loss": 0.6909, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 9.336297237218165, | |
| "grad_norm": 30345.24609375, | |
| "learning_rate": 3.3201016195617656e-06, | |
| "loss": 0.6962, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 9.352175293744045, | |
| "grad_norm": 30085.6484375, | |
| "learning_rate": 3.2407113369323594e-06, | |
| "loss": 0.6957, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 9.368053350269927, | |
| "grad_norm": 31095.689453125, | |
| "learning_rate": 3.1613210543029537e-06, | |
| "loss": 0.698, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 9.383931406795808, | |
| "grad_norm": 30798.33203125, | |
| "learning_rate": 3.081930771673547e-06, | |
| "loss": 0.6938, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 9.39980946332169, | |
| "grad_norm": 30953.390625, | |
| "learning_rate": 3.0025404890441413e-06, | |
| "loss": 0.6934, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 9.415687519847571, | |
| "grad_norm": 30614.728515625, | |
| "learning_rate": 2.9231502064147347e-06, | |
| "loss": 0.6936, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 9.431565576373451, | |
| "grad_norm": 30555.818359375, | |
| "learning_rate": 2.843759923785329e-06, | |
| "loss": 0.6925, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 9.447443632899333, | |
| "grad_norm": 30385.14453125, | |
| "learning_rate": 2.7643696411559227e-06, | |
| "loss": 0.6989, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 9.463321689425214, | |
| "grad_norm": 30924.46875, | |
| "learning_rate": 2.6849793585265165e-06, | |
| "loss": 0.697, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 9.479199745951096, | |
| "grad_norm": 31371.728515625, | |
| "learning_rate": 2.6055890758971103e-06, | |
| "loss": 0.6955, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 9.495077802476978, | |
| "grad_norm": 30738.732421875, | |
| "learning_rate": 2.526198793267704e-06, | |
| "loss": 0.6945, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 9.510955859002857, | |
| "grad_norm": 30800.96484375, | |
| "learning_rate": 2.446808510638298e-06, | |
| "loss": 0.6951, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 9.526833915528739, | |
| "grad_norm": 30367.607421875, | |
| "learning_rate": 2.3674182280088918e-06, | |
| "loss": 0.6975, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.54271197205462, | |
| "grad_norm": 30162.599609375, | |
| "learning_rate": 2.2880279453794856e-06, | |
| "loss": 0.6949, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 9.558590028580502, | |
| "grad_norm": 30217.197265625, | |
| "learning_rate": 2.2086376627500794e-06, | |
| "loss": 0.6921, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 9.574468085106384, | |
| "grad_norm": 30000.412109375, | |
| "learning_rate": 2.129247380120673e-06, | |
| "loss": 0.6967, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 9.590346141632264, | |
| "grad_norm": 30676.177734375, | |
| "learning_rate": 2.0498570974912674e-06, | |
| "loss": 0.6957, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 9.606224198158145, | |
| "grad_norm": 30816.064453125, | |
| "learning_rate": 1.970466814861861e-06, | |
| "loss": 0.6944, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 9.622102254684027, | |
| "grad_norm": 30859.091796875, | |
| "learning_rate": 1.891076532232455e-06, | |
| "loss": 0.695, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 9.637980311209908, | |
| "grad_norm": 30824.66015625, | |
| "learning_rate": 1.8116862496030486e-06, | |
| "loss": 0.6955, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 9.65385836773579, | |
| "grad_norm": 31179.345703125, | |
| "learning_rate": 1.7322959669736427e-06, | |
| "loss": 0.692, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 9.66973642426167, | |
| "grad_norm": 29985.033203125, | |
| "learning_rate": 1.6529056843442363e-06, | |
| "loss": 0.6961, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 9.685614480787551, | |
| "grad_norm": 29948.798828125, | |
| "learning_rate": 1.5735154017148303e-06, | |
| "loss": 0.6948, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 9.701492537313433, | |
| "grad_norm": 30504.94140625, | |
| "learning_rate": 1.494125119085424e-06, | |
| "loss": 0.6955, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 9.717370593839314, | |
| "grad_norm": 30339.3515625, | |
| "learning_rate": 1.414734836456018e-06, | |
| "loss": 0.6962, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 9.733248650365196, | |
| "grad_norm": 30103.78125, | |
| "learning_rate": 1.3353445538266117e-06, | |
| "loss": 0.6953, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 9.749126706891076, | |
| "grad_norm": 30390.306640625, | |
| "learning_rate": 1.2559542711972055e-06, | |
| "loss": 0.6962, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 9.765004763416957, | |
| "grad_norm": 30574.888671875, | |
| "learning_rate": 1.1765639885677993e-06, | |
| "loss": 0.6963, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 9.780882819942839, | |
| "grad_norm": 30706.4765625, | |
| "learning_rate": 1.0971737059383931e-06, | |
| "loss": 0.6928, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 9.79676087646872, | |
| "grad_norm": 29788.431640625, | |
| "learning_rate": 1.0177834233089872e-06, | |
| "loss": 0.6918, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 9.812638932994602, | |
| "grad_norm": 30614.6875, | |
| "learning_rate": 9.383931406795809e-07, | |
| "loss": 0.6931, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 9.828516989520482, | |
| "grad_norm": 30468.765625, | |
| "learning_rate": 8.590028580501748e-07, | |
| "loss": 0.6976, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 9.844395046046364, | |
| "grad_norm": 29652.828125, | |
| "learning_rate": 7.796125754207686e-07, | |
| "loss": 0.6938, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.860273102572245, | |
| "grad_norm": 31199.251953125, | |
| "learning_rate": 7.002222927913624e-07, | |
| "loss": 0.6965, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 9.876151159098127, | |
| "grad_norm": 30488.6796875, | |
| "learning_rate": 6.208320101619562e-07, | |
| "loss": 0.6938, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 9.892029215624008, | |
| "grad_norm": 30915.556640625, | |
| "learning_rate": 5.4144172753255e-07, | |
| "loss": 0.6951, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 9.907907272149888, | |
| "grad_norm": 30898.82421875, | |
| "learning_rate": 4.620514449031439e-07, | |
| "loss": 0.6929, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 9.92378532867577, | |
| "grad_norm": 31000.388671875, | |
| "learning_rate": 3.826611622737377e-07, | |
| "loss": 0.6931, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 9.939663385201651, | |
| "grad_norm": 30619.94140625, | |
| "learning_rate": 3.0327087964433156e-07, | |
| "loss": 0.6963, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 9.955541441727533, | |
| "grad_norm": 30345.3515625, | |
| "learning_rate": 2.238805970149254e-07, | |
| "loss": 0.6932, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 9.971419498253415, | |
| "grad_norm": 31431.78515625, | |
| "learning_rate": 1.444903143855192e-07, | |
| "loss": 0.6936, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 9.987297554779294, | |
| "grad_norm": 30420.556640625, | |
| "learning_rate": 6.510003175611305e-08, | |
| "loss": 0.6958, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 31490, | |
| "total_flos": 1.05298598559744e+18, | |
| "train_loss": 0.8101965235240424, | |
| "train_runtime": 86746.076, | |
| "train_samples_per_second": 23.228, | |
| "train_steps_per_second": 0.363 | |
| }, | |
| { | |
| "epoch": 10.003175611305176, | |
| "grad_norm": 34598.17578125, | |
| "learning_rate": 4.9992854874563356e-05, | |
| "loss": 0.7254, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 10.019053667831058, | |
| "grad_norm": 32862.16015625, | |
| "learning_rate": 4.995315973324865e-05, | |
| "loss": 0.7148, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 10.03493172435694, | |
| "grad_norm": 34942.2890625, | |
| "learning_rate": 4.991346459193395e-05, | |
| "loss": 0.7129, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 10.05080978088282, | |
| "grad_norm": 32553.541015625, | |
| "learning_rate": 4.987376945061924e-05, | |
| "loss": 0.7192, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 10.0666878374087, | |
| "grad_norm": 32205.275390625, | |
| "learning_rate": 4.983407430930454e-05, | |
| "loss": 0.719, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 10.082565893934582, | |
| "grad_norm": 32618.0078125, | |
| "learning_rate": 4.9794379167989844e-05, | |
| "loss": 0.7201, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 10.098443950460464, | |
| "grad_norm": 31251.701171875, | |
| "learning_rate": 4.975468402667514e-05, | |
| "loss": 0.7209, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 10.114322006986345, | |
| "grad_norm": 30804.701171875, | |
| "learning_rate": 4.971498888536044e-05, | |
| "loss": 0.7199, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 10.130200063512227, | |
| "grad_norm": 31691.65234375, | |
| "learning_rate": 4.967529374404573e-05, | |
| "loss": 0.7242, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 10.146078120038107, | |
| "grad_norm": 31379.8046875, | |
| "learning_rate": 4.9635598602731024e-05, | |
| "loss": 0.7244, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 10.161956176563988, | |
| "grad_norm": 31711.45703125, | |
| "learning_rate": 4.9595903461416325e-05, | |
| "loss": 0.7231, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 10.17783423308987, | |
| "grad_norm": 30201.376953125, | |
| "learning_rate": 4.955620832010162e-05, | |
| "loss": 0.7212, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 10.193712289615751, | |
| "grad_norm": 30604.0234375, | |
| "learning_rate": 4.951651317878692e-05, | |
| "loss": 0.7217, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 10.209590346141633, | |
| "grad_norm": 30721.6171875, | |
| "learning_rate": 4.947681803747222e-05, | |
| "loss": 0.7186, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 10.225468402667513, | |
| "grad_norm": 30430.392578125, | |
| "learning_rate": 4.943712289615751e-05, | |
| "loss": 0.7247, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 10.241346459193394, | |
| "grad_norm": 29437.306640625, | |
| "learning_rate": 4.939742775484281e-05, | |
| "loss": 0.7216, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 10.257224515719276, | |
| "grad_norm": 31316.037109375, | |
| "learning_rate": 4.9357732613528106e-05, | |
| "loss": 0.7232, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 10.273102572245158, | |
| "grad_norm": 30285.005859375, | |
| "learning_rate": 4.93180374722134e-05, | |
| "loss": 0.7232, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 10.28898062877104, | |
| "grad_norm": 30771.517578125, | |
| "learning_rate": 4.92783423308987e-05, | |
| "loss": 0.7211, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 10.304858685296919, | |
| "grad_norm": 31089.01953125, | |
| "learning_rate": 4.9238647189584e-05, | |
| "loss": 0.719, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 10.3207367418228, | |
| "grad_norm": 29312.451171875, | |
| "learning_rate": 4.9198952048269294e-05, | |
| "loss": 0.7217, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 10.336614798348682, | |
| "grad_norm": 29717.74609375, | |
| "learning_rate": 4.9159256906954594e-05, | |
| "loss": 0.7204, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 10.352492854874564, | |
| "grad_norm": 29353.939453125, | |
| "learning_rate": 4.911956176563989e-05, | |
| "loss": 0.7209, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 10.368370911400445, | |
| "grad_norm": 30583.625, | |
| "learning_rate": 4.907986662432518e-05, | |
| "loss": 0.719, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 10.384248967926325, | |
| "grad_norm": 30315.1484375, | |
| "learning_rate": 4.904017148301048e-05, | |
| "loss": 0.7224, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 10.400127024452207, | |
| "grad_norm": 28785.408203125, | |
| "learning_rate": 4.9000476341695775e-05, | |
| "loss": 0.7227, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 10.416005080978088, | |
| "grad_norm": 29400.603515625, | |
| "learning_rate": 4.8960781200381075e-05, | |
| "loss": 0.7191, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 10.43188313750397, | |
| "grad_norm": 28836.03125, | |
| "learning_rate": 4.8921086059066375e-05, | |
| "loss": 0.7191, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 10.447761194029852, | |
| "grad_norm": 29112.181640625, | |
| "learning_rate": 4.888139091775167e-05, | |
| "loss": 0.7196, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 10.463639250555731, | |
| "grad_norm": 29085.232421875, | |
| "learning_rate": 4.884169577643697e-05, | |
| "loss": 0.7202, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 10.479517307081613, | |
| "grad_norm": 29157.4609375, | |
| "learning_rate": 4.880200063512226e-05, | |
| "loss": 0.7185, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 10.495395363607495, | |
| "grad_norm": 29475.744140625, | |
| "learning_rate": 4.8762305493807556e-05, | |
| "loss": 0.7221, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 10.511273420133376, | |
| "grad_norm": 29044.5703125, | |
| "learning_rate": 4.8722610352492857e-05, | |
| "loss": 0.7194, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 10.527151476659256, | |
| "grad_norm": 28844.75390625, | |
| "learning_rate": 4.868291521117816e-05, | |
| "loss": 0.7178, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 10.543029533185138, | |
| "grad_norm": 28828.00390625, | |
| "learning_rate": 4.864322006986345e-05, | |
| "loss": 0.7173, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 10.55890758971102, | |
| "grad_norm": 28988.736328125, | |
| "learning_rate": 4.860352492854875e-05, | |
| "loss": 0.7163, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 10.5747856462369, | |
| "grad_norm": 27665.376953125, | |
| "learning_rate": 4.8563829787234044e-05, | |
| "loss": 0.7177, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 10.590663702762782, | |
| "grad_norm": 28359.263671875, | |
| "learning_rate": 4.852413464591934e-05, | |
| "loss": 0.7215, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 10.606541759288664, | |
| "grad_norm": 28809.84375, | |
| "learning_rate": 4.848443950460464e-05, | |
| "loss": 0.7186, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 10.622419815814544, | |
| "grad_norm": 27395.1171875, | |
| "learning_rate": 4.844474436328993e-05, | |
| "loss": 0.7172, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 10.638297872340425, | |
| "grad_norm": 27581.38671875, | |
| "learning_rate": 4.840504922197523e-05, | |
| "loss": 0.721, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 10.654175928866307, | |
| "grad_norm": 27007.13671875, | |
| "learning_rate": 4.836535408066053e-05, | |
| "loss": 0.7198, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 10.670053985392189, | |
| "grad_norm": 28538.033203125, | |
| "learning_rate": 4.8325658939345826e-05, | |
| "loss": 0.717, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 10.685932041918068, | |
| "grad_norm": 28231.92578125, | |
| "learning_rate": 4.8285963798031126e-05, | |
| "loss": 0.7173, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 10.70181009844395, | |
| "grad_norm": 27481.81640625, | |
| "learning_rate": 4.824626865671642e-05, | |
| "loss": 0.7179, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 10.717688154969832, | |
| "grad_norm": 29380.140625, | |
| "learning_rate": 4.820657351540171e-05, | |
| "loss": 0.7185, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 10.733566211495713, | |
| "grad_norm": 27961.125, | |
| "learning_rate": 4.816687837408701e-05, | |
| "loss": 0.7158, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 10.749444268021595, | |
| "grad_norm": 28789.4453125, | |
| "learning_rate": 4.8127183232772314e-05, | |
| "loss": 0.7184, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 10.765322324547476, | |
| "grad_norm": 28526.916015625, | |
| "learning_rate": 4.808748809145761e-05, | |
| "loss": 0.719, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 10.781200381073356, | |
| "grad_norm": 28261.802734375, | |
| "learning_rate": 4.804779295014291e-05, | |
| "loss": 0.7155, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 10.797078437599238, | |
| "grad_norm": 28534.755859375, | |
| "learning_rate": 4.80080978088282e-05, | |
| "loss": 0.7169, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 10.81295649412512, | |
| "grad_norm": 28207.771484375, | |
| "learning_rate": 4.79684026675135e-05, | |
| "loss": 0.713, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 10.828834550651, | |
| "grad_norm": 27474.365234375, | |
| "learning_rate": 4.7928707526198795e-05, | |
| "loss": 0.7183, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 10.84471260717688, | |
| "grad_norm": 28205.083984375, | |
| "learning_rate": 4.788901238488409e-05, | |
| "loss": 0.7189, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 10.860590663702762, | |
| "grad_norm": 27502.6015625, | |
| "learning_rate": 4.784931724356939e-05, | |
| "loss": 0.7161, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 10.876468720228644, | |
| "grad_norm": 27558.001953125, | |
| "learning_rate": 4.780962210225469e-05, | |
| "loss": 0.7159, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 10.892346776754525, | |
| "grad_norm": 28145.595703125, | |
| "learning_rate": 4.776992696093998e-05, | |
| "loss": 0.7111, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 10.908224833280407, | |
| "grad_norm": 27374.37109375, | |
| "learning_rate": 4.773023181962528e-05, | |
| "loss": 0.718, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 10.924102889806289, | |
| "grad_norm": 27972.486328125, | |
| "learning_rate": 4.7690536678310576e-05, | |
| "loss": 0.7195, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 10.939980946332168, | |
| "grad_norm": 27874.939453125, | |
| "learning_rate": 4.765084153699587e-05, | |
| "loss": 0.717, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 10.95585900285805, | |
| "grad_norm": 27341.05859375, | |
| "learning_rate": 4.761114639568117e-05, | |
| "loss": 0.716, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 10.971737059383932, | |
| "grad_norm": 28336.388671875, | |
| "learning_rate": 4.757145125436647e-05, | |
| "loss": 0.7177, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 10.987615115909813, | |
| "grad_norm": 26653.798828125, | |
| "learning_rate": 4.753175611305177e-05, | |
| "loss": 0.7111, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 11.003493172435693, | |
| "grad_norm": 27900.7265625, | |
| "learning_rate": 4.7492060971737064e-05, | |
| "loss": 0.7124, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 11.019371228961575, | |
| "grad_norm": 27794.94921875, | |
| "learning_rate": 4.745236583042236e-05, | |
| "loss": 0.7103, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 11.035249285487456, | |
| "grad_norm": 27183.654296875, | |
| "learning_rate": 4.741267068910766e-05, | |
| "loss": 0.7052, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 11.051127342013338, | |
| "grad_norm": 27468.392578125, | |
| "learning_rate": 4.737297554779295e-05, | |
| "loss": 0.7084, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 11.06700539853922, | |
| "grad_norm": 26358.43359375, | |
| "learning_rate": 4.7333280406478245e-05, | |
| "loss": 0.7038, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 11.0828834550651, | |
| "grad_norm": 26982.095703125, | |
| "learning_rate": 4.7293585265163545e-05, | |
| "loss": 0.7023, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 11.09876151159098, | |
| "grad_norm": 27181.15625, | |
| "learning_rate": 4.7253890123848845e-05, | |
| "loss": 0.7062, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 11.114639568116862, | |
| "grad_norm": 27455.166015625, | |
| "learning_rate": 4.721419498253414e-05, | |
| "loss": 0.7088, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 11.130517624642744, | |
| "grad_norm": 27479.68359375, | |
| "learning_rate": 4.717449984121944e-05, | |
| "loss": 0.707, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 11.146395681168626, | |
| "grad_norm": 28365.30859375, | |
| "learning_rate": 4.713480469990473e-05, | |
| "loss": 0.7052, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 11.162273737694505, | |
| "grad_norm": 27870.494140625, | |
| "learning_rate": 4.7095109558590026e-05, | |
| "loss": 0.7089, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 11.178151794220387, | |
| "grad_norm": 27870.310546875, | |
| "learning_rate": 4.7055414417275327e-05, | |
| "loss": 0.7066, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 11.194029850746269, | |
| "grad_norm": 28179.03125, | |
| "learning_rate": 4.701571927596063e-05, | |
| "loss": 0.7069, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 11.20990790727215, | |
| "grad_norm": 27911.45703125, | |
| "learning_rate": 4.697602413464593e-05, | |
| "loss": 0.707, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 11.225785963798032, | |
| "grad_norm": 26871.20703125, | |
| "learning_rate": 4.693632899333122e-05, | |
| "loss": 0.7073, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 11.241664020323912, | |
| "grad_norm": 27323.912109375, | |
| "learning_rate": 4.6896633852016514e-05, | |
| "loss": 0.7095, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 11.257542076849793, | |
| "grad_norm": 27162.650390625, | |
| "learning_rate": 4.6856938710701815e-05, | |
| "loss": 0.7047, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 11.273420133375675, | |
| "grad_norm": 26953.41796875, | |
| "learning_rate": 4.681724356938711e-05, | |
| "loss": 0.7061, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 11.289298189901556, | |
| "grad_norm": 26672.6953125, | |
| "learning_rate": 4.67775484280724e-05, | |
| "loss": 0.7059, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 11.305176246427438, | |
| "grad_norm": 27072.08203125, | |
| "learning_rate": 4.67378532867577e-05, | |
| "loss": 0.7047, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 11.321054302953318, | |
| "grad_norm": 26242.37109375, | |
| "learning_rate": 4.6698158145443e-05, | |
| "loss": 0.7042, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 11.3369323594792, | |
| "grad_norm": 26593.7890625, | |
| "learning_rate": 4.6658463004128296e-05, | |
| "loss": 0.7022, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 11.352810416005081, | |
| "grad_norm": 26419.05078125, | |
| "learning_rate": 4.6618767862813596e-05, | |
| "loss": 0.7077, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 11.368688472530962, | |
| "grad_norm": 26334.150390625, | |
| "learning_rate": 4.657907272149889e-05, | |
| "loss": 0.7084, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 11.384566529056844, | |
| "grad_norm": 26394.89453125, | |
| "learning_rate": 4.653937758018419e-05, | |
| "loss": 0.7064, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 11.400444585582724, | |
| "grad_norm": 26715.767578125, | |
| "learning_rate": 4.649968243886948e-05, | |
| "loss": 0.7055, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 11.416322642108605, | |
| "grad_norm": 27197.98828125, | |
| "learning_rate": 4.645998729755478e-05, | |
| "loss": 0.708, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 11.432200698634487, | |
| "grad_norm": 27616.12109375, | |
| "learning_rate": 4.6420292156240084e-05, | |
| "loss": 0.708, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 11.448078755160369, | |
| "grad_norm": 26575.294921875, | |
| "learning_rate": 4.638059701492538e-05, | |
| "loss": 0.7039, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 11.46395681168625, | |
| "grad_norm": 26070.689453125, | |
| "learning_rate": 4.634090187361067e-05, | |
| "loss": 0.7075, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 11.47983486821213, | |
| "grad_norm": 26675.865234375, | |
| "learning_rate": 4.630120673229597e-05, | |
| "loss": 0.7058, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 11.495712924738012, | |
| "grad_norm": 26788.8671875, | |
| "learning_rate": 4.6261511590981265e-05, | |
| "loss": 0.7056, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 11.511590981263893, | |
| "grad_norm": 26736.013671875, | |
| "learning_rate": 4.622181644966656e-05, | |
| "loss": 0.7049, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 11.527469037789775, | |
| "grad_norm": 27259.62109375, | |
| "learning_rate": 4.618212130835186e-05, | |
| "loss": 0.7033, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 11.543347094315656, | |
| "grad_norm": 26500.08984375, | |
| "learning_rate": 4.614242616703716e-05, | |
| "loss": 0.7055, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 11.559225150841536, | |
| "grad_norm": 26374.705078125, | |
| "learning_rate": 4.610273102572246e-05, | |
| "loss": 0.7017, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 11.575103207367418, | |
| "grad_norm": 27614.83984375, | |
| "learning_rate": 4.606303588440775e-05, | |
| "loss": 0.7059, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 11.5909812638933, | |
| "grad_norm": 25694.1015625, | |
| "learning_rate": 4.6023340743093046e-05, | |
| "loss": 0.7048, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 11.606859320419181, | |
| "grad_norm": 26899.56640625, | |
| "learning_rate": 4.5983645601778346e-05, | |
| "loss": 0.7045, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 11.622737376945063, | |
| "grad_norm": 25786.490234375, | |
| "learning_rate": 4.594395046046364e-05, | |
| "loss": 0.7038, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 11.638615433470942, | |
| "grad_norm": 26447.548828125, | |
| "learning_rate": 4.5904255319148933e-05, | |
| "loss": 0.7031, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 11.654493489996824, | |
| "grad_norm": 25961.685546875, | |
| "learning_rate": 4.5864560177834234e-05, | |
| "loss": 0.7059, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 11.670371546522706, | |
| "grad_norm": 26539.162109375, | |
| "learning_rate": 4.5824865036519534e-05, | |
| "loss": 0.7033, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 11.686249603048587, | |
| "grad_norm": 26035.525390625, | |
| "learning_rate": 4.578516989520483e-05, | |
| "loss": 0.7057, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 11.702127659574469, | |
| "grad_norm": 26614.791015625, | |
| "learning_rate": 4.574547475389013e-05, | |
| "loss": 0.7048, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 11.718005716100349, | |
| "grad_norm": 26289.15234375, | |
| "learning_rate": 4.570577961257542e-05, | |
| "loss": 0.6975, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 11.73388377262623, | |
| "grad_norm": 26606.12109375, | |
| "learning_rate": 4.5666084471260715e-05, | |
| "loss": 0.705, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 11.749761829152112, | |
| "grad_norm": 25736.138671875, | |
| "learning_rate": 4.5626389329946015e-05, | |
| "loss": 0.7004, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 11.765639885677993, | |
| "grad_norm": 26059.619140625, | |
| "learning_rate": 4.5586694188631315e-05, | |
| "loss": 0.705, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 11.781517942203875, | |
| "grad_norm": 25117.2734375, | |
| "learning_rate": 4.5546999047316616e-05, | |
| "loss": 0.7088, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 11.797395998729755, | |
| "grad_norm": 27240.82421875, | |
| "learning_rate": 4.550730390600191e-05, | |
| "loss": 0.7003, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 11.813274055255636, | |
| "grad_norm": 26413.03125, | |
| "learning_rate": 4.54676087646872e-05, | |
| "loss": 0.7029, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 11.829152111781518, | |
| "grad_norm": 25459.921875, | |
| "learning_rate": 4.54279136233725e-05, | |
| "loss": 0.6975, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 11.8450301683074, | |
| "grad_norm": 26057.275390625, | |
| "learning_rate": 4.5388218482057797e-05, | |
| "loss": 0.6994, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 11.860908224833281, | |
| "grad_norm": 25720.384765625, | |
| "learning_rate": 4.534852334074309e-05, | |
| "loss": 0.7072, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 11.876786281359161, | |
| "grad_norm": 26672.6015625, | |
| "learning_rate": 4.530882819942839e-05, | |
| "loss": 0.7017, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 11.892664337885043, | |
| "grad_norm": 25892.302734375, | |
| "learning_rate": 4.526913305811369e-05, | |
| "loss": 0.7031, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 11.908542394410924, | |
| "grad_norm": 26435.544921875, | |
| "learning_rate": 4.5229437916798984e-05, | |
| "loss": 0.7011, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 11.924420450936806, | |
| "grad_norm": 25747.921875, | |
| "learning_rate": 4.5189742775484285e-05, | |
| "loss": 0.6979, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 11.940298507462687, | |
| "grad_norm": 25117.19140625, | |
| "learning_rate": 4.515004763416958e-05, | |
| "loss": 0.6996, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 11.956176563988567, | |
| "grad_norm": 26094.021484375, | |
| "learning_rate": 4.511035249285488e-05, | |
| "loss": 0.703, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 11.972054620514449, | |
| "grad_norm": 25621.94140625, | |
| "learning_rate": 4.507065735154017e-05, | |
| "loss": 0.7034, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 11.98793267704033, | |
| "grad_norm": 25541.908203125, | |
| "learning_rate": 4.503096221022547e-05, | |
| "loss": 0.7004, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 12.003810733566212, | |
| "grad_norm": 25723.466796875, | |
| "learning_rate": 4.499126706891077e-05, | |
| "loss": 0.6991, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 12.019688790092093, | |
| "grad_norm": 26813.052734375, | |
| "learning_rate": 4.4951571927596066e-05, | |
| "loss": 0.696, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 12.035566846617973, | |
| "grad_norm": 25913.0390625, | |
| "learning_rate": 4.491187678628136e-05, | |
| "loss": 0.6931, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 12.051444903143855, | |
| "grad_norm": 25332.591796875, | |
| "learning_rate": 4.487218164496666e-05, | |
| "loss": 0.6939, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 12.067322959669736, | |
| "grad_norm": 25602.212890625, | |
| "learning_rate": 4.483248650365195e-05, | |
| "loss": 0.6935, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 12.083201016195618, | |
| "grad_norm": 26077.806640625, | |
| "learning_rate": 4.479279136233725e-05, | |
| "loss": 0.6946, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 12.0990790727215, | |
| "grad_norm": 26038.025390625, | |
| "learning_rate": 4.475309622102255e-05, | |
| "loss": 0.6927, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 12.11495712924738, | |
| "grad_norm": 25922.17578125, | |
| "learning_rate": 4.471340107970785e-05, | |
| "loss": 0.691, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 12.130835185773261, | |
| "grad_norm": 25839.078125, | |
| "learning_rate": 4.467370593839315e-05, | |
| "loss": 0.6924, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 12.146713242299143, | |
| "grad_norm": 25702.123046875, | |
| "learning_rate": 4.463401079707844e-05, | |
| "loss": 0.6918, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 12.162591298825024, | |
| "grad_norm": 26183.7890625, | |
| "learning_rate": 4.4594315655763735e-05, | |
| "loss": 0.6922, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 12.178469355350906, | |
| "grad_norm": 26349.01953125, | |
| "learning_rate": 4.4554620514449035e-05, | |
| "loss": 0.6919, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 12.194347411876786, | |
| "grad_norm": 25578.62890625, | |
| "learning_rate": 4.451492537313433e-05, | |
| "loss": 0.6911, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 12.210225468402667, | |
| "grad_norm": 25342.06640625, | |
| "learning_rate": 4.447523023181963e-05, | |
| "loss": 0.6948, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 12.226103524928549, | |
| "grad_norm": 25582.328125, | |
| "learning_rate": 4.443553509050493e-05, | |
| "loss": 0.6965, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 12.24198158145443, | |
| "grad_norm": 26281.44921875, | |
| "learning_rate": 4.439583994919022e-05, | |
| "loss": 0.6947, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 12.257859637980312, | |
| "grad_norm": 25423.970703125, | |
| "learning_rate": 4.4356144807875516e-05, | |
| "loss": 0.6949, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 12.273737694506192, | |
| "grad_norm": 25645.67578125, | |
| "learning_rate": 4.4316449666560816e-05, | |
| "loss": 0.692, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 12.289615751032073, | |
| "grad_norm": 25957.416015625, | |
| "learning_rate": 4.427675452524611e-05, | |
| "loss": 0.6914, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 12.305493807557955, | |
| "grad_norm": 25020.30859375, | |
| "learning_rate": 4.4237059383931403e-05, | |
| "loss": 0.6956, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 12.321371864083837, | |
| "grad_norm": 25936.24609375, | |
| "learning_rate": 4.4197364242616704e-05, | |
| "loss": 0.6909, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 12.337249920609718, | |
| "grad_norm": 25477.5, | |
| "learning_rate": 4.4157669101302004e-05, | |
| "loss": 0.6915, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 12.353127977135598, | |
| "grad_norm": 24679.041015625, | |
| "learning_rate": 4.4117973959987304e-05, | |
| "loss": 0.6959, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 12.36900603366148, | |
| "grad_norm": 25305.9140625, | |
| "learning_rate": 4.40782788186726e-05, | |
| "loss": 0.6958, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 12.384884090187361, | |
| "grad_norm": 25186.720703125, | |
| "learning_rate": 4.403858367735789e-05, | |
| "loss": 0.6899, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 12.400762146713243, | |
| "grad_norm": 25848.09375, | |
| "learning_rate": 4.399888853604319e-05, | |
| "loss": 0.6954, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 12.416640203239124, | |
| "grad_norm": 25214.7265625, | |
| "learning_rate": 4.3959193394728485e-05, | |
| "loss": 0.6918, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 12.432518259765004, | |
| "grad_norm": 25850.787109375, | |
| "learning_rate": 4.3919498253413785e-05, | |
| "loss": 0.6889, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 12.448396316290886, | |
| "grad_norm": 25535.595703125, | |
| "learning_rate": 4.3879803112099086e-05, | |
| "loss": 0.6913, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 12.464274372816767, | |
| "grad_norm": 25293.98828125, | |
| "learning_rate": 4.384010797078438e-05, | |
| "loss": 0.6909, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 12.480152429342649, | |
| "grad_norm": 25519.966796875, | |
| "learning_rate": 4.380041282946967e-05, | |
| "loss": 0.693, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 12.49603048586853, | |
| "grad_norm": 25590.5390625, | |
| "learning_rate": 4.376071768815497e-05, | |
| "loss": 0.6901, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 12.51190854239441, | |
| "grad_norm": 25909.888671875, | |
| "learning_rate": 4.3721022546840267e-05, | |
| "loss": 0.6908, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 12.527786598920292, | |
| "grad_norm": 25248.21484375, | |
| "learning_rate": 4.368132740552557e-05, | |
| "loss": 0.6932, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 12.543664655446173, | |
| "grad_norm": 24925.970703125, | |
| "learning_rate": 4.364163226421086e-05, | |
| "loss": 0.6914, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 12.559542711972055, | |
| "grad_norm": 25199.6171875, | |
| "learning_rate": 4.360193712289616e-05, | |
| "loss": 0.6937, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 12.575420768497937, | |
| "grad_norm": 25989.927734375, | |
| "learning_rate": 4.356224198158146e-05, | |
| "loss": 0.6912, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 12.591298825023816, | |
| "grad_norm": 24462.072265625, | |
| "learning_rate": 4.3522546840266755e-05, | |
| "loss": 0.6919, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 12.607176881549698, | |
| "grad_norm": 25439.693359375, | |
| "learning_rate": 4.348285169895205e-05, | |
| "loss": 0.6903, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 12.62305493807558, | |
| "grad_norm": 25596.04296875, | |
| "learning_rate": 4.344315655763735e-05, | |
| "loss": 0.6912, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 12.638932994601461, | |
| "grad_norm": 24989.56640625, | |
| "learning_rate": 4.340346141632264e-05, | |
| "loss": 0.6935, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 12.654811051127343, | |
| "grad_norm": 24844.279296875, | |
| "learning_rate": 4.336376627500794e-05, | |
| "loss": 0.6895, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 12.670689107653223, | |
| "grad_norm": 25176.060546875, | |
| "learning_rate": 4.332407113369324e-05, | |
| "loss": 0.6924, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 12.686567164179104, | |
| "grad_norm": 25582.01953125, | |
| "learning_rate": 4.3284375992378536e-05, | |
| "loss": 0.6927, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 12.702445220704986, | |
| "grad_norm": 25024.33203125, | |
| "learning_rate": 4.3244680851063836e-05, | |
| "loss": 0.6904, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 12.718323277230867, | |
| "grad_norm": 25088.216796875, | |
| "learning_rate": 4.320498570974913e-05, | |
| "loss": 0.6904, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 12.734201333756747, | |
| "grad_norm": 24689.61328125, | |
| "learning_rate": 4.316529056843442e-05, | |
| "loss": 0.687, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 12.750079390282629, | |
| "grad_norm": 25595.205078125, | |
| "learning_rate": 4.3125595427119724e-05, | |
| "loss": 0.6911, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 12.76595744680851, | |
| "grad_norm": 25500.544921875, | |
| "learning_rate": 4.308590028580502e-05, | |
| "loss": 0.6894, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 12.781835503334392, | |
| "grad_norm": 24525.78125, | |
| "learning_rate": 4.304620514449032e-05, | |
| "loss": 0.6904, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 12.797713559860274, | |
| "grad_norm": 24815.94921875, | |
| "learning_rate": 4.300651000317562e-05, | |
| "loss": 0.6879, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 12.813591616386155, | |
| "grad_norm": 24612.96875, | |
| "learning_rate": 4.296681486186091e-05, | |
| "loss": 0.693, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 12.829469672912035, | |
| "grad_norm": 25878.453125, | |
| "learning_rate": 4.2927119720546205e-05, | |
| "loss": 0.6877, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 12.845347729437917, | |
| "grad_norm": 25819.634765625, | |
| "learning_rate": 4.2887424579231505e-05, | |
| "loss": 0.6889, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 12.861225785963798, | |
| "grad_norm": 24711.671875, | |
| "learning_rate": 4.28477294379168e-05, | |
| "loss": 0.6897, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 12.87710384248968, | |
| "grad_norm": 25272.193359375, | |
| "learning_rate": 4.280803429660209e-05, | |
| "loss": 0.6916, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 12.89298189901556, | |
| "grad_norm": 24950.6953125, | |
| "learning_rate": 4.27683391552874e-05, | |
| "loss": 0.6874, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 12.908859955541441, | |
| "grad_norm": 25394.751953125, | |
| "learning_rate": 4.272864401397269e-05, | |
| "loss": 0.6909, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 12.924738012067323, | |
| "grad_norm": 25527.599609375, | |
| "learning_rate": 4.268894887265799e-05, | |
| "loss": 0.6888, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 12.940616068593204, | |
| "grad_norm": 25220.8125, | |
| "learning_rate": 4.2649253731343286e-05, | |
| "loss": 0.6902, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 12.956494125119086, | |
| "grad_norm": 24796.541015625, | |
| "learning_rate": 4.260955859002858e-05, | |
| "loss": 0.6894, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 12.972372181644968, | |
| "grad_norm": 25115.955078125, | |
| "learning_rate": 4.256986344871388e-05, | |
| "loss": 0.6908, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 12.988250238170847, | |
| "grad_norm": 24197.728515625, | |
| "learning_rate": 4.2530168307399174e-05, | |
| "loss": 0.6864, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 13.004128294696729, | |
| "grad_norm": 24724.552734375, | |
| "learning_rate": 4.2490473166084474e-05, | |
| "loss": 0.6864, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 13.02000635122261, | |
| "grad_norm": 25012.1640625, | |
| "learning_rate": 4.2450778024769774e-05, | |
| "loss": 0.6799, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 13.035884407748492, | |
| "grad_norm": 24815.64453125, | |
| "learning_rate": 4.241108288345507e-05, | |
| "loss": 0.6837, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 13.051762464274374, | |
| "grad_norm": 25473.265625, | |
| "learning_rate": 4.237138774214036e-05, | |
| "loss": 0.6821, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 13.067640520800254, | |
| "grad_norm": 24979.189453125, | |
| "learning_rate": 4.233169260082566e-05, | |
| "loss": 0.6825, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 13.083518577326135, | |
| "grad_norm": 24369.861328125, | |
| "learning_rate": 4.2291997459510955e-05, | |
| "loss": 0.6802, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 13.099396633852017, | |
| "grad_norm": 24936.068359375, | |
| "learning_rate": 4.2252302318196255e-05, | |
| "loss": 0.6817, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 13.115274690377898, | |
| "grad_norm": 25067.654296875, | |
| "learning_rate": 4.221260717688155e-05, | |
| "loss": 0.68, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 13.13115274690378, | |
| "grad_norm": 24868.97265625, | |
| "learning_rate": 4.217291203556685e-05, | |
| "loss": 0.6811, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 13.14703080342966, | |
| "grad_norm": 24265.5546875, | |
| "learning_rate": 4.213321689425215e-05, | |
| "loss": 0.6813, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 13.162908859955541, | |
| "grad_norm": 24921.30078125, | |
| "learning_rate": 4.209352175293744e-05, | |
| "loss": 0.6796, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 13.178786916481423, | |
| "grad_norm": 24588.80078125, | |
| "learning_rate": 4.2053826611622737e-05, | |
| "loss": 0.6828, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 13.194664973007304, | |
| "grad_norm": 24551.5, | |
| "learning_rate": 4.201413147030804e-05, | |
| "loss": 0.6805, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 13.210543029533184, | |
| "grad_norm": 25206.583984375, | |
| "learning_rate": 4.197443632899333e-05, | |
| "loss": 0.6852, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 13.226421086059066, | |
| "grad_norm": 24448.646484375, | |
| "learning_rate": 4.193474118767863e-05, | |
| "loss": 0.6832, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 13.242299142584947, | |
| "grad_norm": 24609.46484375, | |
| "learning_rate": 4.189504604636393e-05, | |
| "loss": 0.6842, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 13.258177199110829, | |
| "grad_norm": 24898.201171875, | |
| "learning_rate": 4.1855350905049225e-05, | |
| "loss": 0.6812, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 13.27405525563671, | |
| "grad_norm": 24523.78515625, | |
| "learning_rate": 4.1815655763734525e-05, | |
| "loss": 0.6842, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 13.28993331216259, | |
| "grad_norm": 24928.837890625, | |
| "learning_rate": 4.177596062241982e-05, | |
| "loss": 0.6813, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 13.305811368688472, | |
| "grad_norm": 24684.125, | |
| "learning_rate": 4.173626548110511e-05, | |
| "loss": 0.6772, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 13.321689425214354, | |
| "grad_norm": 25091.099609375, | |
| "learning_rate": 4.169657033979041e-05, | |
| "loss": 0.6804, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 13.337567481740235, | |
| "grad_norm": 24772.115234375, | |
| "learning_rate": 4.1656875198475706e-05, | |
| "loss": 0.6788, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 13.353445538266117, | |
| "grad_norm": 25392.5, | |
| "learning_rate": 4.1617180057161006e-05, | |
| "loss": 0.6814, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 13.369323594791997, | |
| "grad_norm": 24344.09375, | |
| "learning_rate": 4.1577484915846306e-05, | |
| "loss": 0.683, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 13.385201651317878, | |
| "grad_norm": 24251.580078125, | |
| "learning_rate": 4.15377897745316e-05, | |
| "loss": 0.6826, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 13.40107970784376, | |
| "grad_norm": 25104.28125, | |
| "learning_rate": 4.149809463321689e-05, | |
| "loss": 0.6812, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 13.416957764369641, | |
| "grad_norm": 24021.865234375, | |
| "learning_rate": 4.1458399491902194e-05, | |
| "loss": 0.6802, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 13.432835820895523, | |
| "grad_norm": 24694.013671875, | |
| "learning_rate": 4.141870435058749e-05, | |
| "loss": 0.683, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 13.448713877421403, | |
| "grad_norm": 24235.306640625, | |
| "learning_rate": 4.137900920927279e-05, | |
| "loss": 0.6818, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 13.464591933947284, | |
| "grad_norm": 24905.02734375, | |
| "learning_rate": 4.133931406795809e-05, | |
| "loss": 0.68, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 13.480469990473166, | |
| "grad_norm": 24339.927734375, | |
| "learning_rate": 4.129961892664338e-05, | |
| "loss": 0.6799, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 13.496348046999048, | |
| "grad_norm": 24592.8828125, | |
| "learning_rate": 4.125992378532868e-05, | |
| "loss": 0.6776, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 13.51222610352493, | |
| "grad_norm": 24951.8125, | |
| "learning_rate": 4.1220228644013975e-05, | |
| "loss": 0.6841, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 13.528104160050809, | |
| "grad_norm": 24222.96875, | |
| "learning_rate": 4.118053350269927e-05, | |
| "loss": 0.6817, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 13.54398221657669, | |
| "grad_norm": 24539.65234375, | |
| "learning_rate": 4.114083836138457e-05, | |
| "loss": 0.6812, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 13.559860273102572, | |
| "grad_norm": 24310.98828125, | |
| "learning_rate": 4.110114322006986e-05, | |
| "loss": 0.6847, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 13.575738329628454, | |
| "grad_norm": 24087.740234375, | |
| "learning_rate": 4.106144807875516e-05, | |
| "loss": 0.6795, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 13.591616386154335, | |
| "grad_norm": 24772.966796875, | |
| "learning_rate": 4.102175293744046e-05, | |
| "loss": 0.68, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 13.607494442680215, | |
| "grad_norm": 24550.08203125, | |
| "learning_rate": 4.0982057796125756e-05, | |
| "loss": 0.6789, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 13.623372499206097, | |
| "grad_norm": 24223.595703125, | |
| "learning_rate": 4.094236265481105e-05, | |
| "loss": 0.6837, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 13.639250555731978, | |
| "grad_norm": 24113.8984375, | |
| "learning_rate": 4.090266751349635e-05, | |
| "loss": 0.681, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 13.65512861225786, | |
| "grad_norm": 24669.611328125, | |
| "learning_rate": 4.0862972372181644e-05, | |
| "loss": 0.6832, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 13.671006668783741, | |
| "grad_norm": 23798.759765625, | |
| "learning_rate": 4.0823277230866944e-05, | |
| "loss": 0.677, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 13.686884725309621, | |
| "grad_norm": 24341.833984375, | |
| "learning_rate": 4.0783582089552244e-05, | |
| "loss": 0.6814, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 13.702762781835503, | |
| "grad_norm": 24048.4609375, | |
| "learning_rate": 4.074388694823754e-05, | |
| "loss": 0.6784, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 13.718640838361384, | |
| "grad_norm": 24171.9921875, | |
| "learning_rate": 4.070419180692284e-05, | |
| "loss": 0.6803, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 13.734518894887266, | |
| "grad_norm": 24474.33203125, | |
| "learning_rate": 4.066449666560813e-05, | |
| "loss": 0.6785, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 13.750396951413148, | |
| "grad_norm": 23632.65625, | |
| "learning_rate": 4.0624801524293425e-05, | |
| "loss": 0.6811, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 13.766275007939027, | |
| "grad_norm": 23942.4140625, | |
| "learning_rate": 4.0585106382978725e-05, | |
| "loss": 0.6802, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 13.782153064464909, | |
| "grad_norm": 24818.43359375, | |
| "learning_rate": 4.054541124166402e-05, | |
| "loss": 0.6817, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 13.79803112099079, | |
| "grad_norm": 23369.2265625, | |
| "learning_rate": 4.050571610034932e-05, | |
| "loss": 0.6811, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 13.813909177516672, | |
| "grad_norm": 24389.009765625, | |
| "learning_rate": 4.046602095903462e-05, | |
| "loss": 0.6805, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 13.829787234042554, | |
| "grad_norm": 24440.2421875, | |
| "learning_rate": 4.042632581771991e-05, | |
| "loss": 0.681, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 13.845665290568434, | |
| "grad_norm": 24627.8203125, | |
| "learning_rate": 4.038663067640521e-05, | |
| "loss": 0.6775, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 13.861543347094315, | |
| "grad_norm": 23863.171875, | |
| "learning_rate": 4.034693553509051e-05, | |
| "loss": 0.6803, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 13.877421403620197, | |
| "grad_norm": 24950.375, | |
| "learning_rate": 4.03072403937758e-05, | |
| "loss": 0.6825, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 13.893299460146078, | |
| "grad_norm": 23713.75390625, | |
| "learning_rate": 4.02675452524611e-05, | |
| "loss": 0.6774, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 13.90917751667196, | |
| "grad_norm": 24111.923828125, | |
| "learning_rate": 4.02278501111464e-05, | |
| "loss": 0.6769, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 13.92505557319784, | |
| "grad_norm": 24068.44140625, | |
| "learning_rate": 4.0188154969831695e-05, | |
| "loss": 0.6792, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 13.940933629723721, | |
| "grad_norm": 24496.923828125, | |
| "learning_rate": 4.0148459828516995e-05, | |
| "loss": 0.6786, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 13.956811686249603, | |
| "grad_norm": 24324.5234375, | |
| "learning_rate": 4.010876468720229e-05, | |
| "loss": 0.6818, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 13.972689742775485, | |
| "grad_norm": 24043.82421875, | |
| "learning_rate": 4.006906954588758e-05, | |
| "loss": 0.6756, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 13.988567799301366, | |
| "grad_norm": 23718.05859375, | |
| "learning_rate": 4.002937440457288e-05, | |
| "loss": 0.6742, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 14.004445855827246, | |
| "grad_norm": 24011.693359375, | |
| "learning_rate": 3.9989679263258176e-05, | |
| "loss": 0.6766, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 14.020323912353128, | |
| "grad_norm": 23874.05859375, | |
| "learning_rate": 3.9949984121943476e-05, | |
| "loss": 0.6721, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 14.03620196887901, | |
| "grad_norm": 24170.900390625, | |
| "learning_rate": 3.9910288980628776e-05, | |
| "loss": 0.6746, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 14.05208002540489, | |
| "grad_norm": 23806.9921875, | |
| "learning_rate": 3.987059383931407e-05, | |
| "loss": 0.6702, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 14.067958081930772, | |
| "grad_norm": 24097.408203125, | |
| "learning_rate": 3.983089869799937e-05, | |
| "loss": 0.6726, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 14.083836138456652, | |
| "grad_norm": 24183.365234375, | |
| "learning_rate": 3.9791203556684664e-05, | |
| "loss": 0.6688, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 14.099714194982534, | |
| "grad_norm": 25200.125, | |
| "learning_rate": 3.975150841536996e-05, | |
| "loss": 0.6743, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 14.115592251508415, | |
| "grad_norm": 24078.8359375, | |
| "learning_rate": 3.971181327405526e-05, | |
| "loss": 0.6707, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 14.131470308034297, | |
| "grad_norm": 24092.71875, | |
| "learning_rate": 3.967211813274056e-05, | |
| "loss": 0.6727, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 14.147348364560179, | |
| "grad_norm": 24398.52734375, | |
| "learning_rate": 3.963242299142585e-05, | |
| "loss": 0.6726, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 14.163226421086058, | |
| "grad_norm": 25464.837890625, | |
| "learning_rate": 3.959272785011115e-05, | |
| "loss": 0.6692, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 14.17910447761194, | |
| "grad_norm": 24340.91796875, | |
| "learning_rate": 3.9553032708796445e-05, | |
| "loss": 0.6726, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 14.194982534137822, | |
| "grad_norm": 24626.837890625, | |
| "learning_rate": 3.951333756748174e-05, | |
| "loss": 0.6719, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 14.210860590663703, | |
| "grad_norm": 24861.15234375, | |
| "learning_rate": 3.947364242616704e-05, | |
| "loss": 0.6724, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 14.226738647189585, | |
| "grad_norm": 23964.291015625, | |
| "learning_rate": 3.943394728485233e-05, | |
| "loss": 0.674, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 14.242616703715465, | |
| "grad_norm": 23936.125, | |
| "learning_rate": 3.939425214353763e-05, | |
| "loss": 0.6743, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 14.258494760241346, | |
| "grad_norm": 24653.296875, | |
| "learning_rate": 3.935455700222293e-05, | |
| "loss": 0.6723, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 14.274372816767228, | |
| "grad_norm": 24398.095703125, | |
| "learning_rate": 3.9314861860908226e-05, | |
| "loss": 0.6774, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 14.29025087329311, | |
| "grad_norm": 24063.818359375, | |
| "learning_rate": 3.927516671959353e-05, | |
| "loss": 0.6701, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 14.30612892981899, | |
| "grad_norm": 24251.955078125, | |
| "learning_rate": 3.923547157827882e-05, | |
| "loss": 0.6749, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 14.32200698634487, | |
| "grad_norm": 24572.671875, | |
| "learning_rate": 3.9195776436964114e-05, | |
| "loss": 0.6754, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 14.337885042870752, | |
| "grad_norm": 24464.720703125, | |
| "learning_rate": 3.9156081295649414e-05, | |
| "loss": 0.6718, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 14.353763099396634, | |
| "grad_norm": 24511.29296875, | |
| "learning_rate": 3.9116386154334714e-05, | |
| "loss": 0.6752, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 14.369641155922515, | |
| "grad_norm": 24030.92578125, | |
| "learning_rate": 3.907669101302001e-05, | |
| "loss": 0.6737, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 14.385519212448397, | |
| "grad_norm": 23963.9453125, | |
| "learning_rate": 3.903699587170531e-05, | |
| "loss": 0.6726, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 14.401397268974277, | |
| "grad_norm": 24408.703125, | |
| "learning_rate": 3.89973007303906e-05, | |
| "loss": 0.6755, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 14.417275325500158, | |
| "grad_norm": 24175.90234375, | |
| "learning_rate": 3.89576055890759e-05, | |
| "loss": 0.6733, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 14.43315338202604, | |
| "grad_norm": 24181.03515625, | |
| "learning_rate": 3.8917910447761195e-05, | |
| "loss": 0.6715, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 14.449031438551922, | |
| "grad_norm": 24088.134765625, | |
| "learning_rate": 3.887821530644649e-05, | |
| "loss": 0.6734, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 14.464909495077803, | |
| "grad_norm": 24348.998046875, | |
| "learning_rate": 3.883852016513179e-05, | |
| "loss": 0.673, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 14.480787551603683, | |
| "grad_norm": 23610.765625, | |
| "learning_rate": 3.879882502381709e-05, | |
| "loss": 0.6733, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 14.496665608129565, | |
| "grad_norm": 23721.083984375, | |
| "learning_rate": 3.875912988250238e-05, | |
| "loss": 0.6731, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 14.512543664655446, | |
| "grad_norm": 24457.9375, | |
| "learning_rate": 3.871943474118768e-05, | |
| "loss": 0.6733, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 14.528421721181328, | |
| "grad_norm": 24163.458984375, | |
| "learning_rate": 3.867973959987298e-05, | |
| "loss": 0.6721, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 14.54429977770721, | |
| "grad_norm": 23760.05859375, | |
| "learning_rate": 3.864004445855827e-05, | |
| "loss": 0.6715, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 14.56017783423309, | |
| "grad_norm": 23812.14453125, | |
| "learning_rate": 3.860034931724357e-05, | |
| "loss": 0.6727, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 14.57605589075897, | |
| "grad_norm": 23580.501953125, | |
| "learning_rate": 3.8560654175928864e-05, | |
| "loss": 0.6704, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 14.591933947284852, | |
| "grad_norm": 24045.09765625, | |
| "learning_rate": 3.8520959034614165e-05, | |
| "loss": 0.6718, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 14.607812003810734, | |
| "grad_norm": 24375.74609375, | |
| "learning_rate": 3.8481263893299465e-05, | |
| "loss": 0.6709, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 14.623690060336616, | |
| "grad_norm": 24000.484375, | |
| "learning_rate": 3.844156875198476e-05, | |
| "loss": 0.674, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 14.639568116862495, | |
| "grad_norm": 23421.57421875, | |
| "learning_rate": 3.840187361067006e-05, | |
| "loss": 0.6735, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 14.655446173388377, | |
| "grad_norm": 24476.041015625, | |
| "learning_rate": 3.836217846935535e-05, | |
| "loss": 0.6737, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 14.671324229914259, | |
| "grad_norm": 23659.337890625, | |
| "learning_rate": 3.8322483328040646e-05, | |
| "loss": 0.6752, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 14.68720228644014, | |
| "grad_norm": 24077.185546875, | |
| "learning_rate": 3.8282788186725946e-05, | |
| "loss": 0.6684, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 14.703080342966022, | |
| "grad_norm": 24052.515625, | |
| "learning_rate": 3.8243093045411246e-05, | |
| "loss": 0.673, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 14.718958399491902, | |
| "grad_norm": 23948.08203125, | |
| "learning_rate": 3.820339790409654e-05, | |
| "loss": 0.6745, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 14.734836456017783, | |
| "grad_norm": 23770.400390625, | |
| "learning_rate": 3.816370276278184e-05, | |
| "loss": 0.6699, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 14.750714512543665, | |
| "grad_norm": 23651.755859375, | |
| "learning_rate": 3.8124007621467134e-05, | |
| "loss": 0.6722, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 14.766592569069546, | |
| "grad_norm": 24653.626953125, | |
| "learning_rate": 3.808431248015243e-05, | |
| "loss": 0.6698, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 14.782470625595428, | |
| "grad_norm": 24360.767578125, | |
| "learning_rate": 3.804461733883773e-05, | |
| "loss": 0.6701, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 14.798348682121308, | |
| "grad_norm": 23147.37109375, | |
| "learning_rate": 3.800492219752302e-05, | |
| "loss": 0.6687, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 14.81422673864719, | |
| "grad_norm": 24035.583984375, | |
| "learning_rate": 3.796522705620832e-05, | |
| "loss": 0.6703, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 14.830104795173071, | |
| "grad_norm": 23670.650390625, | |
| "learning_rate": 3.792553191489362e-05, | |
| "loss": 0.67, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 14.845982851698952, | |
| "grad_norm": 23496.2265625, | |
| "learning_rate": 3.7885836773578915e-05, | |
| "loss": 0.6741, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 14.861860908224834, | |
| "grad_norm": 24511.904296875, | |
| "learning_rate": 3.7846141632264215e-05, | |
| "loss": 0.673, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 14.877738964750714, | |
| "grad_norm": 23906.30859375, | |
| "learning_rate": 3.780644649094951e-05, | |
| "loss": 0.6745, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 14.893617021276595, | |
| "grad_norm": 23655.87109375, | |
| "learning_rate": 3.77667513496348e-05, | |
| "loss": 0.668, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 14.909495077802477, | |
| "grad_norm": 23849.982421875, | |
| "learning_rate": 3.77270562083201e-05, | |
| "loss": 0.6671, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 14.925373134328359, | |
| "grad_norm": 24548.85546875, | |
| "learning_rate": 3.76873610670054e-05, | |
| "loss": 0.6698, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 14.94125119085424, | |
| "grad_norm": 24144.28515625, | |
| "learning_rate": 3.7647665925690696e-05, | |
| "loss": 0.668, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 14.95712924738012, | |
| "grad_norm": 23972.701171875, | |
| "learning_rate": 3.7607970784376e-05, | |
| "loss": 0.6677, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 14.973007303906002, | |
| "grad_norm": 23551.353515625, | |
| "learning_rate": 3.756827564306129e-05, | |
| "loss": 0.6699, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 14.988885360431883, | |
| "grad_norm": 23527.978515625, | |
| "learning_rate": 3.752858050174659e-05, | |
| "loss": 0.6702, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 15.004763416957765, | |
| "grad_norm": 23273.08984375, | |
| "learning_rate": 3.7488885360431884e-05, | |
| "loss": 0.6702, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 15.020641473483646, | |
| "grad_norm": 23773.09765625, | |
| "learning_rate": 3.744919021911718e-05, | |
| "loss": 0.6671, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 15.036519530009526, | |
| "grad_norm": 24133.150390625, | |
| "learning_rate": 3.740949507780248e-05, | |
| "loss": 0.6607, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 15.052397586535408, | |
| "grad_norm": 23611.69140625, | |
| "learning_rate": 3.736979993648778e-05, | |
| "loss": 0.6608, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 15.06827564306129, | |
| "grad_norm": 24616.044921875, | |
| "learning_rate": 3.733010479517307e-05, | |
| "loss": 0.6678, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 15.084153699587171, | |
| "grad_norm": 24132.029296875, | |
| "learning_rate": 3.729040965385837e-05, | |
| "loss": 0.6634, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 15.100031756113053, | |
| "grad_norm": 24181.455078125, | |
| "learning_rate": 3.7250714512543665e-05, | |
| "loss": 0.6655, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 15.115909812638932, | |
| "grad_norm": 23974.443359375, | |
| "learning_rate": 3.721101937122896e-05, | |
| "loss": 0.662, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 15.131787869164814, | |
| "grad_norm": 24213.7890625, | |
| "learning_rate": 3.717132422991426e-05, | |
| "loss": 0.6608, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 15.147665925690696, | |
| "grad_norm": 24070.62890625, | |
| "learning_rate": 3.713162908859956e-05, | |
| "loss": 0.6658, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 15.163543982216577, | |
| "grad_norm": 23968.4296875, | |
| "learning_rate": 3.709193394728485e-05, | |
| "loss": 0.6609, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 15.179422038742459, | |
| "grad_norm": 23617.162109375, | |
| "learning_rate": 3.7052238805970153e-05, | |
| "loss": 0.6613, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 15.195300095268339, | |
| "grad_norm": 24029.837890625, | |
| "learning_rate": 3.701254366465545e-05, | |
| "loss": 0.664, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 15.21117815179422, | |
| "grad_norm": 23731.349609375, | |
| "learning_rate": 3.697284852334075e-05, | |
| "loss": 0.6636, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 15.227056208320102, | |
| "grad_norm": 24603.748046875, | |
| "learning_rate": 3.693315338202604e-05, | |
| "loss": 0.6629, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 15.242934264845983, | |
| "grad_norm": 23958.181640625, | |
| "learning_rate": 3.6893458240711334e-05, | |
| "loss": 0.6648, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 15.258812321371865, | |
| "grad_norm": 23479.33984375, | |
| "learning_rate": 3.6853763099396635e-05, | |
| "loss": 0.6653, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 15.274690377897745, | |
| "grad_norm": 24116.53515625, | |
| "learning_rate": 3.6814067958081935e-05, | |
| "loss": 0.6622, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 15.290568434423626, | |
| "grad_norm": 23710.822265625, | |
| "learning_rate": 3.677437281676723e-05, | |
| "loss": 0.6632, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 15.306446490949508, | |
| "grad_norm": 24976.740234375, | |
| "learning_rate": 3.673467767545253e-05, | |
| "loss": 0.6648, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 15.32232454747539, | |
| "grad_norm": 24450.333984375, | |
| "learning_rate": 3.669498253413782e-05, | |
| "loss": 0.6665, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 15.338202604001271, | |
| "grad_norm": 24068.060546875, | |
| "learning_rate": 3.6655287392823116e-05, | |
| "loss": 0.6653, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 15.354080660527151, | |
| "grad_norm": 23732.517578125, | |
| "learning_rate": 3.6615592251508416e-05, | |
| "loss": 0.6649, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 15.369958717053033, | |
| "grad_norm": 23714.439453125, | |
| "learning_rate": 3.6575897110193716e-05, | |
| "loss": 0.6644, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 15.385836773578914, | |
| "grad_norm": 24033.36328125, | |
| "learning_rate": 3.6536201968879017e-05, | |
| "loss": 0.6648, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 15.401714830104796, | |
| "grad_norm": 23467.201171875, | |
| "learning_rate": 3.649650682756431e-05, | |
| "loss": 0.6632, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 15.417592886630676, | |
| "grad_norm": 23929.56640625, | |
| "learning_rate": 3.6456811686249604e-05, | |
| "loss": 0.6621, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 15.433470943156557, | |
| "grad_norm": 23940.3125, | |
| "learning_rate": 3.6417116544934904e-05, | |
| "loss": 0.6666, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 15.449348999682439, | |
| "grad_norm": 23760.220703125, | |
| "learning_rate": 3.63774214036202e-05, | |
| "loss": 0.6657, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 15.46522705620832, | |
| "grad_norm": 24150.181640625, | |
| "learning_rate": 3.633772626230549e-05, | |
| "loss": 0.6635, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 15.481105112734202, | |
| "grad_norm": 23952.111328125, | |
| "learning_rate": 3.629803112099079e-05, | |
| "loss": 0.6659, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 15.496983169260083, | |
| "grad_norm": 23915.330078125, | |
| "learning_rate": 3.625833597967609e-05, | |
| "loss": 0.6649, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 15.512861225785963, | |
| "grad_norm": 24483.07421875, | |
| "learning_rate": 3.6218640838361385e-05, | |
| "loss": 0.6633, | |
| "step": 48850 | |
| }, | |
| { | |
| "epoch": 15.528739282311845, | |
| "grad_norm": 24314.33203125, | |
| "learning_rate": 3.6178945697046685e-05, | |
| "loss": 0.6649, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 15.544617338837726, | |
| "grad_norm": 24762.5625, | |
| "learning_rate": 3.613925055573198e-05, | |
| "loss": 0.6652, | |
| "step": 48950 | |
| }, | |
| { | |
| "epoch": 15.560495395363608, | |
| "grad_norm": 24496.2109375, | |
| "learning_rate": 3.609955541441727e-05, | |
| "loss": 0.6636, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 15.576373451889488, | |
| "grad_norm": 24142.921875, | |
| "learning_rate": 3.605986027310257e-05, | |
| "loss": 0.665, | |
| "step": 49050 | |
| }, | |
| { | |
| "epoch": 15.59225150841537, | |
| "grad_norm": 23896.888671875, | |
| "learning_rate": 3.602016513178787e-05, | |
| "loss": 0.6636, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 15.608129564941251, | |
| "grad_norm": 23628.111328125, | |
| "learning_rate": 3.598046999047317e-05, | |
| "loss": 0.6615, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 15.624007621467133, | |
| "grad_norm": 23748.744140625, | |
| "learning_rate": 3.594077484915847e-05, | |
| "loss": 0.6648, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 15.639885677993014, | |
| "grad_norm": 23825.630859375, | |
| "learning_rate": 3.590107970784376e-05, | |
| "loss": 0.6652, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 15.655763734518896, | |
| "grad_norm": 23878.716796875, | |
| "learning_rate": 3.586138456652906e-05, | |
| "loss": 0.6615, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 15.671641791044776, | |
| "grad_norm": 23718.876953125, | |
| "learning_rate": 3.5821689425214354e-05, | |
| "loss": 0.6622, | |
| "step": 49350 | |
| }, | |
| { | |
| "epoch": 15.687519847570657, | |
| "grad_norm": 23546.572265625, | |
| "learning_rate": 3.578199428389965e-05, | |
| "loss": 0.6641, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 15.703397904096539, | |
| "grad_norm": 23565.736328125, | |
| "learning_rate": 3.574229914258495e-05, | |
| "loss": 0.6656, | |
| "step": 49450 | |
| }, | |
| { | |
| "epoch": 15.71927596062242, | |
| "grad_norm": 23384.71484375, | |
| "learning_rate": 3.570260400127025e-05, | |
| "loss": 0.6618, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 15.7351540171483, | |
| "grad_norm": 23340.80078125, | |
| "learning_rate": 3.566290885995554e-05, | |
| "loss": 0.6653, | |
| "step": 49550 | |
| }, | |
| { | |
| "epoch": 15.751032073674182, | |
| "grad_norm": 24208.947265625, | |
| "learning_rate": 3.562321371864084e-05, | |
| "loss": 0.6661, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 15.766910130200063, | |
| "grad_norm": 23574.521484375, | |
| "learning_rate": 3.5583518577326135e-05, | |
| "loss": 0.6666, | |
| "step": 49650 | |
| }, | |
| { | |
| "epoch": 15.782788186725945, | |
| "grad_norm": 23982.78125, | |
| "learning_rate": 3.5543823436011436e-05, | |
| "loss": 0.663, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 15.798666243251827, | |
| "grad_norm": 23564.3359375, | |
| "learning_rate": 3.550412829469673e-05, | |
| "loss": 0.6606, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 15.814544299777708, | |
| "grad_norm": 23933.189453125, | |
| "learning_rate": 3.546443315338203e-05, | |
| "loss": 0.6647, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 15.830422356303588, | |
| "grad_norm": 23510.328125, | |
| "learning_rate": 3.542473801206733e-05, | |
| "loss": 0.6635, | |
| "step": 49850 | |
| }, | |
| { | |
| "epoch": 15.84630041282947, | |
| "grad_norm": 23416.091796875, | |
| "learning_rate": 3.5385042870752623e-05, | |
| "loss": 0.6646, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 15.862178469355351, | |
| "grad_norm": 23560.341796875, | |
| "learning_rate": 3.534534772943792e-05, | |
| "loss": 0.663, | |
| "step": 49950 | |
| }, | |
| { | |
| "epoch": 15.878056525881233, | |
| "grad_norm": 23726.447265625, | |
| "learning_rate": 3.530565258812322e-05, | |
| "loss": 0.6649, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 15.893934582407113, | |
| "grad_norm": 23844.755859375, | |
| "learning_rate": 3.526595744680851e-05, | |
| "loss": 0.6622, | |
| "step": 50050 | |
| }, | |
| { | |
| "epoch": 15.909812638932994, | |
| "grad_norm": 23132.53125, | |
| "learning_rate": 3.5226262305493804e-05, | |
| "loss": 0.6612, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 15.925690695458876, | |
| "grad_norm": 24354.9453125, | |
| "learning_rate": 3.5186567164179105e-05, | |
| "loss": 0.6633, | |
| "step": 50150 | |
| }, | |
| { | |
| "epoch": 15.941568751984757, | |
| "grad_norm": 23439.099609375, | |
| "learning_rate": 3.5146872022864405e-05, | |
| "loss": 0.6619, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 15.957446808510639, | |
| "grad_norm": 24292.439453125, | |
| "learning_rate": 3.5107176881549705e-05, | |
| "loss": 0.6629, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 15.973324865036519, | |
| "grad_norm": 23685.8515625, | |
| "learning_rate": 3.5067481740235e-05, | |
| "loss": 0.6622, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 15.9892029215624, | |
| "grad_norm": 23704.876953125, | |
| "learning_rate": 3.502778659892029e-05, | |
| "loss": 0.663, | |
| "step": 50350 | |
| }, | |
| { | |
| "epoch": 16.005080978088284, | |
| "grad_norm": 23387.416015625, | |
| "learning_rate": 3.498809145760559e-05, | |
| "loss": 0.659, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 16.02095903461416, | |
| "grad_norm": 23603.845703125, | |
| "learning_rate": 3.4948396316290886e-05, | |
| "loss": 0.6574, | |
| "step": 50450 | |
| }, | |
| { | |
| "epoch": 16.036837091140043, | |
| "grad_norm": 24016.529296875, | |
| "learning_rate": 3.490870117497618e-05, | |
| "loss": 0.657, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 16.052715147665925, | |
| "grad_norm": 24235.45703125, | |
| "learning_rate": 3.4869006033661487e-05, | |
| "loss": 0.6557, | |
| "step": 50550 | |
| }, | |
| { | |
| "epoch": 16.068593204191806, | |
| "grad_norm": 23704.05859375, | |
| "learning_rate": 3.482931089234678e-05, | |
| "loss": 0.6547, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 16.084471260717688, | |
| "grad_norm": 23739.4921875, | |
| "learning_rate": 3.4789615751032074e-05, | |
| "loss": 0.6541, | |
| "step": 50650 | |
| }, | |
| { | |
| "epoch": 16.10034931724357, | |
| "grad_norm": 24145.8046875, | |
| "learning_rate": 3.4749920609717374e-05, | |
| "loss": 0.6594, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 16.11622737376945, | |
| "grad_norm": 24285.50390625, | |
| "learning_rate": 3.471022546840267e-05, | |
| "loss": 0.6569, | |
| "step": 50750 | |
| }, | |
| { | |
| "epoch": 16.132105430295333, | |
| "grad_norm": 23721.884765625, | |
| "learning_rate": 3.467053032708796e-05, | |
| "loss": 0.6585, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 16.147983486821214, | |
| "grad_norm": 23684.302734375, | |
| "learning_rate": 3.463083518577326e-05, | |
| "loss": 0.6563, | |
| "step": 50850 | |
| }, | |
| { | |
| "epoch": 16.163861543347096, | |
| "grad_norm": 24330.8046875, | |
| "learning_rate": 3.459114004445856e-05, | |
| "loss": 0.6547, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 16.179739599872974, | |
| "grad_norm": 23832.5703125, | |
| "learning_rate": 3.455144490314386e-05, | |
| "loss": 0.6578, | |
| "step": 50950 | |
| }, | |
| { | |
| "epoch": 16.195617656398856, | |
| "grad_norm": 23988.150390625, | |
| "learning_rate": 3.4511749761829155e-05, | |
| "loss": 0.6558, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 16.211495712924737, | |
| "grad_norm": 23709.25, | |
| "learning_rate": 3.447205462051445e-05, | |
| "loss": 0.655, | |
| "step": 51050 | |
| }, | |
| { | |
| "epoch": 16.22737376945062, | |
| "grad_norm": 23120.591796875, | |
| "learning_rate": 3.443235947919975e-05, | |
| "loss": 0.6565, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 16.2432518259765, | |
| "grad_norm": 23713.447265625, | |
| "learning_rate": 3.439266433788504e-05, | |
| "loss": 0.6583, | |
| "step": 51150 | |
| }, | |
| { | |
| "epoch": 16.259129882502382, | |
| "grad_norm": 23596.0625, | |
| "learning_rate": 3.4352969196570336e-05, | |
| "loss": 0.6585, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 16.275007939028264, | |
| "grad_norm": 24150.869140625, | |
| "learning_rate": 3.4313274055255636e-05, | |
| "loss": 0.6575, | |
| "step": 51250 | |
| }, | |
| { | |
| "epoch": 16.290885995554145, | |
| "grad_norm": 24331.630859375, | |
| "learning_rate": 3.427357891394094e-05, | |
| "loss": 0.6594, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 16.306764052080027, | |
| "grad_norm": 23451.88671875, | |
| "learning_rate": 3.423388377262623e-05, | |
| "loss": 0.6596, | |
| "step": 51350 | |
| }, | |
| { | |
| "epoch": 16.32264210860591, | |
| "grad_norm": 23804.421875, | |
| "learning_rate": 3.419418863131153e-05, | |
| "loss": 0.6565, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 16.338520165131786, | |
| "grad_norm": 24613.421875, | |
| "learning_rate": 3.4154493489996824e-05, | |
| "loss": 0.6608, | |
| "step": 51450 | |
| }, | |
| { | |
| "epoch": 16.354398221657668, | |
| "grad_norm": 23770.623046875, | |
| "learning_rate": 3.4114798348682124e-05, | |
| "loss": 0.6558, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 16.37027627818355, | |
| "grad_norm": 23497.1171875, | |
| "learning_rate": 3.407510320736742e-05, | |
| "loss": 0.6559, | |
| "step": 51550 | |
| }, | |
| { | |
| "epoch": 16.38615433470943, | |
| "grad_norm": 23999.451171875, | |
| "learning_rate": 3.403540806605272e-05, | |
| "loss": 0.66, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 16.402032391235313, | |
| "grad_norm": 23710.712890625, | |
| "learning_rate": 3.399571292473802e-05, | |
| "loss": 0.6605, | |
| "step": 51650 | |
| }, | |
| { | |
| "epoch": 16.417910447761194, | |
| "grad_norm": 23660.166015625, | |
| "learning_rate": 3.395601778342331e-05, | |
| "loss": 0.6549, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 16.433788504287076, | |
| "grad_norm": 23169.587890625, | |
| "learning_rate": 3.3916322642108605e-05, | |
| "loss": 0.6604, | |
| "step": 51750 | |
| }, | |
| { | |
| "epoch": 16.449666560812958, | |
| "grad_norm": 24434.888671875, | |
| "learning_rate": 3.3876627500793906e-05, | |
| "loss": 0.6586, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 16.46554461733884, | |
| "grad_norm": 23537.533203125, | |
| "learning_rate": 3.38369323594792e-05, | |
| "loss": 0.6588, | |
| "step": 51850 | |
| }, | |
| { | |
| "epoch": 16.48142267386472, | |
| "grad_norm": 23304.681640625, | |
| "learning_rate": 3.379723721816449e-05, | |
| "loss": 0.6571, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 16.4973007303906, | |
| "grad_norm": 23815.26171875, | |
| "learning_rate": 3.375754207684979e-05, | |
| "loss": 0.6593, | |
| "step": 51950 | |
| }, | |
| { | |
| "epoch": 16.51317878691648, | |
| "grad_norm": 24492.578125, | |
| "learning_rate": 3.3717846935535093e-05, | |
| "loss": 0.6583, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 16.529056843442362, | |
| "grad_norm": 23740.220703125, | |
| "learning_rate": 3.3678151794220394e-05, | |
| "loss": 0.6548, | |
| "step": 52050 | |
| }, | |
| { | |
| "epoch": 16.544934899968244, | |
| "grad_norm": 23588.91015625, | |
| "learning_rate": 3.363845665290569e-05, | |
| "loss": 0.6556, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 16.560812956494125, | |
| "grad_norm": 23579.50390625, | |
| "learning_rate": 3.359876151159098e-05, | |
| "loss": 0.6572, | |
| "step": 52150 | |
| }, | |
| { | |
| "epoch": 16.576691013020007, | |
| "grad_norm": 23243.826171875, | |
| "learning_rate": 3.355906637027628e-05, | |
| "loss": 0.658, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 16.59256906954589, | |
| "grad_norm": 23816.73828125, | |
| "learning_rate": 3.3519371228961575e-05, | |
| "loss": 0.6573, | |
| "step": 52250 | |
| }, | |
| { | |
| "epoch": 16.60844712607177, | |
| "grad_norm": 23419.2265625, | |
| "learning_rate": 3.3479676087646875e-05, | |
| "loss": 0.6569, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 16.62432518259765, | |
| "grad_norm": 24114.97265625, | |
| "learning_rate": 3.3439980946332175e-05, | |
| "loss": 0.6556, | |
| "step": 52350 | |
| }, | |
| { | |
| "epoch": 16.64020323912353, | |
| "grad_norm": 23384.103515625, | |
| "learning_rate": 3.340028580501747e-05, | |
| "loss": 0.6595, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 16.65608129564941, | |
| "grad_norm": 23055.017578125, | |
| "learning_rate": 3.336059066370276e-05, | |
| "loss": 0.6539, | |
| "step": 52450 | |
| }, | |
| { | |
| "epoch": 16.671959352175293, | |
| "grad_norm": 23221.4609375, | |
| "learning_rate": 3.332089552238806e-05, | |
| "loss": 0.6587, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 16.687837408701174, | |
| "grad_norm": 23152.85546875, | |
| "learning_rate": 3.3281200381073356e-05, | |
| "loss": 0.6583, | |
| "step": 52550 | |
| }, | |
| { | |
| "epoch": 16.703715465227056, | |
| "grad_norm": 23718.134765625, | |
| "learning_rate": 3.324150523975865e-05, | |
| "loss": 0.6575, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 16.719593521752937, | |
| "grad_norm": 23835.32421875, | |
| "learning_rate": 3.320181009844395e-05, | |
| "loss": 0.652, | |
| "step": 52650 | |
| }, | |
| { | |
| "epoch": 16.73547157827882, | |
| "grad_norm": 24064.998046875, | |
| "learning_rate": 3.316211495712925e-05, | |
| "loss": 0.6544, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 16.7513496348047, | |
| "grad_norm": 23508.853515625, | |
| "learning_rate": 3.312241981581455e-05, | |
| "loss": 0.6558, | |
| "step": 52750 | |
| }, | |
| { | |
| "epoch": 16.767227691330582, | |
| "grad_norm": 23061.650390625, | |
| "learning_rate": 3.3082724674499844e-05, | |
| "loss": 0.6591, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 16.783105747856464, | |
| "grad_norm": 23995.44140625, | |
| "learning_rate": 3.304302953318514e-05, | |
| "loss": 0.6559, | |
| "step": 52850 | |
| }, | |
| { | |
| "epoch": 16.798983804382345, | |
| "grad_norm": 23488.259765625, | |
| "learning_rate": 3.300333439187044e-05, | |
| "loss": 0.6571, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 16.814861860908223, | |
| "grad_norm": 24552.5625, | |
| "learning_rate": 3.296363925055573e-05, | |
| "loss": 0.6547, | |
| "step": 52950 | |
| }, | |
| { | |
| "epoch": 16.830739917434105, | |
| "grad_norm": 23736.818359375, | |
| "learning_rate": 3.292394410924103e-05, | |
| "loss": 0.6578, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 16.846617973959987, | |
| "grad_norm": 24375.546875, | |
| "learning_rate": 3.288424896792633e-05, | |
| "loss": 0.655, | |
| "step": 53050 | |
| }, | |
| { | |
| "epoch": 16.862496030485868, | |
| "grad_norm": 23373.5, | |
| "learning_rate": 3.2844553826611625e-05, | |
| "loss": 0.6555, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 16.87837408701175, | |
| "grad_norm": 24301.333984375, | |
| "learning_rate": 3.280485868529692e-05, | |
| "loss": 0.6574, | |
| "step": 53150 | |
| }, | |
| { | |
| "epoch": 16.89425214353763, | |
| "grad_norm": 23277.462890625, | |
| "learning_rate": 3.276516354398222e-05, | |
| "loss": 0.6555, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 16.910130200063513, | |
| "grad_norm": 23834.724609375, | |
| "learning_rate": 3.272546840266751e-05, | |
| "loss": 0.6582, | |
| "step": 53250 | |
| }, | |
| { | |
| "epoch": 16.926008256589395, | |
| "grad_norm": 23567.853515625, | |
| "learning_rate": 3.268577326135281e-05, | |
| "loss": 0.6547, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 16.941886313115276, | |
| "grad_norm": 23326.3671875, | |
| "learning_rate": 3.2646078120038106e-05, | |
| "loss": 0.6567, | |
| "step": 53350 | |
| }, | |
| { | |
| "epoch": 16.957764369641154, | |
| "grad_norm": 23291.1953125, | |
| "learning_rate": 3.260638297872341e-05, | |
| "loss": 0.6552, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 16.973642426167036, | |
| "grad_norm": 23493.248046875, | |
| "learning_rate": 3.256668783740871e-05, | |
| "loss": 0.6562, | |
| "step": 53450 | |
| }, | |
| { | |
| "epoch": 16.989520482692917, | |
| "grad_norm": 23239.197265625, | |
| "learning_rate": 3.2526992696094e-05, | |
| "loss": 0.6562, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 17.0053985392188, | |
| "grad_norm": 23646.5, | |
| "learning_rate": 3.2487297554779294e-05, | |
| "loss": 0.6553, | |
| "step": 53550 | |
| }, | |
| { | |
| "epoch": 17.02127659574468, | |
| "grad_norm": 23330.103515625, | |
| "learning_rate": 3.2447602413464594e-05, | |
| "loss": 0.6497, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 17.037154652270562, | |
| "grad_norm": 24091.298828125, | |
| "learning_rate": 3.240790727214989e-05, | |
| "loss": 0.6515, | |
| "step": 53650 | |
| }, | |
| { | |
| "epoch": 17.053032708796444, | |
| "grad_norm": 24229.26171875, | |
| "learning_rate": 3.236821213083519e-05, | |
| "loss": 0.6518, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 17.068910765322325, | |
| "grad_norm": 23416.974609375, | |
| "learning_rate": 3.232851698952049e-05, | |
| "loss": 0.6496, | |
| "step": 53750 | |
| }, | |
| { | |
| "epoch": 17.084788821848207, | |
| "grad_norm": 23611.005859375, | |
| "learning_rate": 3.228882184820578e-05, | |
| "loss": 0.6491, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 17.10066687837409, | |
| "grad_norm": 23050.642578125, | |
| "learning_rate": 3.224912670689108e-05, | |
| "loss": 0.6507, | |
| "step": 53850 | |
| }, | |
| { | |
| "epoch": 17.116544934899967, | |
| "grad_norm": 23595.267578125, | |
| "learning_rate": 3.2209431565576376e-05, | |
| "loss": 0.6475, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 17.132422991425848, | |
| "grad_norm": 23561.833984375, | |
| "learning_rate": 3.216973642426167e-05, | |
| "loss": 0.6487, | |
| "step": 53950 | |
| }, | |
| { | |
| "epoch": 17.14830104795173, | |
| "grad_norm": 23513.59375, | |
| "learning_rate": 3.213004128294697e-05, | |
| "loss": 0.6474, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 17.16417910447761, | |
| "grad_norm": 23998.837890625, | |
| "learning_rate": 3.209034614163226e-05, | |
| "loss": 0.6507, | |
| "step": 54050 | |
| }, | |
| { | |
| "epoch": 17.180057161003493, | |
| "grad_norm": 23122.234375, | |
| "learning_rate": 3.2050651000317563e-05, | |
| "loss": 0.649, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 17.195935217529374, | |
| "grad_norm": 23222.94921875, | |
| "learning_rate": 3.2010955859002864e-05, | |
| "loss": 0.6503, | |
| "step": 54150 | |
| }, | |
| { | |
| "epoch": 17.211813274055256, | |
| "grad_norm": 23577.119140625, | |
| "learning_rate": 3.197126071768816e-05, | |
| "loss": 0.6511, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 17.227691330581138, | |
| "grad_norm": 23782.28125, | |
| "learning_rate": 3.193156557637345e-05, | |
| "loss": 0.6523, | |
| "step": 54250 | |
| }, | |
| { | |
| "epoch": 17.24356938710702, | |
| "grad_norm": 24278.03125, | |
| "learning_rate": 3.189187043505875e-05, | |
| "loss": 0.654, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 17.2594474436329, | |
| "grad_norm": 23616.5390625, | |
| "learning_rate": 3.1852175293744045e-05, | |
| "loss": 0.651, | |
| "step": 54350 | |
| }, | |
| { | |
| "epoch": 17.27532550015878, | |
| "grad_norm": 23676.12890625, | |
| "learning_rate": 3.181248015242934e-05, | |
| "loss": 0.6503, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 17.29120355668466, | |
| "grad_norm": 23172.5859375, | |
| "learning_rate": 3.1772785011114645e-05, | |
| "loss": 0.6541, | |
| "step": 54450 | |
| }, | |
| { | |
| "epoch": 17.307081613210542, | |
| "grad_norm": 23444.791015625, | |
| "learning_rate": 3.173308986979994e-05, | |
| "loss": 0.6491, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 17.322959669736424, | |
| "grad_norm": 23441.435546875, | |
| "learning_rate": 3.169339472848524e-05, | |
| "loss": 0.6495, | |
| "step": 54550 | |
| }, | |
| { | |
| "epoch": 17.338837726262305, | |
| "grad_norm": 23380.716796875, | |
| "learning_rate": 3.165369958717053e-05, | |
| "loss": 0.6504, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 17.354715782788187, | |
| "grad_norm": 23601.03125, | |
| "learning_rate": 3.1614004445855826e-05, | |
| "loss": 0.6543, | |
| "step": 54650 | |
| }, | |
| { | |
| "epoch": 17.37059383931407, | |
| "grad_norm": 23646.830078125, | |
| "learning_rate": 3.1574309304541126e-05, | |
| "loss": 0.6514, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 17.38647189583995, | |
| "grad_norm": 23490.85546875, | |
| "learning_rate": 3.153461416322642e-05, | |
| "loss": 0.6513, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 17.40234995236583, | |
| "grad_norm": 23827.6796875, | |
| "learning_rate": 3.149491902191172e-05, | |
| "loss": 0.6484, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 17.418228008891713, | |
| "grad_norm": 23096.40625, | |
| "learning_rate": 3.145522388059702e-05, | |
| "loss": 0.6483, | |
| "step": 54850 | |
| }, | |
| { | |
| "epoch": 17.43410606541759, | |
| "grad_norm": 23743.8359375, | |
| "learning_rate": 3.1415528739282314e-05, | |
| "loss": 0.652, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 17.449984121943473, | |
| "grad_norm": 23075.265625, | |
| "learning_rate": 3.137583359796761e-05, | |
| "loss": 0.6501, | |
| "step": 54950 | |
| }, | |
| { | |
| "epoch": 17.465862178469354, | |
| "grad_norm": 24035.71875, | |
| "learning_rate": 3.133613845665291e-05, | |
| "loss": 0.652, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 17.481740234995236, | |
| "grad_norm": 23361.478515625, | |
| "learning_rate": 3.12964433153382e-05, | |
| "loss": 0.6536, | |
| "step": 55050 | |
| }, | |
| { | |
| "epoch": 17.497618291521118, | |
| "grad_norm": 23745.171875, | |
| "learning_rate": 3.12567481740235e-05, | |
| "loss": 0.6504, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 17.513496348047, | |
| "grad_norm": 23789.912109375, | |
| "learning_rate": 3.12170530327088e-05, | |
| "loss": 0.6491, | |
| "step": 55150 | |
| }, | |
| { | |
| "epoch": 17.52937440457288, | |
| "grad_norm": 23335.9609375, | |
| "learning_rate": 3.1177357891394095e-05, | |
| "loss": 0.6503, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 17.545252461098762, | |
| "grad_norm": 23689.12890625, | |
| "learning_rate": 3.1137662750079396e-05, | |
| "loss": 0.6527, | |
| "step": 55250 | |
| }, | |
| { | |
| "epoch": 17.561130517624644, | |
| "grad_norm": 23400.572265625, | |
| "learning_rate": 3.109796760876469e-05, | |
| "loss": 0.6519, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 17.577008574150526, | |
| "grad_norm": 23527.71875, | |
| "learning_rate": 3.105827246744998e-05, | |
| "loss": 0.6494, | |
| "step": 55350 | |
| }, | |
| { | |
| "epoch": 17.592886630676404, | |
| "grad_norm": 23462.595703125, | |
| "learning_rate": 3.101857732613528e-05, | |
| "loss": 0.6507, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 17.608764687202285, | |
| "grad_norm": 23832.5625, | |
| "learning_rate": 3.0978882184820576e-05, | |
| "loss": 0.648, | |
| "step": 55450 | |
| }, | |
| { | |
| "epoch": 17.624642743728167, | |
| "grad_norm": 23171.41796875, | |
| "learning_rate": 3.093918704350588e-05, | |
| "loss": 0.6515, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 17.64052080025405, | |
| "grad_norm": 22859.158203125, | |
| "learning_rate": 3.089949190219118e-05, | |
| "loss": 0.65, | |
| "step": 55550 | |
| }, | |
| { | |
| "epoch": 17.65639885677993, | |
| "grad_norm": 23794.48828125, | |
| "learning_rate": 3.085979676087647e-05, | |
| "loss": 0.6494, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 17.67227691330581, | |
| "grad_norm": 23437.626953125, | |
| "learning_rate": 3.082010161956177e-05, | |
| "loss": 0.6507, | |
| "step": 55650 | |
| }, | |
| { | |
| "epoch": 17.688154969831693, | |
| "grad_norm": 23454.169921875, | |
| "learning_rate": 3.0780406478247064e-05, | |
| "loss": 0.6528, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 17.704033026357575, | |
| "grad_norm": 23537.875, | |
| "learning_rate": 3.074071133693236e-05, | |
| "loss": 0.6508, | |
| "step": 55750 | |
| }, | |
| { | |
| "epoch": 17.719911082883456, | |
| "grad_norm": 23073.7421875, | |
| "learning_rate": 3.070101619561766e-05, | |
| "loss": 0.6514, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 17.735789139409338, | |
| "grad_norm": 24199.0234375, | |
| "learning_rate": 3.066132105430295e-05, | |
| "loss": 0.652, | |
| "step": 55850 | |
| }, | |
| { | |
| "epoch": 17.751667195935216, | |
| "grad_norm": 24246.693359375, | |
| "learning_rate": 3.062162591298825e-05, | |
| "loss": 0.6506, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 17.767545252461098, | |
| "grad_norm": 23131.373046875, | |
| "learning_rate": 3.058193077167355e-05, | |
| "loss": 0.6473, | |
| "step": 55950 | |
| }, | |
| { | |
| "epoch": 17.78342330898698, | |
| "grad_norm": 23967.60546875, | |
| "learning_rate": 3.0542235630358846e-05, | |
| "loss": 0.649, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 17.79930136551286, | |
| "grad_norm": 23857.62890625, | |
| "learning_rate": 3.050254048904414e-05, | |
| "loss": 0.6498, | |
| "step": 56050 | |
| }, | |
| { | |
| "epoch": 17.815179422038742, | |
| "grad_norm": 23931.333984375, | |
| "learning_rate": 3.046284534772944e-05, | |
| "loss": 0.651, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 17.831057478564624, | |
| "grad_norm": 23383.25390625, | |
| "learning_rate": 3.0423150206414736e-05, | |
| "loss": 0.6526, | |
| "step": 56150 | |
| }, | |
| { | |
| "epoch": 17.846935535090505, | |
| "grad_norm": 23175.05078125, | |
| "learning_rate": 3.038345506510003e-05, | |
| "loss": 0.6494, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 17.862813591616387, | |
| "grad_norm": 23574.662109375, | |
| "learning_rate": 3.034375992378533e-05, | |
| "loss": 0.6479, | |
| "step": 56250 | |
| }, | |
| { | |
| "epoch": 17.87869164814227, | |
| "grad_norm": 23581.66015625, | |
| "learning_rate": 3.0304064782470627e-05, | |
| "loss": 0.6514, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 17.89456970466815, | |
| "grad_norm": 23191.83203125, | |
| "learning_rate": 3.0264369641155928e-05, | |
| "loss": 0.649, | |
| "step": 56350 | |
| }, | |
| { | |
| "epoch": 17.91044776119403, | |
| "grad_norm": 23138.642578125, | |
| "learning_rate": 3.022467449984122e-05, | |
| "loss": 0.6523, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 17.92632581771991, | |
| "grad_norm": 24247.412109375, | |
| "learning_rate": 3.0184979358526515e-05, | |
| "loss": 0.6508, | |
| "step": 56450 | |
| }, | |
| { | |
| "epoch": 17.94220387424579, | |
| "grad_norm": 23328.904296875, | |
| "learning_rate": 3.0145284217211818e-05, | |
| "loss": 0.6521, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 17.958081930771673, | |
| "grad_norm": 23195.806640625, | |
| "learning_rate": 3.0105589075897112e-05, | |
| "loss": 0.6519, | |
| "step": 56550 | |
| }, | |
| { | |
| "epoch": 17.973959987297555, | |
| "grad_norm": 24111.8359375, | |
| "learning_rate": 3.0065893934582405e-05, | |
| "loss": 0.6504, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 17.989838043823436, | |
| "grad_norm": 23394.666015625, | |
| "learning_rate": 3.0026198793267706e-05, | |
| "loss": 0.6499, | |
| "step": 56650 | |
| }, | |
| { | |
| "epoch": 18.005716100349318, | |
| "grad_norm": 24451.033203125, | |
| "learning_rate": 2.9986503651953002e-05, | |
| "loss": 0.6493, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 18.0215941568752, | |
| "grad_norm": 24162.71875, | |
| "learning_rate": 2.9946808510638296e-05, | |
| "loss": 0.6437, | |
| "step": 56750 | |
| }, | |
| { | |
| "epoch": 18.03747221340108, | |
| "grad_norm": 23387.994140625, | |
| "learning_rate": 2.9907113369323596e-05, | |
| "loss": 0.6487, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 18.053350269926963, | |
| "grad_norm": 23269.083984375, | |
| "learning_rate": 2.9867418228008893e-05, | |
| "loss": 0.6469, | |
| "step": 56850 | |
| }, | |
| { | |
| "epoch": 18.06922832645284, | |
| "grad_norm": 23347.12890625, | |
| "learning_rate": 2.9827723086694193e-05, | |
| "loss": 0.6428, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 18.085106382978722, | |
| "grad_norm": 23654.724609375, | |
| "learning_rate": 2.9788027945379487e-05, | |
| "loss": 0.6485, | |
| "step": 56950 | |
| }, | |
| { | |
| "epoch": 18.100984439504604, | |
| "grad_norm": 23451.236328125, | |
| "learning_rate": 2.9748332804064784e-05, | |
| "loss": 0.6458, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 18.116862496030485, | |
| "grad_norm": 24515.484375, | |
| "learning_rate": 2.9708637662750084e-05, | |
| "loss": 0.6451, | |
| "step": 57050 | |
| }, | |
| { | |
| "epoch": 18.132740552556367, | |
| "grad_norm": 23294.755859375, | |
| "learning_rate": 2.9668942521435378e-05, | |
| "loss": 0.6473, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 18.14861860908225, | |
| "grad_norm": 24213.658203125, | |
| "learning_rate": 2.962924738012067e-05, | |
| "loss": 0.6435, | |
| "step": 57150 | |
| }, | |
| { | |
| "epoch": 18.16449666560813, | |
| "grad_norm": 23097.380859375, | |
| "learning_rate": 2.958955223880597e-05, | |
| "loss": 0.6448, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 18.18037472213401, | |
| "grad_norm": 23379.626953125, | |
| "learning_rate": 2.954985709749127e-05, | |
| "loss": 0.6455, | |
| "step": 57250 | |
| }, | |
| { | |
| "epoch": 18.196252778659893, | |
| "grad_norm": 23119.544921875, | |
| "learning_rate": 2.9510161956176562e-05, | |
| "loss": 0.6435, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 18.212130835185775, | |
| "grad_norm": 23745.896484375, | |
| "learning_rate": 2.9470466814861862e-05, | |
| "loss": 0.6469, | |
| "step": 57350 | |
| }, | |
| { | |
| "epoch": 18.228008891711653, | |
| "grad_norm": 23996.830078125, | |
| "learning_rate": 2.943077167354716e-05, | |
| "loss": 0.6471, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 18.243886948237535, | |
| "grad_norm": 23478.310546875, | |
| "learning_rate": 2.939107653223246e-05, | |
| "loss": 0.6435, | |
| "step": 57450 | |
| }, | |
| { | |
| "epoch": 18.259765004763416, | |
| "grad_norm": 23451.203125, | |
| "learning_rate": 2.9351381390917753e-05, | |
| "loss": 0.6466, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 18.275643061289298, | |
| "grad_norm": 23465.240234375, | |
| "learning_rate": 2.931168624960305e-05, | |
| "loss": 0.6473, | |
| "step": 57550 | |
| }, | |
| { | |
| "epoch": 18.29152111781518, | |
| "grad_norm": 23575.08203125, | |
| "learning_rate": 2.927199110828835e-05, | |
| "loss": 0.6465, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 18.30739917434106, | |
| "grad_norm": 23251.138671875, | |
| "learning_rate": 2.9232295966973644e-05, | |
| "loss": 0.6434, | |
| "step": 57650 | |
| }, | |
| { | |
| "epoch": 18.323277230866942, | |
| "grad_norm": 23791.912109375, | |
| "learning_rate": 2.919260082565894e-05, | |
| "loss": 0.6462, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 18.339155287392824, | |
| "grad_norm": 23962.904296875, | |
| "learning_rate": 2.915290568434424e-05, | |
| "loss": 0.6464, | |
| "step": 57750 | |
| }, | |
| { | |
| "epoch": 18.355033343918706, | |
| "grad_norm": 23311.55078125, | |
| "learning_rate": 2.9113210543029534e-05, | |
| "loss": 0.6432, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 18.370911400444587, | |
| "grad_norm": 23348.361328125, | |
| "learning_rate": 2.9073515401714828e-05, | |
| "loss": 0.6439, | |
| "step": 57850 | |
| }, | |
| { | |
| "epoch": 18.386789456970465, | |
| "grad_norm": 23554.818359375, | |
| "learning_rate": 2.9033820260400128e-05, | |
| "loss": 0.6468, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 18.402667513496347, | |
| "grad_norm": 23781.22265625, | |
| "learning_rate": 2.8994125119085425e-05, | |
| "loss": 0.6444, | |
| "step": 57950 | |
| }, | |
| { | |
| "epoch": 18.41854557002223, | |
| "grad_norm": 22692.29296875, | |
| "learning_rate": 2.895442997777072e-05, | |
| "loss": 0.6442, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 18.43442362654811, | |
| "grad_norm": 23058.107421875, | |
| "learning_rate": 2.891473483645602e-05, | |
| "loss": 0.6439, | |
| "step": 58050 | |
| }, | |
| { | |
| "epoch": 18.45030168307399, | |
| "grad_norm": 22859.33203125, | |
| "learning_rate": 2.8875039695141316e-05, | |
| "loss": 0.645, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 18.466179739599873, | |
| "grad_norm": 23191.587890625, | |
| "learning_rate": 2.8835344553826616e-05, | |
| "loss": 0.6449, | |
| "step": 58150 | |
| }, | |
| { | |
| "epoch": 18.482057796125755, | |
| "grad_norm": 23647.953125, | |
| "learning_rate": 2.879564941251191e-05, | |
| "loss": 0.6466, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 18.497935852651636, | |
| "grad_norm": 23992.3828125, | |
| "learning_rate": 2.8755954271197207e-05, | |
| "loss": 0.6473, | |
| "step": 58250 | |
| }, | |
| { | |
| "epoch": 18.513813909177518, | |
| "grad_norm": 23166.861328125, | |
| "learning_rate": 2.8716259129882507e-05, | |
| "loss": 0.6451, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 18.5296919657034, | |
| "grad_norm": 23637.072265625, | |
| "learning_rate": 2.86765639885678e-05, | |
| "loss": 0.6481, | |
| "step": 58350 | |
| }, | |
| { | |
| "epoch": 18.545570022229278, | |
| "grad_norm": 23371.7265625, | |
| "learning_rate": 2.8636868847253094e-05, | |
| "loss": 0.6406, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 18.56144807875516, | |
| "grad_norm": 23572.712890625, | |
| "learning_rate": 2.8597173705938398e-05, | |
| "loss": 0.6437, | |
| "step": 58450 | |
| }, | |
| { | |
| "epoch": 18.57732613528104, | |
| "grad_norm": 23404.9765625, | |
| "learning_rate": 2.855747856462369e-05, | |
| "loss": 0.6467, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 18.593204191806922, | |
| "grad_norm": 23365.392578125, | |
| "learning_rate": 2.8517783423308985e-05, | |
| "loss": 0.6464, | |
| "step": 58550 | |
| }, | |
| { | |
| "epoch": 18.609082248332804, | |
| "grad_norm": 22837.333984375, | |
| "learning_rate": 2.8478088281994285e-05, | |
| "loss": 0.6476, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 18.624960304858686, | |
| "grad_norm": 23265.06640625, | |
| "learning_rate": 2.8438393140679582e-05, | |
| "loss": 0.6445, | |
| "step": 58650 | |
| }, | |
| { | |
| "epoch": 18.640838361384567, | |
| "grad_norm": 23657.7265625, | |
| "learning_rate": 2.8398697999364882e-05, | |
| "loss": 0.6487, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 18.65671641791045, | |
| "grad_norm": 23775.322265625, | |
| "learning_rate": 2.8359002858050176e-05, | |
| "loss": 0.6461, | |
| "step": 58750 | |
| }, | |
| { | |
| "epoch": 18.67259447443633, | |
| "grad_norm": 23216.8359375, | |
| "learning_rate": 2.8319307716735472e-05, | |
| "loss": 0.6445, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 18.68847253096221, | |
| "grad_norm": 23683.677734375, | |
| "learning_rate": 2.8279612575420773e-05, | |
| "loss": 0.6464, | |
| "step": 58850 | |
| }, | |
| { | |
| "epoch": 18.70435058748809, | |
| "grad_norm": 24212.357421875, | |
| "learning_rate": 2.8239917434106066e-05, | |
| "loss": 0.6458, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 18.72022864401397, | |
| "grad_norm": 23845.314453125, | |
| "learning_rate": 2.8200222292791363e-05, | |
| "loss": 0.6434, | |
| "step": 58950 | |
| }, | |
| { | |
| "epoch": 18.736106700539853, | |
| "grad_norm": 23429.07421875, | |
| "learning_rate": 2.8160527151476663e-05, | |
| "loss": 0.6454, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 18.751984757065735, | |
| "grad_norm": 23693.30078125, | |
| "learning_rate": 2.8120832010161957e-05, | |
| "loss": 0.6449, | |
| "step": 59050 | |
| }, | |
| { | |
| "epoch": 18.767862813591616, | |
| "grad_norm": 24399.693359375, | |
| "learning_rate": 2.808113686884725e-05, | |
| "loss": 0.6442, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 18.783740870117498, | |
| "grad_norm": 23742.171875, | |
| "learning_rate": 2.8041441727532554e-05, | |
| "loss": 0.645, | |
| "step": 59150 | |
| }, | |
| { | |
| "epoch": 18.79961892664338, | |
| "grad_norm": 23861.95703125, | |
| "learning_rate": 2.8001746586217848e-05, | |
| "loss": 0.6476, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 18.81549698316926, | |
| "grad_norm": 23694.40625, | |
| "learning_rate": 2.7962051444903148e-05, | |
| "loss": 0.6453, | |
| "step": 59250 | |
| }, | |
| { | |
| "epoch": 18.831375039695143, | |
| "grad_norm": 22883.59375, | |
| "learning_rate": 2.792235630358844e-05, | |
| "loss": 0.6441, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 18.847253096221024, | |
| "grad_norm": 24172.53125, | |
| "learning_rate": 2.788266116227374e-05, | |
| "loss": 0.643, | |
| "step": 59350 | |
| }, | |
| { | |
| "epoch": 18.863131152746902, | |
| "grad_norm": 23304.0859375, | |
| "learning_rate": 2.784296602095904e-05, | |
| "loss": 0.6468, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 18.879009209272784, | |
| "grad_norm": 23574.00390625, | |
| "learning_rate": 2.7803270879644332e-05, | |
| "loss": 0.6473, | |
| "step": 59450 | |
| }, | |
| { | |
| "epoch": 18.894887265798666, | |
| "grad_norm": 23528.5859375, | |
| "learning_rate": 2.776357573832963e-05, | |
| "loss": 0.6455, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 18.910765322324547, | |
| "grad_norm": 23860.66015625, | |
| "learning_rate": 2.772388059701493e-05, | |
| "loss": 0.647, | |
| "step": 59550 | |
| }, | |
| { | |
| "epoch": 18.92664337885043, | |
| "grad_norm": 23734.267578125, | |
| "learning_rate": 2.7684185455700223e-05, | |
| "loss": 0.6422, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 18.94252143537631, | |
| "grad_norm": 23822.1875, | |
| "learning_rate": 2.764449031438552e-05, | |
| "loss": 0.643, | |
| "step": 59650 | |
| }, | |
| { | |
| "epoch": 18.958399491902192, | |
| "grad_norm": 22949.701171875, | |
| "learning_rate": 2.760479517307082e-05, | |
| "loss": 0.6441, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 18.974277548428073, | |
| "grad_norm": 23238.943359375, | |
| "learning_rate": 2.7565100031756114e-05, | |
| "loss": 0.6449, | |
| "step": 59750 | |
| }, | |
| { | |
| "epoch": 18.990155604953955, | |
| "grad_norm": 23398.4140625, | |
| "learning_rate": 2.7525404890441407e-05, | |
| "loss": 0.6429, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 19.006033661479837, | |
| "grad_norm": 23431.22265625, | |
| "learning_rate": 2.7485709749126707e-05, | |
| "loss": 0.6427, | |
| "step": 59850 | |
| }, | |
| { | |
| "epoch": 19.021911718005715, | |
| "grad_norm": 23309.41796875, | |
| "learning_rate": 2.7446014607812004e-05, | |
| "loss": 0.6407, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 19.037789774531596, | |
| "grad_norm": 23369.134765625, | |
| "learning_rate": 2.7406319466497305e-05, | |
| "loss": 0.6337, | |
| "step": 59950 | |
| }, | |
| { | |
| "epoch": 19.053667831057478, | |
| "grad_norm": 23830.84375, | |
| "learning_rate": 2.7366624325182598e-05, | |
| "loss": 0.6405, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 19.06954588758336, | |
| "grad_norm": 23755.240234375, | |
| "learning_rate": 2.7326929183867895e-05, | |
| "loss": 0.6404, | |
| "step": 60050 | |
| }, | |
| { | |
| "epoch": 19.08542394410924, | |
| "grad_norm": 23902.658203125, | |
| "learning_rate": 2.7287234042553195e-05, | |
| "loss": 0.639, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 19.101302000635123, | |
| "grad_norm": 23243.98828125, | |
| "learning_rate": 2.724753890123849e-05, | |
| "loss": 0.6352, | |
| "step": 60150 | |
| }, | |
| { | |
| "epoch": 19.117180057161004, | |
| "grad_norm": 23601.951171875, | |
| "learning_rate": 2.7207843759923786e-05, | |
| "loss": 0.6374, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 19.133058113686886, | |
| "grad_norm": 23469.58203125, | |
| "learning_rate": 2.7168148618609086e-05, | |
| "loss": 0.6391, | |
| "step": 60250 | |
| }, | |
| { | |
| "epoch": 19.148936170212767, | |
| "grad_norm": 24173.52734375, | |
| "learning_rate": 2.712845347729438e-05, | |
| "loss": 0.6387, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 19.164814226738645, | |
| "grad_norm": 23875.79296875, | |
| "learning_rate": 2.7088758335979677e-05, | |
| "loss": 0.6399, | |
| "step": 60350 | |
| }, | |
| { | |
| "epoch": 19.180692283264527, | |
| "grad_norm": 23351.52734375, | |
| "learning_rate": 2.7049063194664977e-05, | |
| "loss": 0.6418, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 19.19657033979041, | |
| "grad_norm": 24030.84765625, | |
| "learning_rate": 2.700936805335027e-05, | |
| "loss": 0.6417, | |
| "step": 60450 | |
| }, | |
| { | |
| "epoch": 19.21244839631629, | |
| "grad_norm": 23502.681640625, | |
| "learning_rate": 2.696967291203557e-05, | |
| "loss": 0.6419, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 19.228326452842172, | |
| "grad_norm": 23972.9609375, | |
| "learning_rate": 2.6929977770720864e-05, | |
| "loss": 0.6385, | |
| "step": 60550 | |
| }, | |
| { | |
| "epoch": 19.244204509368053, | |
| "grad_norm": 23450.828125, | |
| "learning_rate": 2.689028262940616e-05, | |
| "loss": 0.6383, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 19.260082565893935, | |
| "grad_norm": 23348.533203125, | |
| "learning_rate": 2.685058748809146e-05, | |
| "loss": 0.6438, | |
| "step": 60650 | |
| }, | |
| { | |
| "epoch": 19.275960622419817, | |
| "grad_norm": 23530.572265625, | |
| "learning_rate": 2.6810892346776755e-05, | |
| "loss": 0.642, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 19.291838678945698, | |
| "grad_norm": 23539.4375, | |
| "learning_rate": 2.6771197205462052e-05, | |
| "loss": 0.6403, | |
| "step": 60750 | |
| }, | |
| { | |
| "epoch": 19.30771673547158, | |
| "grad_norm": 23497.1015625, | |
| "learning_rate": 2.6731502064147352e-05, | |
| "loss": 0.6384, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 19.323594791997458, | |
| "grad_norm": 23695.337890625, | |
| "learning_rate": 2.6691806922832646e-05, | |
| "loss": 0.6428, | |
| "step": 60850 | |
| }, | |
| { | |
| "epoch": 19.33947284852334, | |
| "grad_norm": 23628.791015625, | |
| "learning_rate": 2.6652111781517942e-05, | |
| "loss": 0.6371, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 19.35535090504922, | |
| "grad_norm": 23643.2421875, | |
| "learning_rate": 2.6612416640203243e-05, | |
| "loss": 0.643, | |
| "step": 60950 | |
| }, | |
| { | |
| "epoch": 19.371228961575103, | |
| "grad_norm": 23056.48828125, | |
| "learning_rate": 2.6572721498888536e-05, | |
| "loss": 0.6387, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 19.387107018100984, | |
| "grad_norm": 23687.0859375, | |
| "learning_rate": 2.6533026357573837e-05, | |
| "loss": 0.64, | |
| "step": 61050 | |
| }, | |
| { | |
| "epoch": 19.402985074626866, | |
| "grad_norm": 23476.853515625, | |
| "learning_rate": 2.6493331216259133e-05, | |
| "loss": 0.6406, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 19.418863131152747, | |
| "grad_norm": 24086.623046875, | |
| "learning_rate": 2.6453636074944427e-05, | |
| "loss": 0.6428, | |
| "step": 61150 | |
| }, | |
| { | |
| "epoch": 19.43474118767863, | |
| "grad_norm": 23353.08984375, | |
| "learning_rate": 2.6413940933629727e-05, | |
| "loss": 0.6435, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 19.45061924420451, | |
| "grad_norm": 23448.341796875, | |
| "learning_rate": 2.637424579231502e-05, | |
| "loss": 0.6423, | |
| "step": 61250 | |
| }, | |
| { | |
| "epoch": 19.466497300730392, | |
| "grad_norm": 23803.828125, | |
| "learning_rate": 2.6334550651000318e-05, | |
| "loss": 0.6419, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 19.48237535725627, | |
| "grad_norm": 24309.66796875, | |
| "learning_rate": 2.6294855509685618e-05, | |
| "loss": 0.6434, | |
| "step": 61350 | |
| }, | |
| { | |
| "epoch": 19.49825341378215, | |
| "grad_norm": 23195.96875, | |
| "learning_rate": 2.625516036837091e-05, | |
| "loss": 0.6378, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 19.514131470308033, | |
| "grad_norm": 23841.748046875, | |
| "learning_rate": 2.621546522705621e-05, | |
| "loss": 0.6397, | |
| "step": 61450 | |
| }, | |
| { | |
| "epoch": 19.530009526833915, | |
| "grad_norm": 23373.90625, | |
| "learning_rate": 2.617577008574151e-05, | |
| "loss": 0.6405, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 19.545887583359796, | |
| "grad_norm": 23817.623046875, | |
| "learning_rate": 2.6136074944426802e-05, | |
| "loss": 0.6402, | |
| "step": 61550 | |
| }, | |
| { | |
| "epoch": 19.561765639885678, | |
| "grad_norm": 23955.10546875, | |
| "learning_rate": 2.60963798031121e-05, | |
| "loss": 0.6394, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 19.57764369641156, | |
| "grad_norm": 23063.484375, | |
| "learning_rate": 2.60566846617974e-05, | |
| "loss": 0.6426, | |
| "step": 61650 | |
| }, | |
| { | |
| "epoch": 19.59352175293744, | |
| "grad_norm": 23891.98046875, | |
| "learning_rate": 2.6016989520482693e-05, | |
| "loss": 0.6411, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 19.609399809463323, | |
| "grad_norm": 23992.01171875, | |
| "learning_rate": 2.5977294379167993e-05, | |
| "loss": 0.6414, | |
| "step": 61750 | |
| }, | |
| { | |
| "epoch": 19.625277865989204, | |
| "grad_norm": 23503.697265625, | |
| "learning_rate": 2.5937599237853287e-05, | |
| "loss": 0.6375, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 19.641155922515082, | |
| "grad_norm": 24320.890625, | |
| "learning_rate": 2.5897904096538584e-05, | |
| "loss": 0.6395, | |
| "step": 61850 | |
| }, | |
| { | |
| "epoch": 19.657033979040964, | |
| "grad_norm": 23124.4375, | |
| "learning_rate": 2.5858208955223884e-05, | |
| "loss": 0.6425, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 19.672912035566846, | |
| "grad_norm": 24162.05078125, | |
| "learning_rate": 2.5818513813909177e-05, | |
| "loss": 0.6399, | |
| "step": 61950 | |
| }, | |
| { | |
| "epoch": 19.688790092092727, | |
| "grad_norm": 23305.732421875, | |
| "learning_rate": 2.5778818672594474e-05, | |
| "loss": 0.6375, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 19.70466814861861, | |
| "grad_norm": 23611.771484375, | |
| "learning_rate": 2.5739123531279775e-05, | |
| "loss": 0.6402, | |
| "step": 62050 | |
| }, | |
| { | |
| "epoch": 19.72054620514449, | |
| "grad_norm": 23453.798828125, | |
| "learning_rate": 2.5699428389965068e-05, | |
| "loss": 0.6365, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 19.736424261670372, | |
| "grad_norm": 23632.29296875, | |
| "learning_rate": 2.5659733248650365e-05, | |
| "loss": 0.6387, | |
| "step": 62150 | |
| }, | |
| { | |
| "epoch": 19.752302318196254, | |
| "grad_norm": 23645.380859375, | |
| "learning_rate": 2.5620038107335665e-05, | |
| "loss": 0.642, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 19.768180374722135, | |
| "grad_norm": 23833.201171875, | |
| "learning_rate": 2.558034296602096e-05, | |
| "loss": 0.6401, | |
| "step": 62250 | |
| }, | |
| { | |
| "epoch": 19.784058431248017, | |
| "grad_norm": 23208.59765625, | |
| "learning_rate": 2.554064782470626e-05, | |
| "loss": 0.6398, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 19.799936487773895, | |
| "grad_norm": 23972.072265625, | |
| "learning_rate": 2.5500952683391556e-05, | |
| "loss": 0.6371, | |
| "step": 62350 | |
| }, | |
| { | |
| "epoch": 19.815814544299776, | |
| "grad_norm": 23224.896484375, | |
| "learning_rate": 2.546125754207685e-05, | |
| "loss": 0.6391, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 19.831692600825658, | |
| "grad_norm": 24514.93359375, | |
| "learning_rate": 2.542156240076215e-05, | |
| "loss": 0.6403, | |
| "step": 62450 | |
| }, | |
| { | |
| "epoch": 19.84757065735154, | |
| "grad_norm": 23860.24609375, | |
| "learning_rate": 2.5381867259447443e-05, | |
| "loss": 0.6409, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 19.86344871387742, | |
| "grad_norm": 23272.37890625, | |
| "learning_rate": 2.534217211813274e-05, | |
| "loss": 0.6391, | |
| "step": 62550 | |
| }, | |
| { | |
| "epoch": 19.879326770403303, | |
| "grad_norm": 23465.796875, | |
| "learning_rate": 2.530247697681804e-05, | |
| "loss": 0.6403, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 19.895204826929184, | |
| "grad_norm": 23239.646484375, | |
| "learning_rate": 2.5262781835503334e-05, | |
| "loss": 0.641, | |
| "step": 62650 | |
| }, | |
| { | |
| "epoch": 19.911082883455066, | |
| "grad_norm": 23455.052734375, | |
| "learning_rate": 2.522308669418863e-05, | |
| "loss": 0.641, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 19.926960939980948, | |
| "grad_norm": 23780.283203125, | |
| "learning_rate": 2.518339155287393e-05, | |
| "loss": 0.6403, | |
| "step": 62750 | |
| }, | |
| { | |
| "epoch": 19.94283899650683, | |
| "grad_norm": 24348.8515625, | |
| "learning_rate": 2.5143696411559225e-05, | |
| "loss": 0.638, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 19.958717053032707, | |
| "grad_norm": 23591.212890625, | |
| "learning_rate": 2.5104001270244525e-05, | |
| "loss": 0.6367, | |
| "step": 62850 | |
| }, | |
| { | |
| "epoch": 19.97459510955859, | |
| "grad_norm": 23728.669921875, | |
| "learning_rate": 2.5064306128929822e-05, | |
| "loss": 0.6397, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 19.99047316608447, | |
| "grad_norm": 23320.609375, | |
| "learning_rate": 2.5024610987615116e-05, | |
| "loss": 0.6411, | |
| "step": 62950 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 62980, | |
| "total_flos": 2.10597197119488e+18, | |
| "train_loss": 0.33622724912249125, | |
| "train_runtime": 72693.7372, | |
| "train_samples_per_second": 55.437, | |
| "train_steps_per_second": 0.866 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 62980, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.10597197119488e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
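
The block above matches the schema that Hugging Face's `Trainer` writes to `trainer_state.json`: per-step entries in `log_history` (each with `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`), one final summary entry keyed by `train_loss` and runtime stats, and run-level metadata such as `logging_steps` and `max_steps`. A minimal sketch of pulling the loss curve back out of such a file, assuming it is saved under the illustrative name `trainer_state.json`:

```python
import json

# Load the state file dumped by Hugging Face's Trainer.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step logging entries carry "loss" and "step"; the final summary
# entry uses "train_loss" instead, so this filter skips it.
history = [e for e in state["log_history"] if "loss" in e and "step" in e]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]

print(f"{len(history)} logged entries; loss {losses[0]} -> {losses[-1]} "
      f"over steps {steps[0]}..{steps[-1]}")
```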