{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.998630870885172, "eval_steps": 500, "global_step": 11196, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035716411691172094, "grad_norm": 47.84096509103244, "learning_rate": 6.3e-07, "loss": 2.0895, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.22, "memory/max_mem_allocated(gib)": 42.69, "step": 10 }, { "epoch": 0.007143282338234419, "grad_norm": 9.599267571025768, "learning_rate": 1.33e-06, "loss": 1.8176, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.22, "memory/max_mem_allocated(gib)": 42.69, "step": 20 }, { "epoch": 0.010714923507351629, "grad_norm": 5.205599049164542, "learning_rate": 2.0299999999999996e-06, "loss": 1.4788, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.22, "memory/max_mem_allocated(gib)": 42.69, "step": 30 }, { "epoch": 0.014286564676468837, "grad_norm": 2.205719923387632, "learning_rate": 2.73e-06, "loss": 1.2848, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 40 }, { "epoch": 0.017858205845586048, "grad_norm": 1.8447838629310436, "learning_rate": 3.4299999999999998e-06, "loss": 1.169, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 50 }, { "epoch": 0.021429847014703258, "grad_norm": 2.1579365048105044, "learning_rate": 4.129999999999999e-06, "loss": 1.0719, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 60 }, { "epoch": 0.025001488183820465, "grad_norm": 2.7295524544918397, "learning_rate": 4.8299999999999995e-06, "loss": 1.0057, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 70 }, { "epoch": 0.028573129352937675, "grad_norm": 2.7450280014903705, "learning_rate": 5.53e-06, "loss": 0.9462, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 80 }, { "epoch": 0.03214477052205488, "grad_norm": 3.5632078056569534, "learning_rate": 6.23e-06, "loss": 0.8855, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 90 }, { "epoch": 0.035716411691172095, "grad_norm": 3.4382364016435805, "learning_rate": 6.93e-06, "loss": 0.8407, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 100 }, { "epoch": 0.0392880528602893, "grad_norm": 2.9311709402300705, "learning_rate": 6.999988637086991e-06, "loss": 0.7935, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 110 }, { "epoch": 0.042859694029406516, "grad_norm": 2.796049740599755, "learning_rate": 6.9999493579762545e-06, "loss": 0.7523, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 120 }, { "epoch": 0.04643133519852372, "grad_norm": 3.1001912029157466, "learning_rate": 6.999882022699704e-06, "loss": 0.7096, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 130 }, { "epoch": 0.05000297636764093, "grad_norm": 2.720368644013231, "learning_rate": 6.999786631797113e-06, "loss": 0.6689, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 140 }, { "epoch": 0.05357461753675814, "grad_norm": 2.570815950070848, "learning_rate": 6.999663186033147e-06, "loss": 0.6353, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 150 }, { "epoch": 0.05714625870587535, "grad_norm": 2.4115369873146384, "learning_rate": 6.999511686397369e-06, "loss": 0.6019, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 160 }, { "epoch": 0.060717899874992556, "grad_norm": 2.3377949765181993, "learning_rate": 6.999332134104226e-06, "loss": 0.5704, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 170 }, { "epoch": 0.06428954104410976, "grad_norm": 1.5878939024656435, "learning_rate": 6.999124530593037e-06, "loss": 0.5523, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 180 }, { "epoch": 0.06786118221322698, "grad_norm": 1.3264919157704183, "learning_rate": 6.998888877527986e-06, "loss": 0.5435, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 190 }, { "epoch": 0.07143282338234419, "grad_norm": 2.038010532729759, "learning_rate": 6.9986251767981075e-06, "loss": 0.522, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 200 }, { "epoch": 0.0750044645514614, "grad_norm": 1.21415374871142, "learning_rate": 6.9983334305172685e-06, "loss": 0.51, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 210 }, { "epoch": 0.0785761057205786, "grad_norm": 1.614164511359785, "learning_rate": 6.998013641024154e-06, "loss": 0.4999, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 220 }, { "epoch": 0.08214774688969581, "grad_norm": 1.8682436211450861, "learning_rate": 6.997665810882251e-06, "loss": 0.495, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 230 }, { "epoch": 0.08571938805881303, "grad_norm": 1.2962880024167205, "learning_rate": 6.997289942879822e-06, "loss": 0.4771, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 240 }, { "epoch": 0.08929102922793024, "grad_norm": 1.087472607402264, "learning_rate": 6.996886040029885e-06, "loss": 0.4806, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 250 }, { "epoch": 0.09286267039704744, "grad_norm": 1.0059439297326096, "learning_rate": 6.996454105570193e-06, "loss": 0.4723, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 260 }, { "epoch": 0.09643431156616465, "grad_norm": 0.8919607073801068, "learning_rate": 6.9959941429632016e-06, "loss": 0.4619, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 270 }, { "epoch": 0.10000595273528186, "grad_norm": 0.9964374765321111, "learning_rate": 6.995506155896048e-06, "loss": 0.4596, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 280 }, { "epoch": 0.10357759390439907, "grad_norm": 1.6592621687619997, "learning_rate": 6.994990148280515e-06, "loss": 0.4583, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 290 }, { "epoch": 0.10714923507351629, "grad_norm": 0.9205505376855222, "learning_rate": 6.994446124253003e-06, "loss": 0.4494, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 300 }, { "epoch": 0.11072087624263349, "grad_norm": 0.8244439874224572, "learning_rate": 6.993874088174499e-06, "loss": 0.4397, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 310 }, { "epoch": 0.1142925174117507, "grad_norm": 0.8112163055151138, "learning_rate": 6.993274044630538e-06, "loss": 0.4474, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 320 }, { "epoch": 0.1178641585808679, "grad_norm": 0.9568110158706775, "learning_rate": 6.992645998431166e-06, "loss": 0.4381, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 330 }, { "epoch": 0.12143579974998511, "grad_norm": 0.894726666953806, "learning_rate": 6.991989954610905e-06, "loss": 0.4297, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 340 }, { "epoch": 0.12500744091910232, "grad_norm": 0.8386265568617023, "learning_rate": 6.991305918428709e-06, "loss": 0.4323, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 350 }, { "epoch": 0.12857908208821953, "grad_norm": 0.7834041126531694, "learning_rate": 6.990593895367922e-06, "loss": 0.43, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 360 }, { "epoch": 0.13215072325733673, "grad_norm": 0.6738833957249635, "learning_rate": 6.98985389113624e-06, "loss": 0.4248, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 370 }, { "epoch": 0.13572236442645397, "grad_norm": 0.6793575164378994, "learning_rate": 6.989085911665656e-06, "loss": 0.425, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 380 }, { "epoch": 0.13929400559557117, "grad_norm": 0.7442286270617003, "learning_rate": 6.988289963112419e-06, "loss": 0.4176, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 390 }, { "epoch": 0.14286564676468838, "grad_norm": 0.6670476894638441, "learning_rate": 6.987466051856985e-06, "loss": 0.4187, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 400 }, { "epoch": 0.1464372879338056, "grad_norm": 0.6158756183627782, "learning_rate": 6.9866141845039574e-06, "loss": 0.4083, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 410 }, { "epoch": 0.1500089291029228, "grad_norm": 0.8089591515808892, "learning_rate": 6.9857343678820465e-06, "loss": 0.4113, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 420 }, { "epoch": 0.15358057027204, "grad_norm": 0.6783726635892394, "learning_rate": 6.984826609044006e-06, "loss": 0.41, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 430 }, { "epoch": 0.1571522114411572, "grad_norm": 0.7629974327324691, "learning_rate": 6.983890915266577e-06, "loss": 0.4134, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 440 }, { "epoch": 0.16072385261027441, "grad_norm": 0.7450263962040445, "learning_rate": 6.982927294050436e-06, "loss": 0.4152, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 450 }, { "epoch": 0.16429549377939162, "grad_norm": 0.6928100693829363, "learning_rate": 6.981935753120128e-06, "loss": 0.4113, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 460 }, { "epoch": 0.16786713494850883, "grad_norm": 0.7438608324903054, "learning_rate": 6.980916300424006e-06, "loss": 0.4076, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 470 }, { "epoch": 0.17143877611762606, "grad_norm": 0.7341733676360269, "learning_rate": 6.979868944134169e-06, "loss": 0.4047, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 480 }, { "epoch": 0.17501041728674327, "grad_norm": 0.6106239719972382, "learning_rate": 6.9787936926463975e-06, "loss": 0.4093, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 490 }, { "epoch": 0.17858205845586048, "grad_norm": 0.5925020448722477, "learning_rate": 6.977690554580082e-06, "loss": 0.4011, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 500 }, { "epoch": 0.18215369962497768, "grad_norm": 0.5871269087366053, "learning_rate": 6.976559538778159e-06, "loss": 0.3971, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 510 }, { "epoch": 0.1857253407940949, "grad_norm": 0.8321256719515702, "learning_rate": 6.975400654307034e-06, "loss": 0.3997, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 520 }, { "epoch": 0.1892969819632121, "grad_norm": 0.5233429979550782, "learning_rate": 6.974213910456515e-06, "loss": 0.4004, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 530 }, { "epoch": 0.1928686231323293, "grad_norm": 0.6812462304184482, "learning_rate": 6.9729993167397355e-06, "loss": 0.3937, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 540 }, { "epoch": 0.1964402643014465, "grad_norm": 0.6409024032491494, "learning_rate": 6.971756882893076e-06, "loss": 0.3889, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 550 }, { "epoch": 0.20001190547056372, "grad_norm": 0.7193181432834939, "learning_rate": 6.9704866188760895e-06, "loss": 0.3992, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 560 }, { "epoch": 0.20358354663968092, "grad_norm": 0.7830346863841446, "learning_rate": 6.9691885348714184e-06, "loss": 0.3937, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 570 }, { "epoch": 0.20715518780879813, "grad_norm": 0.5652448193001737, "learning_rate": 6.967862641284718e-06, "loss": 0.3886, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 580 }, { "epoch": 0.21072682897791536, "grad_norm": 0.5748926945027741, "learning_rate": 6.966508948744569e-06, "loss": 0.3848, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 590 }, { "epoch": 0.21429847014703257, "grad_norm": 0.6665986237846571, "learning_rate": 6.96512746810239e-06, "loss": 0.3924, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 600 }, { "epoch": 0.21787011131614978, "grad_norm": 0.6823372205985191, "learning_rate": 6.963718210432355e-06, "loss": 0.3826, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 610 }, { "epoch": 0.22144175248526698, "grad_norm": 0.58994938065327, "learning_rate": 6.962281187031305e-06, "loss": 0.389, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 620 }, { "epoch": 0.2250133936543842, "grad_norm": 0.6052840463581863, "learning_rate": 6.9608164094186545e-06, "loss": 0.3835, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 630 }, { "epoch": 0.2285850348235014, "grad_norm": 0.7055587121927881, "learning_rate": 6.959323889336299e-06, "loss": 0.3793, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 640 }, { "epoch": 0.2321566759926186, "grad_norm": 0.6473817353018219, "learning_rate": 6.957803638748524e-06, "loss": 0.3768, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 650 }, { "epoch": 0.2357283171617358, "grad_norm": 0.5453880438030917, "learning_rate": 6.9562556698419065e-06, "loss": 0.3858, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 660 }, { "epoch": 0.23929995833085302, "grad_norm": 0.539099324403936, "learning_rate": 6.954679995025219e-06, "loss": 0.3882, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 670 }, { "epoch": 0.24287159949997023, "grad_norm": 0.5764837722625463, "learning_rate": 6.953076626929326e-06, "loss": 0.3808, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 680 }, { "epoch": 0.24644324066908743, "grad_norm": 0.5646676416233323, "learning_rate": 6.95144557840709e-06, "loss": 0.3801, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 690 }, { "epoch": 0.25001488183820464, "grad_norm": 0.6097644788446263, "learning_rate": 6.949786862533261e-06, "loss": 0.3716, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 700 }, { "epoch": 0.2535865230073219, "grad_norm": 0.526307204275848, "learning_rate": 6.9481004926043784e-06, "loss": 0.3778, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 710 }, { "epoch": 0.25715816417643905, "grad_norm": 0.578906592299905, "learning_rate": 6.946386482138658e-06, "loss": 0.3787, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 720 }, { "epoch": 0.2607298053455563, "grad_norm": 0.549626668018743, "learning_rate": 6.944644844875887e-06, "loss": 0.3702, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 730 }, { "epoch": 0.26430144651467347, "grad_norm": 0.6165546258876643, "learning_rate": 6.942875594777315e-06, "loss": 0.3721, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 740 }, { "epoch": 0.2678730876837907, "grad_norm": 0.6930131839825238, "learning_rate": 6.94107874602554e-06, "loss": 0.3703, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 750 }, { "epoch": 0.27144472885290793, "grad_norm": 0.60045008248484, "learning_rate": 6.939254313024393e-06, "loss": 0.3704, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 760 }, { "epoch": 0.2750163700220251, "grad_norm": 0.5827829638301874, "learning_rate": 6.9374023103988305e-06, "loss": 0.3708, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 770 }, { "epoch": 0.27858801119114235, "grad_norm": 0.5545642688802787, "learning_rate": 6.935522752994806e-06, "loss": 0.3749, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 780 }, { "epoch": 0.2821596523602595, "grad_norm": 0.5429199324119214, "learning_rate": 6.933615655879159e-06, "loss": 0.3748, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 790 }, { "epoch": 0.28573129352937676, "grad_norm": 0.5151541889971357, "learning_rate": 6.931681034339491e-06, "loss": 0.3692, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 800 }, { "epoch": 0.28930293469849394, "grad_norm": 0.5071957665722244, "learning_rate": 6.929718903884045e-06, "loss": 0.372, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 810 }, { "epoch": 0.2928745758676112, "grad_norm": 0.6233887048007566, "learning_rate": 6.92772928024158e-06, "loss": 0.3689, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 820 }, { "epoch": 0.29644621703672835, "grad_norm": 0.522671393367273, "learning_rate": 6.9257121793612425e-06, "loss": 0.3648, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 830 }, { "epoch": 0.3000178582058456, "grad_norm": 0.5410930274218302, "learning_rate": 6.923667617412443e-06, "loss": 0.3684, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 840 }, { "epoch": 0.30358949937496277, "grad_norm": 0.5662127188946626, "learning_rate": 6.921595610784726e-06, "loss": 0.367, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 850 }, { "epoch": 0.30716114054408, "grad_norm": 0.6258807561777707, "learning_rate": 6.9194961760876324e-06, "loss": 0.3655, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 860 }, { "epoch": 0.31073278171319724, "grad_norm": 0.5662982884434634, "learning_rate": 6.917369330150575e-06, "loss": 0.3668, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 870 }, { "epoch": 0.3143044228823144, "grad_norm": 0.5937482674450755, "learning_rate": 6.915215090022699e-06, "loss": 0.3648, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 880 }, { "epoch": 0.31787606405143165, "grad_norm": 0.5546463119504276, "learning_rate": 6.913033472972744e-06, "loss": 0.3642, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 890 }, { "epoch": 0.32144770522054883, "grad_norm": 0.5674302479041585, "learning_rate": 6.910824496488908e-06, "loss": 0.3765, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 900 }, { "epoch": 0.32501934638966606, "grad_norm": 0.5205528325154691, "learning_rate": 6.90858817827871e-06, "loss": 0.36, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 910 }, { "epoch": 0.32859098755878324, "grad_norm": 0.529377222438568, "learning_rate": 6.906324536268841e-06, "loss": 0.3677, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 920 }, { "epoch": 0.3321626287279005, "grad_norm": 0.6122529498214767, "learning_rate": 6.9040335886050235e-06, "loss": 0.3665, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 930 }, { "epoch": 0.33573426989701766, "grad_norm": 0.6135486838026446, "learning_rate": 6.901715353651869e-06, "loss": 0.3601, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 940 }, { "epoch": 0.3393059110661349, "grad_norm": 0.4990872913204794, "learning_rate": 6.899369849992728e-06, "loss": 0.3642, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 950 }, { "epoch": 0.3428775522352521, "grad_norm": 0.5348432895185071, "learning_rate": 6.896997096429542e-06, "loss": 0.3559, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 960 }, { "epoch": 0.3464491934043693, "grad_norm": 0.5440123718353397, "learning_rate": 6.89459711198269e-06, "loss": 0.3644, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 970 }, { "epoch": 0.35002083457348654, "grad_norm": 0.5253113764115817, "learning_rate": 6.89216991589084e-06, "loss": 0.359, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 980 }, { "epoch": 0.3535924757426037, "grad_norm": 0.6010219056393492, "learning_rate": 6.889715527610791e-06, "loss": 0.3586, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 990 }, { "epoch": 0.35716411691172095, "grad_norm": 0.49806273998200895, "learning_rate": 6.887233966817321e-06, "loss": 0.3588, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1000 }, { "epoch": 0.36073575808083813, "grad_norm": 0.5185542033740208, "learning_rate": 6.884725253403029e-06, "loss": 0.3559, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1010 }, { "epoch": 0.36430739924995537, "grad_norm": 0.6091051912479017, "learning_rate": 6.882189407478168e-06, "loss": 0.3593, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1020 }, { "epoch": 0.36787904041907254, "grad_norm": 0.490572831948872, "learning_rate": 6.8796264493704955e-06, "loss": 0.3533, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1030 }, { "epoch": 0.3714506815881898, "grad_norm": 0.4991028642294145, "learning_rate": 6.877036399625101e-06, "loss": 0.3586, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1040 }, { "epoch": 0.37502232275730696, "grad_norm": 0.47906335050403054, "learning_rate": 6.874419279004248e-06, "loss": 0.3581, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1050 }, { "epoch": 0.3785939639264242, "grad_norm": 0.49469149452681016, "learning_rate": 6.871775108487201e-06, "loss": 0.3596, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1060 }, { "epoch": 0.3821656050955414, "grad_norm": 0.5292248202385722, "learning_rate": 6.869103909270065e-06, "loss": 0.3583, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1070 }, { "epoch": 0.3857372462646586, "grad_norm": 0.5117377218602205, "learning_rate": 6.866405702765607e-06, "loss": 0.365, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1080 }, { "epoch": 0.38930888743377584, "grad_norm": 0.5004559418367507, "learning_rate": 6.863680510603094e-06, "loss": 0.3455, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1090 }, { "epoch": 0.392880528602893, "grad_norm": 0.57089126215835, "learning_rate": 6.860928354628108e-06, "loss": 0.349, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1100 }, { "epoch": 0.39645216977201025, "grad_norm": 0.5820503861826878, "learning_rate": 6.8581492569023836e-06, "loss": 0.3551, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1110 }, { "epoch": 0.40002381094112743, "grad_norm": 0.5570132816377671, "learning_rate": 6.85534323970362e-06, "loss": 0.3513, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1120 }, { "epoch": 0.40359545211024467, "grad_norm": 0.5464001911986117, "learning_rate": 6.852510325525313e-06, "loss": 0.3613, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1130 }, { "epoch": 0.40716709327936185, "grad_norm": 0.4827396326230109, "learning_rate": 6.849650537076562e-06, "loss": 0.3504, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1140 }, { "epoch": 0.4107387344484791, "grad_norm": 0.4999437570126179, "learning_rate": 6.8467638972819e-06, "loss": 0.3526, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1150 }, { "epoch": 0.41431037561759626, "grad_norm": 0.48618334288528897, "learning_rate": 6.843850429281102e-06, "loss": 0.3541, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1160 }, { "epoch": 0.4178820167867135, "grad_norm": 0.48710415139914987, "learning_rate": 6.840910156429005e-06, "loss": 0.3502, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1170 }, { "epoch": 0.42145365795583073, "grad_norm": 0.5301441193838501, "learning_rate": 6.8379431022953156e-06, "loss": 0.3451, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1180 }, { "epoch": 0.4250252991249479, "grad_norm": 0.4847505852864843, "learning_rate": 6.834949290664424e-06, "loss": 0.3549, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1190 }, { "epoch": 0.42859694029406514, "grad_norm": 0.49082072305818153, "learning_rate": 6.831928745535214e-06, "loss": 0.3539, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1200 }, { "epoch": 0.4321685814631823, "grad_norm": 0.46047367228279323, "learning_rate": 6.828881491120869e-06, "loss": 0.3545, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1210 }, { "epoch": 0.43574022263229956, "grad_norm": 0.48672379907591806, "learning_rate": 6.825807551848678e-06, "loss": 0.347, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1220 }, { "epoch": 0.43931186380141674, "grad_norm": 0.4448810458268107, "learning_rate": 6.8227069523598405e-06, "loss": 0.3559, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1230 }, { "epoch": 0.44288350497053397, "grad_norm": 0.49497043238088634, "learning_rate": 6.8195797175092675e-06, "loss": 0.3546, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1240 }, { "epoch": 0.44645514613965115, "grad_norm": 0.4732540784083669, "learning_rate": 6.816425872365385e-06, "loss": 0.349, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1250 }, { "epoch": 0.4500267873087684, "grad_norm": 0.5192314130895199, "learning_rate": 6.813245442209932e-06, "loss": 0.3515, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1260 }, { "epoch": 0.45359842847788556, "grad_norm": 0.4792403319160851, "learning_rate": 6.810038452537754e-06, "loss": 0.3382, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1270 }, { "epoch": 0.4571700696470028, "grad_norm": 0.5060077004358183, "learning_rate": 6.806804929056606e-06, "loss": 0.3554, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1280 }, { "epoch": 0.46074171081612003, "grad_norm": 0.520248105755976, "learning_rate": 6.80354489768694e-06, "loss": 0.3464, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1290 }, { "epoch": 0.4643133519852372, "grad_norm": 0.4598019361746213, "learning_rate": 6.8002583845617e-06, "loss": 0.3526, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1300 }, { "epoch": 0.46788499315435444, "grad_norm": 0.4724933886093667, "learning_rate": 6.796945416026112e-06, "loss": 0.3512, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1310 }, { "epoch": 0.4714566343234716, "grad_norm": 0.4660281073410152, "learning_rate": 6.793606018637473e-06, "loss": 0.3476, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1320 }, { "epoch": 0.47502827549258886, "grad_norm": 0.5045848706762358, "learning_rate": 6.790240219164937e-06, "loss": 0.3453, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1330 }, { "epoch": 0.47859991666170604, "grad_norm": 0.5194987277056717, "learning_rate": 6.786848044589305e-06, "loss": 0.3474, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1340 }, { "epoch": 0.48217155783082327, "grad_norm": 0.5656264203505013, "learning_rate": 6.7834295221028e-06, "loss": 0.3424, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1350 }, { "epoch": 0.48574319899994045, "grad_norm": 0.4894023655199842, "learning_rate": 6.779984679108858e-06, "loss": 0.3515, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1360 }, { "epoch": 0.4893148401690577, "grad_norm": 0.5037132785096825, "learning_rate": 6.776513543221901e-06, "loss": 0.3346, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1370 }, { "epoch": 0.49288648133817486, "grad_norm": 0.5190300836703553, "learning_rate": 6.773016142267126e-06, "loss": 0.3476, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1380 }, { "epoch": 0.4964581225072921, "grad_norm": 0.4435043093847383, "learning_rate": 6.7694925042802655e-06, "loss": 0.3366, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1390 }, { "epoch": 0.5000297636764093, "grad_norm": 0.5324417067919307, "learning_rate": 6.7659426575073794e-06, "loss": 0.3456, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1400 }, { "epoch": 0.5036014048455265, "grad_norm": 0.494609813535511, "learning_rate": 6.76236663040462e-06, "loss": 0.3389, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1410 }, { "epoch": 0.5071730460146437, "grad_norm": 0.4723453866802145, "learning_rate": 6.758764451638004e-06, "loss": 0.3419, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1420 }, { "epoch": 0.510744687183761, "grad_norm": 0.4986566417970195, "learning_rate": 6.7551361500831845e-06, "loss": 0.3359, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1430 }, { "epoch": 0.5143163283528781, "grad_norm": 0.4900882038508289, "learning_rate": 6.75148175482522e-06, "loss": 0.3453, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1440 }, { "epoch": 0.5178879695219953, "grad_norm": 0.48044517729584096, "learning_rate": 6.747801295158339e-06, "loss": 0.3442, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1450 }, { "epoch": 0.5214596106911126, "grad_norm": 0.5288761906532677, "learning_rate": 6.744094800585709e-06, "loss": 0.3375, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1460 }, { "epoch": 0.5250312518602298, "grad_norm": 0.530447136190816, "learning_rate": 6.740362300819195e-06, "loss": 0.3382, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1470 }, { "epoch": 0.5286028930293469, "grad_norm": 0.45351586743181693, "learning_rate": 6.736603825779125e-06, "loss": 0.3444, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1480 }, { "epoch": 0.5321745341984642, "grad_norm": 0.5208363356822504, "learning_rate": 6.732819405594048e-06, "loss": 0.3347, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1490 }, { "epoch": 0.5357461753675814, "grad_norm": 0.45820411460318944, "learning_rate": 6.729009070600497e-06, "loss": 0.3389, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1500 }, { "epoch": 0.5393178165366986, "grad_norm": 0.4831990656531665, "learning_rate": 6.725172851342737e-06, "loss": 0.3355, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1510 }, { "epoch": 0.5428894577058159, "grad_norm": 0.4699654688524428, "learning_rate": 6.721310778572529e-06, "loss": 0.3318, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1520 }, { "epoch": 0.546461098874933, "grad_norm": 0.47152717294043456, "learning_rate": 6.71742288324888e-06, "loss": 0.3412, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1530 }, { "epoch": 0.5500327400440502, "grad_norm": 0.4994080875975566, "learning_rate": 6.713509196537794e-06, "loss": 0.3376, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1540 }, { "epoch": 0.5536043812131675, "grad_norm": 0.48606980053716015, "learning_rate": 6.709569749812023e-06, "loss": 0.3412, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1550 }, { "epoch": 0.5571760223822847, "grad_norm": 0.47736374810187937, "learning_rate": 6.705604574650815e-06, "loss": 0.3359, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1560 }, { "epoch": 0.5607476635514018, "grad_norm": 0.46009726498403986, "learning_rate": 6.701613702839661e-06, "loss": 0.3372, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1570 }, { "epoch": 0.564319304720519, "grad_norm": 0.49517486088050466, "learning_rate": 6.6975971663700434e-06, "loss": 0.328, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1580 }, { "epoch": 0.5678909458896363, "grad_norm": 0.4746416834193424, "learning_rate": 6.693554997439175e-06, "loss": 0.3366, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1590 }, { "epoch": 0.5714625870587535, "grad_norm": 0.4611224689559148, "learning_rate": 6.689487228449739e-06, "loss": 0.3409, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1600 }, { "epoch": 0.5750342282278708, "grad_norm": 0.4809305461747918, "learning_rate": 6.685393892009639e-06, "loss": 0.3398, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1610 }, { "epoch": 0.5786058693969879, "grad_norm": 0.44334262590360224, "learning_rate": 6.681275020931727e-06, "loss": 0.3382, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1620 }, { "epoch": 0.5821775105661051, "grad_norm": 0.5398313870854491, "learning_rate": 6.677130648233548e-06, "loss": 0.3464, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1630 }, { "epoch": 0.5857491517352224, "grad_norm": 0.490905419595684, "learning_rate": 6.6729608071370695e-06, "loss": 0.3377, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1640 }, { "epoch": 0.5893207929043396, "grad_norm": 0.5468845395496524, "learning_rate": 6.668765531068419e-06, "loss": 0.3363, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1650 }, { "epoch": 0.5928924340734567, "grad_norm": 0.48192206630092693, "learning_rate": 6.664544853657616e-06, "loss": 0.3298, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1660 }, { "epoch": 0.5964640752425739, "grad_norm": 0.45744945668619447, "learning_rate": 6.660298808738299e-06, "loss": 0.334, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1670 }, { "epoch": 0.6000357164116912, "grad_norm": 0.44653921337558566, "learning_rate": 6.656027430347457e-06, "loss": 0.3355, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1680 }, { "epoch": 0.6036073575808084, "grad_norm": 0.5388041006651919, "learning_rate": 6.6517307527251576e-06, "loss": 0.3372, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1690 }, { "epoch": 0.6071789987499255, "grad_norm": 0.4494250389449916, "learning_rate": 6.647408810314272e-06, "loss": 0.3289, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1700 }, { "epoch": 0.6107506399190428, "grad_norm": 0.5041032492465202, "learning_rate": 6.643061637760195e-06, "loss": 0.329, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1710 }, { "epoch": 0.61432228108816, "grad_norm": 0.4542350053071805, "learning_rate": 6.638689269910572e-06, "loss": 0.3269, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1720 }, { "epoch": 0.6178939222572772, "grad_norm": 0.4717924107240841, "learning_rate": 6.634291741815018e-06, "loss": 0.336, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1730 }, { "epoch": 0.6214655634263945, "grad_norm": 0.49412268954326644, "learning_rate": 6.629869088724838e-06, "loss": 0.3356, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1740 }, { "epoch": 0.6250372045955116, "grad_norm": 0.4881525370343868, "learning_rate": 6.6254213460927385e-06, "loss": 0.3294, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1750 }, { "epoch": 0.6286088457646288, "grad_norm": 0.5027799485224533, "learning_rate": 6.620948549572555e-06, "loss": 0.3344, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1760 }, { "epoch": 0.6321804869337461, "grad_norm": 0.48657084247101007, "learning_rate": 6.616450735018952e-06, "loss": 0.3346, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1770 }, { "epoch": 0.6357521281028633, "grad_norm": 0.4974458887948958, "learning_rate": 6.611927938487148e-06, "loss": 0.3372, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1780 }, { "epoch": 0.6393237692719804, "grad_norm": 0.48881133320324655, "learning_rate": 6.607380196232617e-06, "loss": 0.332, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1790 }, { "epoch": 0.6428954104410977, "grad_norm": 0.436305609913343, "learning_rate": 6.6028075447108054e-06, "loss": 0.3285, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1800 }, { "epoch": 0.6464670516102149, "grad_norm": 0.48349336881385824, "learning_rate": 6.598210020576833e-06, "loss": 0.329, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1810 }, { "epoch": 0.6500386927793321, "grad_norm": 0.45307654128523317, "learning_rate": 6.593587660685206e-06, "loss": 0.323, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1820 }, { "epoch": 0.6536103339484494, "grad_norm": 0.4663165053604926, "learning_rate": 6.588940502089516e-06, "loss": 0.3354, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1830 }, { "epoch": 0.6571819751175665, "grad_norm": 0.49842281430038393, "learning_rate": 6.584268582042145e-06, "loss": 0.3329, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1840 }, { "epoch": 0.6607536162866837, "grad_norm": 0.4758384481874692, "learning_rate": 6.579571937993967e-06, "loss": 0.3332, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1850 }, { "epoch": 0.664325257455801, "grad_norm": 0.4548392499179828, "learning_rate": 6.574850607594048e-06, "loss": 0.3356, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1860 }, { "epoch": 0.6678968986249182, "grad_norm": 0.4723502380387424, "learning_rate": 6.570104628689345e-06, "loss": 0.3228, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1870 }, { "epoch": 0.6714685397940353, "grad_norm": 0.4827017868525288, "learning_rate": 6.5653340393244e-06, "loss": 0.3299, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1880 }, { "epoch": 0.6750401809631525, "grad_norm": 0.4853339734974653, "learning_rate": 6.560538877741036e-06, "loss": 0.3354, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1890 }, { "epoch": 0.6786118221322698, "grad_norm": 0.490622912910108, "learning_rate": 6.55571918237805e-06, "loss": 0.3289, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1900 }, { "epoch": 0.682183463301387, "grad_norm": 0.4360684992956346, "learning_rate": 6.5508749918709115e-06, "loss": 0.3304, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1910 }, { "epoch": 0.6857551044705043, "grad_norm": 0.4706600602999743, "learning_rate": 6.54600634505144e-06, "loss": 0.3268, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1920 }, { "epoch": 0.6893267456396214, "grad_norm": 0.4674898732610253, "learning_rate": 6.541113280947503e-06, "loss": 0.328, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1930 }, { "epoch": 0.6928983868087386, "grad_norm": 0.48582636452480815, "learning_rate": 6.536195838782704e-06, "loss": 0.3317, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1940 }, { "epoch": 0.6964700279778558, "grad_norm": 0.46098715412427904, "learning_rate": 6.5312540579760625e-06, "loss": 0.3335, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1950 }, { "epoch": 0.7000416691469731, "grad_norm": 0.4468284228904629, "learning_rate": 6.526287978141699e-06, "loss": 0.3269, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1960 }, { "epoch": 0.7036133103160902, "grad_norm": 0.4754658451034331, "learning_rate": 6.521297639088522e-06, "loss": 0.3271, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1970 }, { "epoch": 0.7071849514852074, "grad_norm": 0.45435138107973977, "learning_rate": 6.516283080819904e-06, "loss": 0.3299, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1980 }, { "epoch": 0.7107565926543247, "grad_norm": 0.449811476238864, "learning_rate": 6.511244343533364e-06, "loss": 0.3239, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 1990 }, { "epoch": 0.7143282338234419, "grad_norm": 0.43297219624166366, "learning_rate": 6.506181467620243e-06, "loss": 0.3289, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2000 }, { "epoch": 0.717899874992559, "grad_norm": 0.4396876046033842, "learning_rate": 6.5010944936653805e-06, "loss": 0.3247, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2010 }, { "epoch": 0.7214715161616763, "grad_norm": 0.4557973023145909, "learning_rate": 6.495983462446792e-06, "loss": 0.3256, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2020 }, { "epoch": 0.7250431573307935, "grad_norm": 0.5048088833153063, "learning_rate": 6.490848414935338e-06, "loss": 0.3242, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2030 }, { "epoch": 0.7286147984999107, "grad_norm": 0.46964680827760763, "learning_rate": 6.485689392294398e-06, "loss": 0.3337, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2040 }, { "epoch": 0.732186439669028, "grad_norm": 0.4182978089039866, "learning_rate": 6.480506435879541e-06, "loss": 0.3254, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2050 }, { "epoch": 0.7357580808381451, "grad_norm": 0.5087956625646197, "learning_rate": 6.475299587238192e-06, "loss": 0.3283, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2060 }, { "epoch": 0.7393297220072623, "grad_norm": 0.4433568329887951, "learning_rate": 6.470068888109301e-06, "loss": 0.328, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2070 }, { "epoch": 0.7429013631763796, "grad_norm": 0.40440774246534084, "learning_rate": 6.4648143804230044e-06, "loss": 0.3323, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2080 }, { "epoch": 0.7464730043454968, "grad_norm": 0.4776400773257993, "learning_rate": 6.459536106300296e-06, "loss": 0.33, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2090 }, { "epoch": 0.7500446455146139, "grad_norm": 0.48086135057212126, "learning_rate": 6.454234108052685e-06, "loss": 0.3284, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2100 }, { "epoch": 0.7536162866837312, "grad_norm": 0.45259232721638404, "learning_rate": 6.448908428181852e-06, "loss": 0.326, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2110 }, { "epoch": 0.7571879278528484, "grad_norm": 0.46227294968779126, "learning_rate": 6.443559109379319e-06, "loss": 0.3326, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2120 }, { "epoch": 0.7607595690219656, "grad_norm": 0.4815320401008922, "learning_rate": 6.4381861945261e-06, "loss": 0.327, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2130 }, { "epoch": 0.7643312101910829, "grad_norm": 0.46393575252024677, "learning_rate": 6.432789726692356e-06, "loss": 0.3302, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2140 }, { "epoch": 0.7679028513602, "grad_norm": 0.4195186834772109, "learning_rate": 6.427369749137057e-06, "loss": 0.3162, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2150 }, { "epoch": 0.7714744925293172, "grad_norm": 0.4537338691534887, "learning_rate": 6.421926305307627e-06, "loss": 0.3234, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2160 }, { "epoch": 0.7750461336984344, "grad_norm": 0.48382227197517164, "learning_rate": 6.416459438839601e-06, "loss": 0.3256, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2170 }, { "epoch": 0.7786177748675517, "grad_norm": 0.46259622419983354, "learning_rate": 6.410969193556274e-06, "loss": 0.3241, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2180 }, { "epoch": 0.7821894160366688, "grad_norm": 0.4300803430769861, "learning_rate": 6.40545561346835e-06, "loss": 0.3242, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2190 }, { "epoch": 0.785761057205786, "grad_norm": 0.42886334543184423, "learning_rate": 6.399918742773586e-06, "loss": 0.3203, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2200 }, { "epoch": 0.7893326983749033, "grad_norm": 0.436336380040389, "learning_rate": 6.394358625856444e-06, "loss": 0.3258, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2210 }, { "epoch": 0.7929043395440205, "grad_norm": 0.5165620808176424, "learning_rate": 6.388775307287727e-06, "loss": 0.3099, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2220 }, { "epoch": 0.7964759807131376, "grad_norm": 0.4104870809975225, "learning_rate": 6.383168831824231e-06, "loss": 0.3279, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2230 }, { "epoch": 0.8000476218822549, "grad_norm": 0.4312051340095396, "learning_rate": 6.377539244408378e-06, "loss": 0.3297, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2240 }, { "epoch": 0.8036192630513721, "grad_norm": 0.42973894851523453, "learning_rate": 6.371886590167861e-06, "loss": 0.3259, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2250 }, { "epoch": 0.8071909042204893, "grad_norm": 0.450430593545997, "learning_rate": 6.36621091441528e-06, "loss": 0.3287, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2260 }, { "epoch": 0.8107625453896066, "grad_norm": 0.4843313728163245, "learning_rate": 6.360512262647777e-06, "loss": 0.3195, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2270 }, { "epoch": 0.8143341865587237, "grad_norm": 0.4484783923353552, "learning_rate": 6.354790680546679e-06, "loss": 0.3171, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2280 }, { "epoch": 0.8179058277278409, "grad_norm": 0.4215719667731261, "learning_rate": 6.349046213977121e-06, "loss": 0.3169, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2290 }, { "epoch": 0.8214774688969582, "grad_norm": 0.4508726622466319, "learning_rate": 6.343278908987684e-06, "loss": 0.319, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2300 }, { "epoch": 0.8250491100660754, "grad_norm": 0.4361713908568727, "learning_rate": 6.337488811810029e-06, "loss": 0.3211, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2310 }, { "epoch": 0.8286207512351925, "grad_norm": 0.4816418184058236, "learning_rate": 6.33167596885852e-06, "loss": 0.3235, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2320 }, { "epoch": 0.8321923924043098, "grad_norm": 0.4589272175981858, "learning_rate": 6.325840426729856e-06, "loss": 0.3197, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2330 }, { "epoch": 0.835764033573427, "grad_norm": 0.43673536428623677, "learning_rate": 6.319982232202696e-06, "loss": 0.3205, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2340 }, { "epoch": 0.8393356747425442, "grad_norm": 0.4214827560121218, "learning_rate": 6.314101432237283e-06, "loss": 0.3198, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2350 }, { "epoch": 0.8429073159116615, "grad_norm": 0.43810471273328333, "learning_rate": 6.308198073975073e-06, "loss": 0.3146, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2360 }, { "epoch": 0.8464789570807786, "grad_norm": 0.45322674327393886, "learning_rate": 6.302272204738348e-06, "loss": 0.3146, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2370 }, { "epoch": 0.8500505982498958, "grad_norm": 0.44384180344721963, "learning_rate": 6.296323872029845e-06, "loss": 0.3113, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2380 }, { "epoch": 0.853622239419013, "grad_norm": 0.4318032922963888, "learning_rate": 6.29035312353237e-06, "loss": 0.3216, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2390 }, { "epoch": 0.8571938805881303, "grad_norm": 0.48276893773226826, "learning_rate": 6.284360007108418e-06, "loss": 0.3218, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2400 }, { "epoch": 0.8607655217572474, "grad_norm": 0.4432412247091493, "learning_rate": 6.278344570799791e-06, "loss": 0.323, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2410 }, { "epoch": 0.8643371629263646, "grad_norm": 0.4357439381549616, "learning_rate": 6.272306862827208e-06, "loss": 0.3127, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2420 }, { "epoch": 0.8679088040954819, "grad_norm": 0.4287466917293321, "learning_rate": 6.2662469315899215e-06, "loss": 0.3201, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2430 }, { "epoch": 0.8714804452645991, "grad_norm": 0.46939300578506366, "learning_rate": 6.260164825665332e-06, "loss": 0.3123, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2440 }, { "epoch": 0.8750520864337163, "grad_norm": 0.4358908165077067, "learning_rate": 6.254060593808594e-06, "loss": 0.3154, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2450 }, { "epoch": 0.8786237276028335, "grad_norm": 0.4691550559602913, "learning_rate": 6.247934284952225e-06, "loss": 0.3154, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2460 }, { "epoch": 0.8821953687719507, "grad_norm": 0.4614380220064387, "learning_rate": 6.241785948205721e-06, "loss": 0.3218, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2470 }, { "epoch": 0.8857670099410679, "grad_norm": 0.4490215516280512, "learning_rate": 6.235615632855151e-06, "loss": 0.313, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2480 }, { "epoch": 0.8893386511101852, "grad_norm": 0.45310821557712655, "learning_rate": 6.229423388362772e-06, "loss": 0.3239, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2490 }, { "epoch": 0.8929102922793023, "grad_norm": 0.4246377711390903, "learning_rate": 6.223209264366629e-06, "loss": 0.3178, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2500 }, { "epoch": 0.8964819334484195, "grad_norm": 0.437170171634512, "learning_rate": 6.216973310680151e-06, "loss": 0.3161, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2510 }, { "epoch": 0.9000535746175368, "grad_norm": 0.4100710907843204, "learning_rate": 6.2107155772917645e-06, "loss": 0.322, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2520 }, { "epoch": 0.903625215786654, "grad_norm": 0.4297080236276428, "learning_rate": 6.204436114364483e-06, "loss": 0.3088, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2530 }, { "epoch": 0.9071968569557711, "grad_norm": 0.4578128481232775, "learning_rate": 6.198134972235506e-06, "loss": 0.3116, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2540 }, { "epoch": 0.9107684981248884, "grad_norm": 0.4619368696236996, "learning_rate": 6.191812201415817e-06, "loss": 0.3108, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2550 }, { "epoch": 0.9143401392940056, "grad_norm": 0.4316502369623513, "learning_rate": 6.185467852589783e-06, "loss": 0.3076, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2560 }, { "epoch": 0.9179117804631228, "grad_norm": 0.46879215227469134, "learning_rate": 6.179101976614738e-06, "loss": 0.3079, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2570 }, { "epoch": 0.9214834216322401, "grad_norm": 0.45391009388979736, "learning_rate": 6.172714624520584e-06, "loss": 0.3117, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2580 }, { "epoch": 0.9250550628013572, "grad_norm": 0.41117818979235166, "learning_rate": 6.16630584750938e-06, "loss": 0.3085, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2590 }, { "epoch": 0.9286267039704744, "grad_norm": 0.41393645126366063, "learning_rate": 6.159875696954928e-06, "loss": 0.3158, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2600 }, { "epoch": 0.9321983451395917, "grad_norm": 0.43546721450403336, "learning_rate": 6.153424224402368e-06, "loss": 0.3114, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2610 }, { "epoch": 0.9357699863087089, "grad_norm": 0.47547951834025626, "learning_rate": 6.146951481567755e-06, "loss": 0.3159, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2620 }, { "epoch": 0.939341627477826, "grad_norm": 0.41613918798314703, "learning_rate": 6.1404575203376536e-06, "loss": 0.318, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2630 }, { "epoch": 0.9429132686469432, "grad_norm": 0.43579838968445417, "learning_rate": 6.133942392768719e-06, "loss": 0.3159, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2640 }, { "epoch": 0.9464849098160605, "grad_norm": 0.42779957417782555, "learning_rate": 6.1274061510872765e-06, "loss": 0.3171, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2650 }, { "epoch": 0.9500565509851777, "grad_norm": 0.4571516272464143, "learning_rate": 6.120848847688909e-06, "loss": 0.308, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2660 }, { "epoch": 0.953628192154295, "grad_norm": 0.4259836126699497, "learning_rate": 6.11427053513803e-06, "loss": 0.3081, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2670 }, { "epoch": 0.9571998333234121, "grad_norm": 0.45984729995716905, "learning_rate": 6.107671266167468e-06, "loss": 0.3163, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2680 }, { "epoch": 0.9607714744925293, "grad_norm": 0.4786910501462735, "learning_rate": 6.1010510936780425e-06, "loss": 0.3122, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2690 }, { "epoch": 0.9643431156616465, "grad_norm": 0.45855998268064024, "learning_rate": 6.094410070738137e-06, "loss": 0.3111, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2700 }, { "epoch": 0.9679147568307638, "grad_norm": 0.40749766688739447, "learning_rate": 6.087748250583276e-06, "loss": 0.3098, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2710 }, { "epoch": 0.9714863979998809, "grad_norm": 0.3971023012451066, "learning_rate": 6.081065686615698e-06, "loss": 0.3121, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2720 }, { "epoch": 0.9750580391689981, "grad_norm": 0.43110701814916075, "learning_rate": 6.074362432403927e-06, "loss": 0.3165, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2730 }, { "epoch": 0.9786296803381154, "grad_norm": 0.44353114381202113, "learning_rate": 6.067638541682344e-06, "loss": 0.3085, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2740 }, { "epoch": 0.9822013215072326, "grad_norm": 0.3889830628797175, "learning_rate": 6.060894068350758e-06, "loss": 0.3111, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2750 }, { "epoch": 0.9857729626763497, "grad_norm": 0.45027635846439507, "learning_rate": 6.0541290664739644e-06, "loss": 0.3149, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2760 }, { "epoch": 0.989344603845467, "grad_norm": 0.4203375605922012, "learning_rate": 6.047343590281326e-06, "loss": 0.3122, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2770 }, { "epoch": 0.9929162450145842, "grad_norm": 0.42453145771429407, "learning_rate": 6.040537694166331e-06, "loss": 0.3122, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2780 }, { "epoch": 0.9964878861837014, "grad_norm": 0.48274276529679444, "learning_rate": 6.0337114326861495e-06, "loss": 0.3125, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2790 }, { "epoch": 1.0, "grad_norm": 0.4713684607192772, "learning_rate": 6.026864860561211e-06, "loss": 0.3211, "memory/device_mem_reserved(gib)": 48.61, "memory/max_mem_active(gib)": 43.25, "memory/max_mem_allocated(gib)": 42.69, "step": 2800 }, { "epoch": 1.0035716411691171, "grad_norm": 0.42558419555930044, "learning_rate": 6.019998032674756e-06, "loss": 0.3189, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2810 }, { "epoch": 1.0071432823382345, "grad_norm": 0.43216906153711016, "learning_rate": 6.0131110040724e-06, "loss": 0.3112, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2820 }, { "epoch": 1.0107149235073516, "grad_norm": 0.45448985837389827, "learning_rate": 6.006203829961688e-06, "loss": 0.3075, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2830 }, { "epoch": 1.014286564676469, "grad_norm": 0.41525982725225924, "learning_rate": 5.999276565711656e-06, "loss": 0.2989, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2840 }, { "epoch": 1.017858205845586, "grad_norm": 0.4198260335440655, "learning_rate": 5.992329266852385e-06, "loss": 0.3071, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2850 }, { "epoch": 1.0214298470147032, "grad_norm": 0.42331773555378355, "learning_rate": 5.98536198907456e-06, "loss": 0.3025, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2860 }, { "epoch": 1.0250014881838205, "grad_norm": 0.42277960958705246, "learning_rate": 5.978374788229015e-06, "loss": 0.3058, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2870 }, { "epoch": 1.0285731293529377, "grad_norm": 0.41102681519195966, "learning_rate": 5.971367720326295e-06, "loss": 0.3077, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2880 }, { "epoch": 1.0321447705220548, "grad_norm": 0.4156887229302285, "learning_rate": 5.9643408415362015e-06, "loss": 0.2976, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2890 }, { "epoch": 1.0357164116911721, "grad_norm": 0.40926887059288125, "learning_rate": 5.9572942081873415e-06, "loss": 0.3073, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2900 }, { "epoch": 1.0392880528602892, "grad_norm": 0.4181073533586528, "learning_rate": 5.950227876766679e-06, "loss": 0.3048, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2910 }, { "epoch": 1.0428596940294066, "grad_norm": 0.43301206694152716, "learning_rate": 5.943141903919084e-06, "loss": 0.3102, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2920 }, { "epoch": 1.0464313351985237, "grad_norm": 0.428644958607888, "learning_rate": 5.93603634644687e-06, "loss": 0.3069, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2930 }, { "epoch": 1.0500029763676408, "grad_norm": 0.4289954783985394, "learning_rate": 5.928911261309345e-06, "loss": 0.3062, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2940 }, { "epoch": 1.0535746175367582, "grad_norm": 0.40640231743067806, "learning_rate": 5.921766705622358e-06, "loss": 0.3057, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2950 }, { "epoch": 1.0571462587058753, "grad_norm": 0.39786241335597483, "learning_rate": 5.914602736657834e-06, "loss": 0.3014, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2960 }, { "epoch": 1.0607178998749927, "grad_norm": 0.42829381973477715, "learning_rate": 5.9074194118433185e-06, "loss": 0.2951, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2970 }, { "epoch": 1.0642895410441098, "grad_norm": 0.40677472977856, "learning_rate": 5.900216788761514e-06, "loss": 0.299, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2980 }, { "epoch": 1.067861182213227, "grad_norm": 0.40052402118235286, "learning_rate": 5.892994925149829e-06, "loss": 0.3081, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.4, "memory/max_mem_allocated(gib)": 42.86, "step": 2990 }, { "epoch": 1.0714328233823442, "grad_norm": 0.41164871053260205, "learning_rate": 5.885753878899898e-06, "loss": 0.2983, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3000 }, { "epoch": 1.0750044645514614, "grad_norm": 0.4398438793790258, "learning_rate": 5.878493708057132e-06, "loss": 0.296, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3010 }, { "epoch": 1.0785761057205785, "grad_norm": 0.4145451367489992, "learning_rate": 5.871214470820245e-06, "loss": 0.2953, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3020 }, { "epoch": 1.0821477468896958, "grad_norm": 0.3902504116999639, "learning_rate": 5.863916225540795e-06, "loss": 0.302, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3030 }, { "epoch": 1.085719388058813, "grad_norm": 0.4038545931689053, "learning_rate": 5.8565990307227055e-06, "loss": 0.2892, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3040 }, { "epoch": 1.0892910292279303, "grad_norm": 0.4002310932995622, "learning_rate": 5.849262945021807e-06, "loss": 0.2993, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3050 }, { "epoch": 1.0928626703970474, "grad_norm": 0.4200267477903967, "learning_rate": 5.84190802724536e-06, "loss": 0.2959, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3060 }, { "epoch": 1.0964343115661646, "grad_norm": 0.4118492608691234, "learning_rate": 5.834534336351588e-06, "loss": 0.2904, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3070 }, { "epoch": 1.100005952735282, "grad_norm": 0.37731728704380824, "learning_rate": 5.827141931449201e-06, "loss": 0.2931, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3080 }, { "epoch": 1.103577593904399, "grad_norm": 0.4257361126495842, "learning_rate": 5.819730871796925e-06, "loss": 0.3007, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3090 }, { "epoch": 1.1071492350735164, "grad_norm": 0.4627051529190304, "learning_rate": 5.812301216803024e-06, "loss": 0.2945, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3100 }, { "epoch": 1.1107208762426335, "grad_norm": 0.39052506920739677, "learning_rate": 5.804853026024826e-06, "loss": 0.2844, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3110 }, { "epoch": 1.1142925174117506, "grad_norm": 0.4183858283818629, "learning_rate": 5.797386359168247e-06, "loss": 0.2969, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3120 }, { "epoch": 1.117864158580868, "grad_norm": 0.38122190274713647, "learning_rate": 5.789901276087306e-06, "loss": 0.2903, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3130 }, { "epoch": 1.121435799749985, "grad_norm": 0.41997541586634773, "learning_rate": 5.782397836783651e-06, "loss": 0.2859, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3140 }, { "epoch": 1.1250074409191022, "grad_norm": 0.49411147636632047, "learning_rate": 5.774876101406081e-06, "loss": 0.2915, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3150 }, { "epoch": 1.1285790820882196, "grad_norm": 0.38342852409381073, "learning_rate": 5.767336130250054e-06, "loss": 0.2922, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3160 }, { "epoch": 1.1321507232573367, "grad_norm": 0.4209796316280021, "learning_rate": 5.759777983757208e-06, "loss": 0.2888, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3170 }, { "epoch": 1.135722364426454, "grad_norm": 0.4048539945241082, "learning_rate": 5.752201722514882e-06, "loss": 0.292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3180 }, { "epoch": 1.1392940055955711, "grad_norm": 0.41621200257599267, "learning_rate": 5.7446074072556255e-06, "loss": 0.2877, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3190 }, { "epoch": 1.1428656467646885, "grad_norm": 0.45972371472591095, "learning_rate": 5.736995098856708e-06, "loss": 0.2907, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3200 }, { "epoch": 1.1464372879338056, "grad_norm": 0.4150892225825858, "learning_rate": 5.7293648583396364e-06, "loss": 0.2819, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3210 }, { "epoch": 1.1500089291029227, "grad_norm": 0.3953394886577025, "learning_rate": 5.7217167468696675e-06, "loss": 0.2869, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3220 }, { "epoch": 1.15358057027204, "grad_norm": 0.4043756771843379, "learning_rate": 5.714050825755311e-06, "loss": 0.2883, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3230 }, { "epoch": 1.1571522114411572, "grad_norm": 0.42513927984802846, "learning_rate": 5.706367156447842e-06, "loss": 0.2936, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3240 }, { "epoch": 1.1607238526102743, "grad_norm": 0.37727727646775006, "learning_rate": 5.698665800540812e-06, "loss": 0.2972, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3250 }, { "epoch": 1.1642954937793917, "grad_norm": 0.38826025836911765, "learning_rate": 5.690946819769544e-06, "loss": 0.2954, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3260 }, { "epoch": 1.1678671349485088, "grad_norm": 0.405329832879052, "learning_rate": 5.683210276010654e-06, "loss": 0.2915, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3270 }, { "epoch": 1.1714387761176261, "grad_norm": 0.4363084476787587, "learning_rate": 5.6754562312815345e-06, "loss": 0.291, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3280 }, { "epoch": 1.1750104172867433, "grad_norm": 0.4133239785031364, "learning_rate": 5.667684747739879e-06, "loss": 0.2974, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3290 }, { "epoch": 1.1785820584558604, "grad_norm": 0.38573210471666897, "learning_rate": 5.6598958876831675e-06, "loss": 0.2907, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3300 }, { "epoch": 1.1821536996249777, "grad_norm": 0.39168356395863324, "learning_rate": 5.652089713548175e-06, "loss": 0.2875, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3310 }, { "epoch": 1.1857253407940949, "grad_norm": 0.43833999524703754, "learning_rate": 5.644266287910469e-06, "loss": 0.2918, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3320 }, { "epoch": 1.1892969819632122, "grad_norm": 0.3739394107569196, "learning_rate": 5.636425673483907e-06, "loss": 0.2938, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3330 }, { "epoch": 1.1928686231323293, "grad_norm": 0.39393210278890395, "learning_rate": 5.628567933120135e-06, "loss": 0.287, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3340 }, { "epoch": 1.1964402643014465, "grad_norm": 0.42110108390228834, "learning_rate": 5.620693129808084e-06, "loss": 0.2839, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3350 }, { "epoch": 1.2000119054705638, "grad_norm": 0.45198733665344365, "learning_rate": 5.612801326673463e-06, "loss": 0.2956, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3360 }, { "epoch": 1.203583546639681, "grad_norm": 0.40498561338650063, "learning_rate": 5.604892586978256e-06, "loss": 0.2904, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3370 }, { "epoch": 1.207155187808798, "grad_norm": 0.44100061374306926, "learning_rate": 5.5969669741202115e-06, "loss": 0.2863, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3380 }, { "epoch": 1.2107268289779154, "grad_norm": 0.4159877214300284, "learning_rate": 5.589024551632338e-06, "loss": 0.2835, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3390 }, { "epoch": 1.2142984701470325, "grad_norm": 0.4114576141382402, "learning_rate": 5.58106538318239e-06, "loss": 0.2913, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3400 }, { "epoch": 1.2178701113161499, "grad_norm": 0.38152511059940936, "learning_rate": 5.5730895325723645e-06, "loss": 0.2839, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3410 }, { "epoch": 1.221441752485267, "grad_norm": 0.3988441810677571, "learning_rate": 5.5650970637379795e-06, "loss": 0.291, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3420 }, { "epoch": 1.225013393654384, "grad_norm": 0.40024145076228973, "learning_rate": 5.557088040748174e-06, "loss": 0.2849, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3430 }, { "epoch": 1.2285850348235015, "grad_norm": 18.581620195482078, "learning_rate": 5.549062527804584e-06, "loss": 0.2826, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3440 }, { "epoch": 1.2321566759926186, "grad_norm": 0.4030023053420153, "learning_rate": 5.5410205892410326e-06, "loss": 0.2806, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3450 }, { "epoch": 1.235728317161736, "grad_norm": 0.4515723074035561, "learning_rate": 5.53296228952301e-06, "loss": 0.2917, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3460 }, { "epoch": 1.239299958330853, "grad_norm": 0.39728025388029203, "learning_rate": 5.524887693247166e-06, "loss": 0.2947, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3470 }, { "epoch": 1.2428715994999702, "grad_norm": 0.3871968955057113, "learning_rate": 5.516796865140783e-06, "loss": 0.2879, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3480 }, { "epoch": 1.2464432406690875, "grad_norm": 0.4087098376285781, "learning_rate": 5.508689870061262e-06, "loss": 0.2877, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3490 }, { "epoch": 1.2500148818382046, "grad_norm": 0.3882691465242462, "learning_rate": 5.500566772995597e-06, "loss": 0.2797, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3500 }, { "epoch": 1.2535865230073218, "grad_norm": 0.4185654530957896, "learning_rate": 5.4924276390598625e-06, "loss": 0.2868, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3510 }, { "epoch": 1.257158164176439, "grad_norm": 0.4035064939077187, "learning_rate": 5.484272533498688e-06, "loss": 0.2881, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3520 }, { "epoch": 1.2607298053455562, "grad_norm": 0.3866791184579453, "learning_rate": 5.476101521684729e-06, "loss": 0.2796, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3530 }, { "epoch": 1.2643014465146734, "grad_norm": 0.4247476728802867, "learning_rate": 5.467914669118151e-06, "loss": 0.2818, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3540 }, { "epoch": 1.2678730876837907, "grad_norm": 0.4457076719062088, "learning_rate": 5.459712041426103e-06, "loss": 0.2821, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3550 }, { "epoch": 1.271444728852908, "grad_norm": 0.39931687643920494, "learning_rate": 5.451493704362188e-06, "loss": 0.2822, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3560 }, { "epoch": 1.2750163700220252, "grad_norm": 0.40299377985016827, "learning_rate": 5.443259723805939e-06, "loss": 0.2825, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3570 }, { "epoch": 1.2785880111911423, "grad_norm": 0.39622897594218864, "learning_rate": 5.435010165762289e-06, "loss": 0.2885, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3580 }, { "epoch": 1.2821596523602596, "grad_norm": 0.43394101353228975, "learning_rate": 5.426745096361043e-06, "loss": 0.2886, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3590 }, { "epoch": 1.2857312935293768, "grad_norm": 0.3824521403588983, "learning_rate": 5.418464581856346e-06, "loss": 0.2822, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3600 }, { "epoch": 1.2893029346984939, "grad_norm": 0.41532444786322925, "learning_rate": 5.4101686886261575e-06, "loss": 0.286, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3610 }, { "epoch": 1.2928745758676112, "grad_norm": 0.45648561913824676, "learning_rate": 5.4018574831717105e-06, "loss": 0.2843, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3620 }, { "epoch": 1.2964462170367284, "grad_norm": 0.40279789970652746, "learning_rate": 5.393531032116988e-06, "loss": 0.2803, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3630 }, { "epoch": 1.3000178582058455, "grad_norm": 0.41292629848412293, "learning_rate": 5.38518940220818e-06, "loss": 0.2837, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3640 }, { "epoch": 1.3035894993749628, "grad_norm": 0.3842527104062671, "learning_rate": 5.376832660313156e-06, "loss": 0.2837, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3650 }, { "epoch": 1.30716114054408, "grad_norm": 0.4265057399549405, "learning_rate": 5.368460873420923e-06, "loss": 0.2823, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3660 }, { "epoch": 1.3107327817131973, "grad_norm": 0.3861802078618478, "learning_rate": 5.360074108641092e-06, "loss": 0.2846, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3670 }, { "epoch": 1.3143044228823144, "grad_norm": 0.4162455814696529, "learning_rate": 5.3516724332033415e-06, "loss": 0.2817, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3680 }, { "epoch": 1.3178760640514318, "grad_norm": 0.41457828657902673, "learning_rate": 5.343255914456873e-06, "loss": 0.2824, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3690 }, { "epoch": 1.3214477052205489, "grad_norm": 0.3826207041057534, "learning_rate": 5.334824619869876e-06, "loss": 0.2961, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3700 }, { "epoch": 1.325019346389666, "grad_norm": 0.40031209082106967, "learning_rate": 5.326378617028985e-06, "loss": 0.2791, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3710 }, { "epoch": 1.3285909875587834, "grad_norm": 0.3708661212761246, "learning_rate": 5.3179179736387385e-06, "loss": 0.2874, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3720 }, { "epoch": 1.3321626287279005, "grad_norm": 0.38510593028444307, "learning_rate": 5.3094427575210366e-06, "loss": 0.2867, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3730 }, { "epoch": 1.3357342698970176, "grad_norm": 0.3819530538365875, "learning_rate": 5.300953036614595e-06, "loss": 0.2804, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3740 }, { "epoch": 1.339305911066135, "grad_norm": 0.3794797389478174, "learning_rate": 5.2924488789744055e-06, "loss": 0.2851, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3750 }, { "epoch": 1.342877552235252, "grad_norm": 0.41346602960853757, "learning_rate": 5.283930352771183e-06, "loss": 0.2762, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3760 }, { "epoch": 1.3464491934043692, "grad_norm": 0.4037130991475417, "learning_rate": 5.275397526290827e-06, "loss": 0.2865, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3770 }, { "epoch": 1.3500208345734865, "grad_norm": 0.42726973214522196, "learning_rate": 5.266850467933866e-06, "loss": 0.281, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3780 }, { "epoch": 1.3535924757426037, "grad_norm": 0.43061681622670994, "learning_rate": 5.258289246214919e-06, "loss": 0.2811, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3790 }, { "epoch": 1.357164116911721, "grad_norm": 0.4380949060565935, "learning_rate": 5.249713929762136e-06, "loss": 0.2828, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3800 }, { "epoch": 1.3607357580808381, "grad_norm": 0.47620516316901934, "learning_rate": 5.241124587316655e-06, "loss": 0.2793, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3810 }, { "epoch": 1.3643073992499555, "grad_norm": 0.3966834578414604, "learning_rate": 5.2325212877320475e-06, "loss": 0.2832, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3820 }, { "epoch": 1.3678790404190726, "grad_norm": 0.41841887208116907, "learning_rate": 5.223904099973769e-06, "loss": 0.2776, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3830 }, { "epoch": 1.3714506815881897, "grad_norm": 0.3887158923993312, "learning_rate": 5.215273093118605e-06, "loss": 0.2834, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3840 }, { "epoch": 1.375022322757307, "grad_norm": 0.38605915567228644, "learning_rate": 5.2066283363541145e-06, "loss": 0.2824, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3850 }, { "epoch": 1.3785939639264242, "grad_norm": 0.3735164396659493, "learning_rate": 5.197969898978083e-06, "loss": 0.2846, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3860 }, { "epoch": 1.3821656050955413, "grad_norm": 0.38639038722023733, "learning_rate": 5.189297850397956e-06, "loss": 0.2833, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3870 }, { "epoch": 1.3857372462646587, "grad_norm": 0.38176503042886195, "learning_rate": 5.180612260130293e-06, "loss": 0.291, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3880 }, { "epoch": 1.3893088874337758, "grad_norm": 0.4012566858870177, "learning_rate": 5.171913197800206e-06, "loss": 0.2709, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3890 }, { "epoch": 1.392880528602893, "grad_norm": 0.4285411453903363, "learning_rate": 5.1632007331407955e-06, "loss": 0.275, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3900 }, { "epoch": 1.3964521697720103, "grad_norm": 0.43737881415448876, "learning_rate": 5.1544749359926055e-06, "loss": 0.2811, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3910 }, { "epoch": 1.4000238109411274, "grad_norm": 0.4077413602492253, "learning_rate": 5.1457358763030504e-06, "loss": 0.2783, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3920 }, { "epoch": 1.4035954521102447, "grad_norm": 0.3921345244240013, "learning_rate": 5.13698362412586e-06, "loss": 0.2887, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3930 }, { "epoch": 1.4071670932793618, "grad_norm": 0.41078975111561067, "learning_rate": 5.128218249620516e-06, "loss": 0.2779, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3940 }, { "epoch": 1.4107387344484792, "grad_norm": 0.4733932988022064, "learning_rate": 5.119439823051695e-06, "loss": 0.2808, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3950 }, { "epoch": 1.4143103756175963, "grad_norm": 0.42977751530797087, "learning_rate": 5.110648414788696e-06, "loss": 0.2824, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3960 }, { "epoch": 1.4178820167867134, "grad_norm": 0.4072718628555276, "learning_rate": 5.101844095304886e-06, "loss": 0.2783, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3970 }, { "epoch": 1.4214536579558308, "grad_norm": 0.3867375629277297, "learning_rate": 5.093026935177127e-06, "loss": 0.2739, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3980 }, { "epoch": 1.425025299124948, "grad_norm": 0.4270140563186022, "learning_rate": 5.084197005085215e-06, "loss": 0.2839, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 3990 }, { "epoch": 1.428596940294065, "grad_norm": 0.3952561361354359, "learning_rate": 5.075354375811313e-06, "loss": 0.2824, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4000 }, { "epoch": 1.4321685814631824, "grad_norm": 0.42309739207423486, "learning_rate": 5.066499118239381e-06, "loss": 0.2841, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4010 }, { "epoch": 1.4357402226322995, "grad_norm": 0.4393277747654901, "learning_rate": 5.05763130335461e-06, "loss": 0.2768, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4020 }, { "epoch": 1.4393118638014166, "grad_norm": 0.42584412565699653, "learning_rate": 5.048751002242853e-06, "loss": 0.2863, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4030 }, { "epoch": 1.442883504970534, "grad_norm": 0.4411596536003725, "learning_rate": 5.039858286090056e-06, "loss": 0.2849, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4040 }, { "epoch": 1.446455146139651, "grad_norm": 0.41130998429424936, "learning_rate": 5.030953226181682e-06, "loss": 0.2799, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4050 }, { "epoch": 1.4500267873087684, "grad_norm": 0.4154103752019673, "learning_rate": 5.022035893902147e-06, "loss": 0.2823, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4060 }, { "epoch": 1.4535984284778856, "grad_norm": 0.4208114402245905, "learning_rate": 5.013106360734244e-06, "loss": 0.2691, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4070 }, { "epoch": 1.457170069647003, "grad_norm": 0.43416877401764065, "learning_rate": 5.0041646982585694e-06, "loss": 0.2874, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4080 }, { "epoch": 1.46074171081612, "grad_norm": 0.4893174023257388, "learning_rate": 4.9952109781529494e-06, "loss": 0.2787, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4090 }, { "epoch": 1.4643133519852372, "grad_norm": 0.4375592068849344, "learning_rate": 4.986245272191866e-06, "loss": 0.2846, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4100 }, { "epoch": 1.4678849931543545, "grad_norm": 0.41720630958175864, "learning_rate": 4.977267652245887e-06, "loss": 0.2831, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4110 }, { "epoch": 1.4714566343234716, "grad_norm": 0.38893766128889445, "learning_rate": 4.968278190281076e-06, "loss": 0.2801, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4120 }, { "epoch": 1.4750282754925887, "grad_norm": 0.40586605542464455, "learning_rate": 4.959276958358434e-06, "loss": 0.2782, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4130 }, { "epoch": 1.478599916661706, "grad_norm": 0.39331036478589604, "learning_rate": 4.9502640286333e-06, "loss": 0.2805, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4140 }, { "epoch": 1.4821715578308232, "grad_norm": 0.42690856798454524, "learning_rate": 4.941239473354793e-06, "loss": 0.2757, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4150 }, { "epoch": 1.4857431989999403, "grad_norm": 0.42353429308127194, "learning_rate": 4.932203364865227e-06, "loss": 0.2853, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4160 }, { "epoch": 1.4893148401690577, "grad_norm": 0.46112729709385236, "learning_rate": 4.923155775599519e-06, "loss": 0.2679, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4170 }, { "epoch": 1.4928864813381748, "grad_norm": 0.4161270630543364, "learning_rate": 4.914096778084623e-06, "loss": 0.2813, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4180 }, { "epoch": 1.4964581225072922, "grad_norm": 0.4339245152353466, "learning_rate": 4.905026444938945e-06, "loss": 0.2706, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4190 }, { "epoch": 1.5000297636764093, "grad_norm": 0.40007840075097373, "learning_rate": 4.895944848871756e-06, "loss": 0.2807, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4200 }, { "epoch": 1.5036014048455266, "grad_norm": 0.4009324464084967, "learning_rate": 4.886852062682613e-06, "loss": 0.2728, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4210 }, { "epoch": 1.5071730460146437, "grad_norm": 0.39202018659117227, "learning_rate": 4.8777481592607775e-06, "loss": 0.2765, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4220 }, { "epoch": 1.5107446871837609, "grad_norm": 0.4112480071981047, "learning_rate": 4.868633211584625e-06, "loss": 0.2701, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4230 }, { "epoch": 1.5143163283528782, "grad_norm": 0.41390732975997946, "learning_rate": 4.859507292721066e-06, "loss": 0.2801, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4240 }, { "epoch": 1.5178879695219953, "grad_norm": 0.4401971189377111, "learning_rate": 4.850370475824957e-06, "loss": 0.2791, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4250 }, { "epoch": 1.5214596106911125, "grad_norm": 0.40944175778293085, "learning_rate": 4.841222834138513e-06, "loss": 0.2725, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4260 }, { "epoch": 1.5250312518602298, "grad_norm": 0.4006029556225281, "learning_rate": 4.832064440990725e-06, "loss": 0.2731, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4270 }, { "epoch": 1.528602893029347, "grad_norm": 0.3895187645181517, "learning_rate": 4.8228953697967705e-06, "loss": 0.2799, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4280 }, { "epoch": 1.532174534198464, "grad_norm": 0.43446583393388194, "learning_rate": 4.813715694057419e-06, "loss": 0.2706, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4290 }, { "epoch": 1.5357461753675814, "grad_norm": 0.4365210838133416, "learning_rate": 4.804525487358454e-06, "loss": 0.2748, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4300 }, { "epoch": 1.5393178165366987, "grad_norm": 0.46735370374372087, "learning_rate": 4.7953248233700725e-06, "loss": 0.272, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4310 }, { "epoch": 1.5428894577058159, "grad_norm": 0.42778351347035765, "learning_rate": 4.7861137758463e-06, "loss": 0.268, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4320 }, { "epoch": 1.546461098874933, "grad_norm": 0.40402901670951247, "learning_rate": 4.776892418624396e-06, "loss": 0.2784, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4330 }, { "epoch": 1.5500327400440503, "grad_norm": 0.42578918369148505, "learning_rate": 4.767660825624269e-06, "loss": 0.2739, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4340 }, { "epoch": 1.5536043812131675, "grad_norm": 0.41895323402321494, "learning_rate": 4.758419070847877e-06, "loss": 0.2783, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4350 }, { "epoch": 1.5571760223822846, "grad_norm": 0.4321907650265535, "learning_rate": 4.749167228378634e-06, "loss": 0.2727, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4360 }, { "epoch": 1.560747663551402, "grad_norm": 0.449385172204246, "learning_rate": 4.739905372380822e-06, "loss": 0.2754, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4370 }, { "epoch": 1.564319304720519, "grad_norm": 0.40983330954131525, "learning_rate": 4.730633577098992e-06, "loss": 0.2653, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4380 }, { "epoch": 1.5678909458896362, "grad_norm": 0.4099672865787947, "learning_rate": 4.721351916857368e-06, "loss": 0.2743, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4390 }, { "epoch": 1.5714625870587535, "grad_norm": 0.449638292162348, "learning_rate": 4.7120604660592564e-06, "loss": 0.2798, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4400 }, { "epoch": 1.5750342282278709, "grad_norm": 0.4888696074836512, "learning_rate": 4.702759299186444e-06, "loss": 0.2785, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4410 }, { "epoch": 1.5786058693969878, "grad_norm": 0.41140229166260467, "learning_rate": 4.693448490798603e-06, "loss": 0.2768, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4420 }, { "epoch": 1.5821775105661051, "grad_norm": 0.3965457289700049, "learning_rate": 4.684128115532697e-06, "loss": 0.2852, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4430 }, { "epoch": 1.5857491517352225, "grad_norm": 0.40893457249676096, "learning_rate": 4.674798248102374e-06, "loss": 0.2767, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4440 }, { "epoch": 1.5893207929043396, "grad_norm": 0.40947270681652376, "learning_rate": 4.665458963297376e-06, "loss": 0.2747, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4450 }, { "epoch": 1.5928924340734567, "grad_norm": 0.43217919095328655, "learning_rate": 4.656110335982939e-06, "loss": 0.2688, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4460 }, { "epoch": 1.596464075242574, "grad_norm": 0.4048417118383221, "learning_rate": 4.6467524410991815e-06, "loss": 0.2737, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4470 }, { "epoch": 1.6000357164116912, "grad_norm": 0.40615664984204264, "learning_rate": 4.637385353660521e-06, "loss": 0.2747, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4480 }, { "epoch": 1.6036073575808083, "grad_norm": 0.4556450981794944, "learning_rate": 4.62800914875506e-06, "loss": 0.2771, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4490 }, { "epoch": 1.6071789987499256, "grad_norm": 0.4456452650392903, "learning_rate": 4.618623901543989e-06, "loss": 0.2688, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4500 }, { "epoch": 1.6107506399190428, "grad_norm": 0.4072795804451177, "learning_rate": 4.609229687260981e-06, "loss": 0.2683, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4510 }, { "epoch": 1.61432228108816, "grad_norm": 0.3987863022162168, "learning_rate": 4.599826581211594e-06, "loss": 0.2668, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4520 }, { "epoch": 1.6178939222572772, "grad_norm": 0.42547111660934306, "learning_rate": 4.590414658772663e-06, "loss": 0.2763, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4530 }, { "epoch": 1.6214655634263946, "grad_norm": 0.41478885940190585, "learning_rate": 4.580993995391694e-06, "loss": 0.2765, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4540 }, { "epoch": 1.6250372045955115, "grad_norm": 0.4198144043488199, "learning_rate": 4.5715646665862645e-06, "loss": 0.2698, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4550 }, { "epoch": 1.6286088457646288, "grad_norm": 0.4179570184140089, "learning_rate": 4.562126747943415e-06, "loss": 0.2753, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4560 }, { "epoch": 1.6321804869337462, "grad_norm": 0.4407922084504109, "learning_rate": 4.5526803151190415e-06, "loss": 0.2758, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4570 }, { "epoch": 1.6357521281028633, "grad_norm": 0.5289684644845591, "learning_rate": 4.543225443837295e-06, "loss": 0.2788, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4580 }, { "epoch": 1.6393237692719804, "grad_norm": 0.4428322641436808, "learning_rate": 4.5337622098899675e-06, "loss": 0.2732, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4590 }, { "epoch": 1.6428954104410978, "grad_norm": 0.4552104082227244, "learning_rate": 4.5242906891358866e-06, "loss": 0.2698, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4600 }, { "epoch": 1.646467051610215, "grad_norm": 0.4677528492713541, "learning_rate": 4.514810957500311e-06, "loss": 0.2703, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4610 }, { "epoch": 1.650038692779332, "grad_norm": 0.45043763627660394, "learning_rate": 4.5053230909743195e-06, "loss": 0.265, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4620 }, { "epoch": 1.6536103339484494, "grad_norm": 0.451616136811452, "learning_rate": 4.495827165614198e-06, "loss": 0.2777, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4630 }, { "epoch": 1.6571819751175665, "grad_norm": 0.4281517840545686, "learning_rate": 4.486323257540838e-06, "loss": 0.2754, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4640 }, { "epoch": 1.6607536162866836, "grad_norm": 0.420818894121793, "learning_rate": 4.476811442939116e-06, "loss": 0.276, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4650 }, { "epoch": 1.664325257455801, "grad_norm": 0.4149963360632429, "learning_rate": 4.467291798057295e-06, "loss": 0.2781, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4660 }, { "epoch": 1.6678968986249183, "grad_norm": 0.4336018015531387, "learning_rate": 4.4577643992064e-06, "loss": 0.2654, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4670 }, { "epoch": 1.6714685397940352, "grad_norm": 0.41195956330157535, "learning_rate": 4.44822932275962e-06, "loss": 0.2725, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4680 }, { "epoch": 1.6750401809631525, "grad_norm": 0.4234398479845422, "learning_rate": 4.438686645151683e-06, "loss": 0.2787, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4690 }, { "epoch": 1.67861182213227, "grad_norm": 0.42731758155771205, "learning_rate": 4.429136442878253e-06, "loss": 0.2715, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4700 }, { "epoch": 1.682183463301387, "grad_norm": 0.42562808585130063, "learning_rate": 4.419578792495311e-06, "loss": 0.2737, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4710 }, { "epoch": 1.6857551044705041, "grad_norm": 0.4421430973069684, "learning_rate": 4.410013770618545e-06, "loss": 0.27, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4720 }, { "epoch": 1.6893267456396215, "grad_norm": 0.43191492739674137, "learning_rate": 4.4004414539227295e-06, "loss": 0.2712, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4730 }, { "epoch": 1.6928983868087386, "grad_norm": 0.4334260693657613, "learning_rate": 4.3908619191411235e-06, "loss": 0.2748, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4740 }, { "epoch": 1.6964700279778557, "grad_norm": 0.41696073544153367, "learning_rate": 4.3812752430648395e-06, "loss": 0.2779, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4750 }, { "epoch": 1.700041669146973, "grad_norm": 0.4348794170591351, "learning_rate": 4.371681502542241e-06, "loss": 0.2711, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4760 }, { "epoch": 1.7036133103160902, "grad_norm": 0.492407893353495, "learning_rate": 4.362080774478316e-06, "loss": 0.2707, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4770 }, { "epoch": 1.7071849514852073, "grad_norm": 0.45024503624278867, "learning_rate": 4.352473135834074e-06, "loss": 0.2742, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4780 }, { "epoch": 1.7107565926543247, "grad_norm": 0.45700989872478603, "learning_rate": 4.342858663625912e-06, "loss": 0.2682, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4790 }, { "epoch": 1.714328233823442, "grad_norm": 0.41115833866014856, "learning_rate": 4.333237434925012e-06, "loss": 0.2731, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4800 }, { "epoch": 1.717899874992559, "grad_norm": 0.4229273901374492, "learning_rate": 4.323609526856713e-06, "loss": 0.2696, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4810 }, { "epoch": 1.7214715161616763, "grad_norm": 0.42165606422875274, "learning_rate": 4.3139750165999e-06, "loss": 0.2702, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4820 }, { "epoch": 1.7250431573307936, "grad_norm": 0.42215858922988486, "learning_rate": 4.3043339813863825e-06, "loss": 0.269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4830 }, { "epoch": 1.7286147984999107, "grad_norm": 0.4083178726418061, "learning_rate": 4.294686498500272e-06, "loss": 0.2793, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4840 }, { "epoch": 1.7321864396690279, "grad_norm": 0.46153757788947947, "learning_rate": 4.285032645277368e-06, "loss": 0.2706, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4850 }, { "epoch": 1.7357580808381452, "grad_norm": 0.47338863262420977, "learning_rate": 4.275372499104534e-06, "loss": 0.2734, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4860 }, { "epoch": 1.7393297220072623, "grad_norm": 0.4257009544961603, "learning_rate": 4.265706137419082e-06, "loss": 0.2732, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4870 }, { "epoch": 1.7429013631763794, "grad_norm": 0.432685871073769, "learning_rate": 4.256033637708144e-06, "loss": 0.2778, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4880 }, { "epoch": 1.7464730043454968, "grad_norm": 0.46972048535868977, "learning_rate": 4.246355077508058e-06, "loss": 0.2756, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4890 }, { "epoch": 1.750044645514614, "grad_norm": 0.43325958278072135, "learning_rate": 4.2366705344037455e-06, "loss": 0.2741, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4900 }, { "epoch": 1.753616286683731, "grad_norm": 0.44379386526215314, "learning_rate": 4.226980086028085e-06, "loss": 0.2721, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4910 }, { "epoch": 1.7571879278528484, "grad_norm": 0.5008628676566734, "learning_rate": 4.217283810061295e-06, "loss": 0.2786, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4920 }, { "epoch": 1.7607595690219657, "grad_norm": 0.4265989351603266, "learning_rate": 4.207581784230304e-06, "loss": 0.2733, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4930 }, { "epoch": 1.7643312101910829, "grad_norm": 0.4410937655399, "learning_rate": 4.197874086308141e-06, "loss": 0.2765, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4940 }, { "epoch": 1.7679028513602, "grad_norm": 0.43772683050066985, "learning_rate": 4.188160794113295e-06, "loss": 0.2621, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4950 }, { "epoch": 1.7714744925293173, "grad_norm": 0.44596352642090287, "learning_rate": 4.178441985509104e-06, "loss": 0.2699, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4960 }, { "epoch": 1.7750461336984344, "grad_norm": 0.43919374270562106, "learning_rate": 4.168717738403124e-06, "loss": 0.2724, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4970 }, { "epoch": 1.7786177748675516, "grad_norm": 0.409663218208272, "learning_rate": 4.158988130746512e-06, "loss": 0.2709, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4980 }, { "epoch": 1.782189416036669, "grad_norm": 0.40778352844560883, "learning_rate": 4.149253240533389e-06, "loss": 0.2708, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 4990 }, { "epoch": 1.785761057205786, "grad_norm": 0.3988447330223531, "learning_rate": 4.139513145800226e-06, "loss": 0.2674, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5000 }, { "epoch": 1.7893326983749032, "grad_norm": 0.4197454963211274, "learning_rate": 4.129767924625215e-06, "loss": 0.2727, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5010 }, { "epoch": 1.7929043395440205, "grad_norm": 0.45641210622438827, "learning_rate": 4.12001765512764e-06, "loss": 0.2567, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5020 }, { "epoch": 1.7964759807131376, "grad_norm": 0.4218050555962391, "learning_rate": 4.110262415467254e-06, "loss": 0.2755, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5030 }, { "epoch": 1.8000476218822548, "grad_norm": 0.41944541731559504, "learning_rate": 4.100502283843651e-06, "loss": 0.278, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5040 }, { "epoch": 1.803619263051372, "grad_norm": 0.4238198211524272, "learning_rate": 4.0907373384956415e-06, "loss": 0.2731, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5050 }, { "epoch": 1.8071909042204894, "grad_norm": 0.42139886595109105, "learning_rate": 4.080967657700623e-06, "loss": 0.277, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5060 }, { "epoch": 1.8107625453896066, "grad_norm": 0.4294097732318922, "learning_rate": 4.071193319773952e-06, "loss": 0.2666, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5070 }, { "epoch": 1.8143341865587237, "grad_norm": 0.4088369335354677, "learning_rate": 4.06141440306832e-06, "loss": 0.265, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5080 }, { "epoch": 1.817905827727841, "grad_norm": 0.4325090741382579, "learning_rate": 4.051630985973116e-06, "loss": 0.2649, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5090 }, { "epoch": 1.8214774688969582, "grad_norm": 0.4782429273760244, "learning_rate": 4.041843146913814e-06, "loss": 0.2666, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5100 }, { "epoch": 1.8250491100660753, "grad_norm": 0.44116784442799345, "learning_rate": 4.03205096435133e-06, "loss": 0.2696, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5110 }, { "epoch": 1.8286207512351926, "grad_norm": 0.41397328741000383, "learning_rate": 4.022254516781399e-06, "loss": 0.2717, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5120 }, { "epoch": 1.8321923924043098, "grad_norm": 0.45756084112038137, "learning_rate": 4.012453882733945e-06, "loss": 0.2678, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5130 }, { "epoch": 1.8357640335734269, "grad_norm": 0.4307519036701089, "learning_rate": 4.00264914077245e-06, "loss": 0.2685, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5140 }, { "epoch": 1.8393356747425442, "grad_norm": 0.46282872085965715, "learning_rate": 3.99284036949333e-06, "loss": 0.2684, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5150 }, { "epoch": 1.8429073159116616, "grad_norm": 0.44521556978523347, "learning_rate": 3.983027647525297e-06, "loss": 0.2629, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5160 }, { "epoch": 1.8464789570807785, "grad_norm": 0.4335775967545023, "learning_rate": 3.973211053528732e-06, "loss": 0.2638, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5170 }, { "epoch": 1.8500505982498958, "grad_norm": 0.4484527647303034, "learning_rate": 3.963390666195058e-06, "loss": 0.2601, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5180 }, { "epoch": 1.8536222394190132, "grad_norm": 0.4147772650920792, "learning_rate": 3.953566564246102e-06, "loss": 0.2708, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5190 }, { "epoch": 1.8571938805881303, "grad_norm": 0.42238734977530235, "learning_rate": 3.9437388264334726e-06, "loss": 0.2711, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5200 }, { "epoch": 1.8607655217572474, "grad_norm": 0.4676602900276513, "learning_rate": 3.933907531537919e-06, "loss": 0.2724, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5210 }, { "epoch": 1.8643371629263648, "grad_norm": 0.4550684970676724, "learning_rate": 3.92407275836871e-06, "loss": 0.2625, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5220 }, { "epoch": 1.8679088040954819, "grad_norm": 0.39876614644325137, "learning_rate": 3.914234585762992e-06, "loss": 0.2699, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5230 }, { "epoch": 1.871480445264599, "grad_norm": 0.45192453682747524, "learning_rate": 3.9043930925851644e-06, "loss": 0.2616, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5240 }, { "epoch": 1.8750520864337163, "grad_norm": 0.4260929665151792, "learning_rate": 3.894548357726243e-06, "loss": 0.2645, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5250 }, { "epoch": 1.8786237276028335, "grad_norm": 0.43944022835650526, "learning_rate": 3.884700460103234e-06, "loss": 0.2652, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5260 }, { "epoch": 1.8821953687719506, "grad_norm": 0.45508403385278373, "learning_rate": 3.874849478658489e-06, "loss": 0.272, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5270 }, { "epoch": 1.885767009941068, "grad_norm": 0.41091398036572885, "learning_rate": 3.864995492359088e-06, "loss": 0.2628, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5280 }, { "epoch": 1.8893386511101853, "grad_norm": 0.4605821484406852, "learning_rate": 3.8551385801961935e-06, "loss": 0.2737, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5290 }, { "epoch": 1.8929102922793022, "grad_norm": 0.43068610457940865, "learning_rate": 3.845278821184424e-06, "loss": 0.2681, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5300 }, { "epoch": 1.8964819334484195, "grad_norm": 0.43164071390264, "learning_rate": 3.835416294361219e-06, "loss": 0.2662, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5310 }, { "epoch": 1.9000535746175369, "grad_norm": 0.3967008822555253, "learning_rate": 3.825551078786203e-06, "loss": 0.2726, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5320 }, { "epoch": 1.903625215786654, "grad_norm": 0.44872440715524753, "learning_rate": 3.815683253540557e-06, "loss": 0.2595, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5330 }, { "epoch": 1.9071968569557711, "grad_norm": 0.47048031848498323, "learning_rate": 3.8058128977263796e-06, "loss": 0.2625, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5340 }, { "epoch": 1.9107684981248885, "grad_norm": 0.43319523625383444, "learning_rate": 3.7959400904660573e-06, "loss": 0.2614, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5350 }, { "epoch": 1.9143401392940056, "grad_norm": 0.4520347805433191, "learning_rate": 3.786064910901623e-06, "loss": 0.2581, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5360 }, { "epoch": 1.9179117804631227, "grad_norm": 0.44455950032323127, "learning_rate": 3.7761874381941305e-06, "loss": 0.2587, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5370 }, { "epoch": 1.92148342163224, "grad_norm": 0.4238561873196431, "learning_rate": 3.7663077515230163e-06, "loss": 0.2629, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5380 }, { "epoch": 1.9250550628013572, "grad_norm": 0.46891711405964687, "learning_rate": 3.75642593008546e-06, "loss": 0.2594, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5390 }, { "epoch": 1.9286267039704743, "grad_norm": 0.40249725381883916, "learning_rate": 3.7465420530957588e-06, "loss": 0.2673, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5400 }, { "epoch": 1.9321983451395917, "grad_norm": 0.4369159681597768, "learning_rate": 3.736656199784684e-06, "loss": 0.2626, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5410 }, { "epoch": 1.935769986308709, "grad_norm": 0.4618371306142006, "learning_rate": 3.72676844939885e-06, "loss": 0.2674, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5420 }, { "epoch": 1.939341627477826, "grad_norm": 0.44720354687845026, "learning_rate": 3.7168788812000813e-06, "loss": 0.2699, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5430 }, { "epoch": 1.9429132686469432, "grad_norm": 0.4779469192627861, "learning_rate": 3.706987574464772e-06, "loss": 0.2673, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5440 }, { "epoch": 1.9464849098160606, "grad_norm": 0.4639961764942175, "learning_rate": 3.6970946084832493e-06, "loss": 0.269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5450 }, { "epoch": 1.9500565509851777, "grad_norm": 0.4658367692686749, "learning_rate": 3.687200062559149e-06, "loss": 0.2601, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5460 }, { "epoch": 1.9536281921542948, "grad_norm": 0.45772338543604596, "learning_rate": 3.677304016008766e-06, "loss": 0.2596, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5470 }, { "epoch": 1.9571998333234122, "grad_norm": 0.4632287878695277, "learning_rate": 3.6674065481604254e-06, "loss": 0.2681, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5480 }, { "epoch": 1.9607714744925293, "grad_norm": 0.47016891116157244, "learning_rate": 3.657507738353846e-06, "loss": 0.2644, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5490 }, { "epoch": 1.9643431156616464, "grad_norm": 0.439306529444033, "learning_rate": 3.6476076659395063e-06, "loss": 0.263, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5500 }, { "epoch": 1.9679147568307638, "grad_norm": 0.4217601827104545, "learning_rate": 3.6377064102780027e-06, "loss": 0.2623, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5510 }, { "epoch": 1.971486397999881, "grad_norm": 0.41695850691495356, "learning_rate": 3.627804050739418e-06, "loss": 0.2646, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5520 }, { "epoch": 1.975058039168998, "grad_norm": 0.4599681552034561, "learning_rate": 3.6179006667026843e-06, "loss": 0.2692, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5530 }, { "epoch": 1.9786296803381154, "grad_norm": 0.4485808768326826, "learning_rate": 3.607996337554946e-06, "loss": 0.2615, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5540 }, { "epoch": 1.9822013215072327, "grad_norm": 0.40761040542444305, "learning_rate": 3.5980911426909237e-06, "loss": 0.2638, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5550 }, { "epoch": 1.9857729626763496, "grad_norm": 0.4222409436766008, "learning_rate": 3.588185161512277e-06, "loss": 0.2682, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5560 }, { "epoch": 1.989344603845467, "grad_norm": 0.4469348281535693, "learning_rate": 3.5782784734269684e-06, "loss": 0.2652, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5570 }, { "epoch": 1.9929162450145843, "grad_norm": 0.4436372969867896, "learning_rate": 3.56837115784863e-06, "loss": 0.2654, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5580 }, { "epoch": 1.9964878861837014, "grad_norm": 0.4450124638999612, "learning_rate": 3.558463294195921e-06, "loss": 0.2654, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5590 }, { "epoch": 2.0, "grad_norm": 0.4587295790990368, "learning_rate": 3.548554961891895e-06, "loss": 0.2733, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5600 }, { "epoch": 2.0035716411691173, "grad_norm": 0.4581938127499719, "learning_rate": 3.5386462403633622e-06, "loss": 0.2723, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5610 }, { "epoch": 2.0071432823382342, "grad_norm": 0.4332836163918156, "learning_rate": 3.5287372090402544e-06, "loss": 0.2648, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5620 }, { "epoch": 2.0107149235073516, "grad_norm": 0.46166257438776565, "learning_rate": 3.518827947354983e-06, "loss": 0.2609, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5630 }, { "epoch": 2.014286564676469, "grad_norm": 0.4258612846530011, "learning_rate": 3.5089185347418104e-06, "loss": 0.2524, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5640 }, { "epoch": 2.017858205845586, "grad_norm": 0.451199865672739, "learning_rate": 3.499009050636206e-06, "loss": 0.2615, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5650 }, { "epoch": 2.021429847014703, "grad_norm": 0.4539388452090233, "learning_rate": 3.4890995744742136e-06, "loss": 0.257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5660 }, { "epoch": 2.0250014881838205, "grad_norm": 0.46272019900125183, "learning_rate": 3.4791901856918133e-06, "loss": 0.26, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5670 }, { "epoch": 2.028573129352938, "grad_norm": 0.4462580527431944, "learning_rate": 3.469280963724284e-06, "loss": 0.2624, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5680 }, { "epoch": 2.0321447705220548, "grad_norm": 0.4313640422424715, "learning_rate": 3.4593719880055683e-06, "loss": 0.252, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5690 }, { "epoch": 2.035716411691172, "grad_norm": 0.4320905225982968, "learning_rate": 3.449463337967633e-06, "loss": 0.2624, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5700 }, { "epoch": 2.0392880528602895, "grad_norm": 0.45081082335764194, "learning_rate": 3.439555093039836e-06, "loss": 0.2596, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5710 }, { "epoch": 2.0428596940294064, "grad_norm": 0.44641947710146607, "learning_rate": 3.4296473326482884e-06, "loss": 0.2654, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5720 }, { "epoch": 2.0464313351985237, "grad_norm": 0.4744778002332598, "learning_rate": 3.4197401362152145e-06, "loss": 0.2623, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5730 }, { "epoch": 2.050002976367641, "grad_norm": 0.40222803242906663, "learning_rate": 3.4098335831583214e-06, "loss": 0.2615, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5740 }, { "epoch": 2.053574617536758, "grad_norm": 0.4291805662714492, "learning_rate": 3.3999277528901547e-06, "loss": 0.2612, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5750 }, { "epoch": 2.0571462587058753, "grad_norm": 0.4198663575448227, "learning_rate": 3.390022724817469e-06, "loss": 0.257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5760 }, { "epoch": 2.0607178998749927, "grad_norm": 0.43469615242755727, "learning_rate": 3.380118578340587e-06, "loss": 0.2506, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5770 }, { "epoch": 2.0642895410441096, "grad_norm": 0.4257751104947171, "learning_rate": 3.370215392852766e-06, "loss": 0.2551, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5780 }, { "epoch": 2.067861182213227, "grad_norm": 0.44934709851583765, "learning_rate": 3.3603132477395556e-06, "loss": 0.2647, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5790 }, { "epoch": 2.0714328233823442, "grad_norm": 0.4151266377740739, "learning_rate": 3.350412222378173e-06, "loss": 0.2553, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5800 }, { "epoch": 2.0750044645514616, "grad_norm": 0.4131070130410974, "learning_rate": 3.3405123961368523e-06, "loss": 0.2531, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5810 }, { "epoch": 2.0785761057205785, "grad_norm": 0.4469057886718626, "learning_rate": 3.330613848374218e-06, "loss": 0.2529, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5820 }, { "epoch": 2.082147746889696, "grad_norm": 0.4470454898990664, "learning_rate": 3.3207166584386465e-06, "loss": 0.2596, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5830 }, { "epoch": 2.085719388058813, "grad_norm": 0.43300846252821945, "learning_rate": 3.31082090566763e-06, "loss": 0.2465, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5840 }, { "epoch": 2.08929102922793, "grad_norm": 0.4158002323170265, "learning_rate": 3.3009266693871378e-06, "loss": 0.2571, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5850 }, { "epoch": 2.0928626703970474, "grad_norm": 0.4410294971525737, "learning_rate": 3.2910340289109845e-06, "loss": 0.2537, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5860 }, { "epoch": 2.0964343115661648, "grad_norm": 0.425419914687579, "learning_rate": 3.281143063540192e-06, "loss": 0.2485, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5870 }, { "epoch": 2.1000059527352817, "grad_norm": 0.3930814964109948, "learning_rate": 3.2712538525623556e-06, "loss": 0.2509, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5880 }, { "epoch": 2.103577593904399, "grad_norm": 0.39312197575007407, "learning_rate": 3.2613664752510058e-06, "loss": 0.2579, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5890 }, { "epoch": 2.1071492350735164, "grad_norm": 0.39166055787352044, "learning_rate": 3.251481010864975e-06, "loss": 0.253, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5900 }, { "epoch": 2.1107208762426337, "grad_norm": 0.4168399489824956, "learning_rate": 3.2415975386477585e-06, "loss": 0.2426, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5910 }, { "epoch": 2.1142925174117506, "grad_norm": 0.4003519724030952, "learning_rate": 3.2317161378268857e-06, "loss": 0.2555, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5920 }, { "epoch": 2.117864158580868, "grad_norm": 0.4031809919204729, "learning_rate": 3.22183688761328e-06, "loss": 0.2488, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5930 }, { "epoch": 2.1214357997499853, "grad_norm": 0.4234261622652059, "learning_rate": 3.211959867200624e-06, "loss": 0.2445, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5940 }, { "epoch": 2.125007440919102, "grad_norm": 0.4612705252248991, "learning_rate": 3.2020851557647266e-06, "loss": 0.2503, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5950 }, { "epoch": 2.1285790820882196, "grad_norm": 0.42876076853178796, "learning_rate": 3.192212832462889e-06, "loss": 0.2507, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5960 }, { "epoch": 2.132150723257337, "grad_norm": 0.45700635843544063, "learning_rate": 3.1823429764332676e-06, "loss": 0.2476, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5970 }, { "epoch": 2.135722364426454, "grad_norm": 0.43185044728972843, "learning_rate": 3.1724756667942405e-06, "loss": 0.2504, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5980 }, { "epoch": 2.139294005595571, "grad_norm": 0.38819831543066835, "learning_rate": 3.1626109826437737e-06, "loss": 0.2464, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 5990 }, { "epoch": 2.1428656467646885, "grad_norm": 0.4246096563981283, "learning_rate": 3.1527490030587865e-06, "loss": 0.2498, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6000 }, { "epoch": 2.1464372879338054, "grad_norm": 0.4478475877616365, "learning_rate": 3.142889807094518e-06, "loss": 0.241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6010 }, { "epoch": 2.1500089291029227, "grad_norm": 0.42555214861086776, "learning_rate": 3.133033473783894e-06, "loss": 0.2457, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6020 }, { "epoch": 2.15358057027204, "grad_norm": 0.41306054422231764, "learning_rate": 3.123180082136892e-06, "loss": 0.2477, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6030 }, { "epoch": 2.157152211441157, "grad_norm": 0.42793129046487083, "learning_rate": 3.11332971113991e-06, "loss": 0.2531, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6040 }, { "epoch": 2.1607238526102743, "grad_norm": 0.4284276032645389, "learning_rate": 3.1034824397551324e-06, "loss": 0.2564, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6050 }, { "epoch": 2.1642954937793917, "grad_norm": 0.4546595301716484, "learning_rate": 3.0936383469198925e-06, "loss": 0.2551, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6060 }, { "epoch": 2.167867134948509, "grad_norm": 0.3960043742406359, "learning_rate": 3.083797511546048e-06, "loss": 0.2508, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6070 }, { "epoch": 2.171438776117626, "grad_norm": 0.44211103699658133, "learning_rate": 3.073960012519343e-06, "loss": 0.2505, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6080 }, { "epoch": 2.1750104172867433, "grad_norm": 0.4384592528383511, "learning_rate": 3.0641259286987764e-06, "loss": 0.2571, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6090 }, { "epoch": 2.1785820584558606, "grad_norm": 0.4153547793008788, "learning_rate": 3.05429533891597e-06, "loss": 0.2502, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6100 }, { "epoch": 2.1821536996249775, "grad_norm": 0.43122353801146596, "learning_rate": 3.0444683219745377e-06, "loss": 0.2472, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6110 }, { "epoch": 2.185725340794095, "grad_norm": 0.46468804115251483, "learning_rate": 3.0346449566494538e-06, "loss": 0.2518, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6120 }, { "epoch": 2.189296981963212, "grad_norm": 0.426327090760231, "learning_rate": 3.024825321686419e-06, "loss": 0.2537, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6130 }, { "epoch": 2.192868623132329, "grad_norm": 0.43925587873676897, "learning_rate": 3.0150094958012315e-06, "loss": 0.2466, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6140 }, { "epoch": 2.1964402643014465, "grad_norm": 0.44205546017901753, "learning_rate": 3.0051975576791544e-06, "loss": 0.2439, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6150 }, { "epoch": 2.200011905470564, "grad_norm": 0.4229463218493798, "learning_rate": 2.9953895859742865e-06, "loss": 0.2555, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6160 }, { "epoch": 2.203583546639681, "grad_norm": 0.43407788987427626, "learning_rate": 2.985585659308932e-06, "loss": 0.2504, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6170 }, { "epoch": 2.207155187808798, "grad_norm": 0.4354528910386347, "learning_rate": 2.975785856272969e-06, "loss": 0.2467, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6180 }, { "epoch": 2.2107268289779154, "grad_norm": 0.43331128792320145, "learning_rate": 2.965990255423217e-06, "loss": 0.2439, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6190 }, { "epoch": 2.2142984701470327, "grad_norm": 0.42629909041017044, "learning_rate": 2.956198935282816e-06, "loss": 0.2512, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6200 }, { "epoch": 2.2178701113161496, "grad_norm": 0.406376228889923, "learning_rate": 2.9464119743405863e-06, "loss": 0.2445, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6210 }, { "epoch": 2.221441752485267, "grad_norm": 0.45906603584954947, "learning_rate": 2.936629451050405e-06, "loss": 0.2515, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6220 }, { "epoch": 2.2250133936543843, "grad_norm": 0.44547898712147144, "learning_rate": 2.926851443830579e-06, "loss": 0.245, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6230 }, { "epoch": 2.2285850348235012, "grad_norm": 0.5384322504281486, "learning_rate": 2.9170780310632086e-06, "loss": 0.2454, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6240 }, { "epoch": 2.2321566759926186, "grad_norm": 0.42570030625635924, "learning_rate": 2.907309291093569e-06, "loss": 0.2405, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6250 }, { "epoch": 2.235728317161736, "grad_norm": 0.436114018629106, "learning_rate": 2.8975453022294748e-06, "loss": 0.252, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6260 }, { "epoch": 2.239299958330853, "grad_norm": 0.41311020815509447, "learning_rate": 2.8877861427406562e-06, "loss": 0.2555, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6270 }, { "epoch": 2.24287159949997, "grad_norm": 0.40408747470000445, "learning_rate": 2.8780318908581304e-06, "loss": 0.2487, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6280 }, { "epoch": 2.2464432406690875, "grad_norm": 0.4185113340584532, "learning_rate": 2.8682826247735742e-06, "loss": 0.2486, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6290 }, { "epoch": 2.2500148818382044, "grad_norm": 0.4218486087745524, "learning_rate": 2.8585384226386955e-06, "loss": 0.2407, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6300 }, { "epoch": 2.2535865230073218, "grad_norm": 0.40512251740904315, "learning_rate": 2.8487993625646116e-06, "loss": 0.248, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6310 }, { "epoch": 2.257158164176439, "grad_norm": 0.4409482835365694, "learning_rate": 2.8390655226212196e-06, "loss": 0.2492, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6320 }, { "epoch": 2.2607298053455565, "grad_norm": 0.4073725088603519, "learning_rate": 2.82933698083657e-06, "loss": 0.2404, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6330 }, { "epoch": 2.2643014465146734, "grad_norm": 0.4369684228847736, "learning_rate": 2.8196138151962426e-06, "loss": 0.2427, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6340 }, { "epoch": 2.2678730876837907, "grad_norm": 0.44579174359074303, "learning_rate": 2.809896103642721e-06, "loss": 0.2426, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6350 }, { "epoch": 2.271444728852908, "grad_norm": 0.45771064161651553, "learning_rate": 2.80018392407477e-06, "loss": 0.2435, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6360 }, { "epoch": 2.275016370022025, "grad_norm": 0.4403774918989739, "learning_rate": 2.7904773543468075e-06, "loss": 0.2438, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6370 }, { "epoch": 2.2785880111911423, "grad_norm": 0.42598744927312987, "learning_rate": 2.780776472268282e-06, "loss": 0.2503, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6380 }, { "epoch": 2.2821596523602596, "grad_norm": 0.4276886507005743, "learning_rate": 2.771081355603049e-06, "loss": 0.2503, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6390 }, { "epoch": 2.285731293529377, "grad_norm": 0.4920717543136929, "learning_rate": 2.7613920820687467e-06, "loss": 0.2435, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6400 }, { "epoch": 2.289302934698494, "grad_norm": 0.43701925036033384, "learning_rate": 2.7517087293361747e-06, "loss": 0.2475, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6410 }, { "epoch": 2.2928745758676112, "grad_norm": 0.4346436136227055, "learning_rate": 2.7420313750286722e-06, "loss": 0.2461, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6420 }, { "epoch": 2.2964462170367286, "grad_norm": 0.4229939188749013, "learning_rate": 2.7323600967214882e-06, "loss": 0.2421, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6430 }, { "epoch": 2.3000178582058455, "grad_norm": 0.5912201414304545, "learning_rate": 2.7226949719411746e-06, "loss": 0.2453, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6440 }, { "epoch": 2.303589499374963, "grad_norm": 0.4253354692500375, "learning_rate": 2.7130360781649473e-06, "loss": 0.2455, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6450 }, { "epoch": 2.30716114054408, "grad_norm": 0.4342807176023442, "learning_rate": 2.703383492820078e-06, "loss": 0.2443, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6460 }, { "epoch": 2.310732781713197, "grad_norm": 0.44986044561831723, "learning_rate": 2.6937372932832662e-06, "loss": 0.2465, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6470 }, { "epoch": 2.3143044228823144, "grad_norm": 0.4577282072825657, "learning_rate": 2.684097556880026e-06, "loss": 0.2432, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6480 }, { "epoch": 2.3178760640514318, "grad_norm": 0.41883367404578103, "learning_rate": 2.674464360884056e-06, "loss": 0.2446, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6490 }, { "epoch": 2.3214477052205487, "grad_norm": 0.42225480419162303, "learning_rate": 2.664837782516631e-06, "loss": 0.2585, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6500 }, { "epoch": 2.325019346389666, "grad_norm": 0.43200294151141205, "learning_rate": 2.6552178989459744e-06, "loss": 0.241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6510 }, { "epoch": 2.3285909875587834, "grad_norm": 0.4172939355364942, "learning_rate": 2.6456047872866464e-06, "loss": 0.2498, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6520 }, { "epoch": 2.3321626287279003, "grad_norm": 0.4336551198473009, "learning_rate": 2.6359985245989188e-06, "loss": 0.2492, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6530 }, { "epoch": 2.3357342698970176, "grad_norm": 0.45451056073122864, "learning_rate": 2.626399187888164e-06, "loss": 0.2427, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6540 }, { "epoch": 2.339305911066135, "grad_norm": 0.42567264012171563, "learning_rate": 2.616806854104232e-06, "loss": 0.2475, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6550 }, { "epoch": 2.3428775522352523, "grad_norm": 0.4748131514352899, "learning_rate": 2.6072216001408377e-06, "loss": 0.2383, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6560 }, { "epoch": 2.346449193404369, "grad_norm": 0.4477560986216873, "learning_rate": 2.5976435028349436e-06, "loss": 0.2492, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6570 }, { "epoch": 2.3500208345734865, "grad_norm": 0.4394971651973961, "learning_rate": 2.588072638966141e-06, "loss": 0.2437, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6580 }, { "epoch": 2.353592475742604, "grad_norm": 0.4470690760524359, "learning_rate": 2.578509085256038e-06, "loss": 0.2439, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6590 }, { "epoch": 2.357164116911721, "grad_norm": 0.43968237151683753, "learning_rate": 2.568952918367645e-06, "loss": 0.2457, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6600 }, { "epoch": 2.360735758080838, "grad_norm": 0.4391386784963587, "learning_rate": 2.5594042149047564e-06, "loss": 0.2423, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6610 }, { "epoch": 2.3643073992499555, "grad_norm": 0.42975324407653287, "learning_rate": 2.549863051411339e-06, "loss": 0.2462, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6620 }, { "epoch": 2.3678790404190724, "grad_norm": 0.45538626525983805, "learning_rate": 2.540329504370918e-06, "loss": 0.2406, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6630 }, { "epoch": 2.3714506815881897, "grad_norm": 0.442390876010859, "learning_rate": 2.530803650205964e-06, "loss": 0.2466, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6640 }, { "epoch": 2.375022322757307, "grad_norm": 0.44546208871045945, "learning_rate": 2.5212855652772796e-06, "loss": 0.2454, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6650 }, { "epoch": 2.3785939639264244, "grad_norm": 0.4205593813710197, "learning_rate": 2.5117753258833893e-06, "loss": 0.2477, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6660 }, { "epoch": 2.3821656050955413, "grad_norm": 0.4276254011439656, "learning_rate": 2.5022730082599236e-06, "loss": 0.2464, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6670 }, { "epoch": 2.3857372462646587, "grad_norm": 0.4278204935672946, "learning_rate": 2.492778688579016e-06, "loss": 0.2543, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6680 }, { "epoch": 2.389308887433776, "grad_norm": 0.4343823003511441, "learning_rate": 2.4832924429486827e-06, "loss": 0.2341, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6690 }, { "epoch": 2.392880528602893, "grad_norm": 0.4323058611549525, "learning_rate": 2.473814347412218e-06, "loss": 0.238, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6700 }, { "epoch": 2.3964521697720103, "grad_norm": 0.4565089465307684, "learning_rate": 2.464344477947585e-06, "loss": 0.2444, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6710 }, { "epoch": 2.4000238109411276, "grad_norm": 0.4734833348995525, "learning_rate": 2.4548829104668033e-06, "loss": 0.2418, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6720 }, { "epoch": 2.4035954521102445, "grad_norm": 0.46249619301612205, "learning_rate": 2.4454297208153435e-06, "loss": 0.2521, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6730 }, { "epoch": 2.407167093279362, "grad_norm": 0.43223468376106816, "learning_rate": 2.4359849847715185e-06, "loss": 0.2416, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6740 }, { "epoch": 2.410738734448479, "grad_norm": 0.4508896819243592, "learning_rate": 2.426548778045875e-06, "loss": 0.2445, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6750 }, { "epoch": 2.414310375617596, "grad_norm": 0.4495937778511734, "learning_rate": 2.4171211762805884e-06, "loss": 0.246, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6760 }, { "epoch": 2.4178820167867134, "grad_norm": 0.43179341290400897, "learning_rate": 2.4077022550488544e-06, "loss": 0.2417, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6770 }, { "epoch": 2.421453657955831, "grad_norm": 0.4705685298645784, "learning_rate": 2.398292089854285e-06, "loss": 0.2381, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6780 }, { "epoch": 2.4250252991249477, "grad_norm": 0.43061736432599096, "learning_rate": 2.3888907561302995e-06, "loss": 0.2477, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6790 }, { "epoch": 2.428596940294065, "grad_norm": 0.4934734522126448, "learning_rate": 2.379498329239526e-06, "loss": 0.2459, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6800 }, { "epoch": 2.4321685814631824, "grad_norm": 0.4765838098586213, "learning_rate": 2.370114884473194e-06, "loss": 0.2482, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6810 }, { "epoch": 2.4357402226322997, "grad_norm": 0.4687408325113669, "learning_rate": 2.3607404970505284e-06, "loss": 0.2408, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6820 }, { "epoch": 2.4393118638014166, "grad_norm": 0.4281004117147228, "learning_rate": 2.351375242118148e-06, "loss": 0.2505, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6830 }, { "epoch": 2.442883504970534, "grad_norm": 0.4370567386193378, "learning_rate": 2.342019194749471e-06, "loss": 0.249, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6840 }, { "epoch": 2.4464551461396513, "grad_norm": 0.4777666587818985, "learning_rate": 2.3326724299440986e-06, "loss": 0.2441, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6850 }, { "epoch": 2.450026787308768, "grad_norm": 0.450289306534585, "learning_rate": 2.323335022627225e-06, "loss": 0.2466, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6860 }, { "epoch": 2.4535984284778856, "grad_norm": 0.4472240813142968, "learning_rate": 2.314007047649033e-06, "loss": 0.2336, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6870 }, { "epoch": 2.457170069647003, "grad_norm": 0.4377670065814464, "learning_rate": 2.304688579784093e-06, "loss": 0.2519, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6880 }, { "epoch": 2.4607417108161203, "grad_norm": 0.4491227795973426, "learning_rate": 2.295379693730766e-06, "loss": 0.2436, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6890 }, { "epoch": 2.464313351985237, "grad_norm": 0.476378700259544, "learning_rate": 2.2860804641106033e-06, "loss": 0.249, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6900 }, { "epoch": 2.4678849931543545, "grad_norm": 0.4508043014114273, "learning_rate": 2.276790965467748e-06, "loss": 0.2473, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6910 }, { "epoch": 2.471456634323472, "grad_norm": 0.42106773935779124, "learning_rate": 2.2675112722683406e-06, "loss": 0.245, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6920 }, { "epoch": 2.4750282754925887, "grad_norm": 0.49568541004831845, "learning_rate": 2.2582414588999175e-06, "loss": 0.2432, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6930 }, { "epoch": 2.478599916661706, "grad_norm": 0.44834524118595986, "learning_rate": 2.248981599670815e-06, "loss": 0.2456, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6940 }, { "epoch": 2.4821715578308234, "grad_norm": 0.4382727602006904, "learning_rate": 2.2397317688095792e-06, "loss": 0.2404, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6950 }, { "epoch": 2.4857431989999403, "grad_norm": 0.4357541382580279, "learning_rate": 2.230492040464365e-06, "loss": 0.2506, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6960 }, { "epoch": 2.4893148401690577, "grad_norm": 0.4494934844393061, "learning_rate": 2.221262488702344e-06, "loss": 0.2328, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6970 }, { "epoch": 2.492886481338175, "grad_norm": 0.44511789317832856, "learning_rate": 2.2120431875091107e-06, "loss": 0.2462, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6980 }, { "epoch": 2.496458122507292, "grad_norm": 0.4637114798209286, "learning_rate": 2.202834210788089e-06, "loss": 0.2359, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 6990 }, { "epoch": 2.5000297636764093, "grad_norm": 0.454258990361472, "learning_rate": 2.1936356323599417e-06, "loss": 0.2464, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7000 }, { "epoch": 2.5036014048455266, "grad_norm": 0.42621406491701536, "learning_rate": 2.1844475259619763e-06, "loss": 0.238, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7010 }, { "epoch": 2.5071730460146435, "grad_norm": 0.4742468073415194, "learning_rate": 2.1752699652475546e-06, "loss": 0.2417, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7020 }, { "epoch": 2.510744687183761, "grad_norm": 0.47342536962727527, "learning_rate": 2.1661030237855023e-06, "loss": 0.2351, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7030 }, { "epoch": 2.514316328352878, "grad_norm": 0.47877566175565467, "learning_rate": 2.1569467750595193e-06, "loss": 0.2452, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7040 }, { "epoch": 2.517887969521995, "grad_norm": 0.46978072487842876, "learning_rate": 2.147801292467592e-06, "loss": 0.2443, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7050 }, { "epoch": 2.5214596106911125, "grad_norm": 0.4735918423996795, "learning_rate": 2.1386666493214007e-06, "loss": 0.2378, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7060 }, { "epoch": 2.52503125186023, "grad_norm": 0.4567127424051271, "learning_rate": 2.12954291884574e-06, "loss": 0.2384, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7070 }, { "epoch": 2.5286028930293467, "grad_norm": 0.4147541911140241, "learning_rate": 2.1204301741779235e-06, "loss": 0.2453, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7080 }, { "epoch": 2.532174534198464, "grad_norm": 0.43912608187760216, "learning_rate": 2.1113284883671994e-06, "loss": 0.2361, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7090 }, { "epoch": 2.5357461753675814, "grad_norm": 0.44929631139927123, "learning_rate": 2.102237934374169e-06, "loss": 0.2401, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7100 }, { "epoch": 2.5393178165366987, "grad_norm": 0.4882310550461796, "learning_rate": 2.0931585850701984e-06, "loss": 0.2377, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7110 }, { "epoch": 2.542889457705816, "grad_norm": 0.4581959273553773, "learning_rate": 2.084090513236835e-06, "loss": 0.2337, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7120 }, { "epoch": 2.546461098874933, "grad_norm": 0.45309648854952933, "learning_rate": 2.075033791565223e-06, "loss": 0.2445, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7130 }, { "epoch": 2.5500327400440503, "grad_norm": 0.44638496641251035, "learning_rate": 2.0659884926555226e-06, "loss": 0.2393, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7140 }, { "epoch": 2.5536043812131677, "grad_norm": 0.48769533117288066, "learning_rate": 2.0569546890163285e-06, "loss": 0.2442, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7150 }, { "epoch": 2.5571760223822846, "grad_norm": 0.43511656727356074, "learning_rate": 2.0479324530640867e-06, "loss": 0.2384, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7160 }, { "epoch": 2.560747663551402, "grad_norm": 0.4820233106672117, "learning_rate": 2.038921857122515e-06, "loss": 0.2419, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7170 }, { "epoch": 2.5643193047205193, "grad_norm": 0.48561125119478454, "learning_rate": 2.02992297342202e-06, "loss": 0.2312, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7180 }, { "epoch": 2.567890945889636, "grad_norm": 0.4432776127145546, "learning_rate": 2.020935874099126e-06, "loss": 0.2402, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7190 }, { "epoch": 2.5714625870587535, "grad_norm": 0.454164264161987, "learning_rate": 2.011960631195889e-06, "loss": 0.2466, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7200 }, { "epoch": 2.575034228227871, "grad_norm": 0.4854707049194837, "learning_rate": 2.0029973166593196e-06, "loss": 0.245, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7210 }, { "epoch": 2.5786058693969878, "grad_norm": 0.46000696889932535, "learning_rate": 1.994046002340814e-06, "loss": 0.243, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7220 }, { "epoch": 2.582177510566105, "grad_norm": 0.4357766462500482, "learning_rate": 1.9851067599955707e-06, "loss": 0.2516, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7230 }, { "epoch": 2.5857491517352225, "grad_norm": 0.44408294376392227, "learning_rate": 1.9761796612820146e-06, "loss": 0.2432, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7240 }, { "epoch": 2.5893207929043394, "grad_norm": 0.43382954307970867, "learning_rate": 1.967264777761229e-06, "loss": 0.241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7250 }, { "epoch": 2.5928924340734567, "grad_norm": 0.46704708085378005, "learning_rate": 1.958362180896376e-06, "loss": 0.2352, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7260 }, { "epoch": 2.596464075242574, "grad_norm": 0.4388547369132919, "learning_rate": 1.9494719420521254e-06, "loss": 0.2406, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7270 }, { "epoch": 2.600035716411691, "grad_norm": 0.4280307854060593, "learning_rate": 1.9405941324940857e-06, "loss": 0.2414, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7280 }, { "epoch": 2.6036073575808083, "grad_norm": 0.45466648840686463, "learning_rate": 1.931728823388228e-06, "loss": 0.244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7290 }, { "epoch": 2.6071789987499256, "grad_norm": 0.44941873865770565, "learning_rate": 1.9228760858003155e-06, "loss": 0.2356, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7300 }, { "epoch": 2.6107506399190425, "grad_norm": 0.4776784235080879, "learning_rate": 1.9140359906953443e-06, "loss": 0.2348, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7310 }, { "epoch": 2.61432228108816, "grad_norm": 0.4859122793447613, "learning_rate": 1.9052086089369585e-06, "loss": 0.2335, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7320 }, { "epoch": 2.6178939222572772, "grad_norm": 0.4645265007751813, "learning_rate": 1.8963940112868904e-06, "loss": 0.2431, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7330 }, { "epoch": 2.6214655634263946, "grad_norm": 0.47714483697616294, "learning_rate": 1.8875922684043977e-06, "loss": 0.2438, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7340 }, { "epoch": 2.6250372045955115, "grad_norm": 0.4921155235969745, "learning_rate": 1.878803450845688e-06, "loss": 0.2367, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7350 }, { "epoch": 2.628608845764629, "grad_norm": 0.4582649026095651, "learning_rate": 1.870027629063357e-06, "loss": 0.2428, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7360 }, { "epoch": 2.632180486933746, "grad_norm": 0.44087571867035075, "learning_rate": 1.8612648734058292e-06, "loss": 0.2433, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7370 }, { "epoch": 2.6357521281028635, "grad_norm": 0.5367973728899978, "learning_rate": 1.852515254116782e-06, "loss": 0.2462, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7380 }, { "epoch": 2.6393237692719804, "grad_norm": 0.47523519421713606, "learning_rate": 1.843778841334594e-06, "loss": 0.2404, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7390 }, { "epoch": 2.6428954104410978, "grad_norm": 0.45351487278591784, "learning_rate": 1.8350557050917794e-06, "loss": 0.237, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7400 }, { "epoch": 2.646467051610215, "grad_norm": 0.45566439391509844, "learning_rate": 1.8263459153144216e-06, "loss": 0.2378, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7410 }, { "epoch": 2.650038692779332, "grad_norm": 0.5427961932037867, "learning_rate": 1.8176495418216173e-06, "loss": 0.2327, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7420 }, { "epoch": 2.6536103339484494, "grad_norm": 0.48467724978186316, "learning_rate": 1.808966654324919e-06, "loss": 0.2453, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7430 }, { "epoch": 2.6571819751175667, "grad_norm": 0.4689333195988378, "learning_rate": 1.800297322427771e-06, "loss": 0.2433, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7440 }, { "epoch": 2.6607536162866836, "grad_norm": 0.4870900784001144, "learning_rate": 1.791641615624952e-06, "loss": 0.2441, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7450 }, { "epoch": 2.664325257455801, "grad_norm": 0.45290609880585164, "learning_rate": 1.7829996033020236e-06, "loss": 0.2461, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7460 }, { "epoch": 2.6678968986249183, "grad_norm": 0.4809762662064227, "learning_rate": 1.7743713547347693e-06, "loss": 0.2332, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7470 }, { "epoch": 2.671468539794035, "grad_norm": 0.4803894889697133, "learning_rate": 1.7657569390886364e-06, "loss": 0.2403, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7480 }, { "epoch": 2.6750401809631525, "grad_norm": 0.43418058879650556, "learning_rate": 1.757156425418191e-06, "loss": 0.2471, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7490 }, { "epoch": 2.67861182213227, "grad_norm": 0.4545794361949984, "learning_rate": 1.7485698826665536e-06, "loss": 0.2395, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7500 }, { "epoch": 2.682183463301387, "grad_norm": 0.44328983850304654, "learning_rate": 1.7399973796648535e-06, "loss": 0.2419, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7510 }, { "epoch": 2.685755104470504, "grad_norm": 0.48089511751970304, "learning_rate": 1.731438985131677e-06, "loss": 0.2381, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7520 }, { "epoch": 2.6893267456396215, "grad_norm": 0.5182314821606652, "learning_rate": 1.7228947676725114e-06, "loss": 0.2393, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7530 }, { "epoch": 2.6928983868087384, "grad_norm": 0.4896768129268529, "learning_rate": 1.7143647957791976e-06, "loss": 0.2428, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7540 }, { "epoch": 2.6964700279778557, "grad_norm": 0.43406888099995405, "learning_rate": 1.7058491378293887e-06, "loss": 0.2464, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7550 }, { "epoch": 2.700041669146973, "grad_norm": 0.45987710855301045, "learning_rate": 1.6973478620859872e-06, "loss": 0.2396, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7560 }, { "epoch": 2.70361331031609, "grad_norm": 0.5081238736314797, "learning_rate": 1.688861036696607e-06, "loss": 0.239, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7570 }, { "epoch": 2.7071849514852073, "grad_norm": 0.47124511315850837, "learning_rate": 1.6803887296930296e-06, "loss": 0.2427, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7580 }, { "epoch": 2.7107565926543247, "grad_norm": 0.4766186555462526, "learning_rate": 1.6719310089906508e-06, "loss": 0.2367, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7590 }, { "epoch": 2.714328233823442, "grad_norm": 0.5154643063486422, "learning_rate": 1.6634879423879397e-06, "loss": 0.2418, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7600 }, { "epoch": 2.717899874992559, "grad_norm": 0.47619250886490805, "learning_rate": 1.655059597565901e-06, "loss": 0.2388, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7610 }, { "epoch": 2.7214715161616763, "grad_norm": 0.4727081854520604, "learning_rate": 1.6466460420875199e-06, "loss": 0.2389, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7620 }, { "epoch": 2.7250431573307936, "grad_norm": 0.4739130851871452, "learning_rate": 1.638247343397233e-06, "loss": 0.2379, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7630 }, { "epoch": 2.728614798499911, "grad_norm": 0.4921562232207917, "learning_rate": 1.6298635688203832e-06, "loss": 0.2486, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7640 }, { "epoch": 2.732186439669028, "grad_norm": 0.4757143273606086, "learning_rate": 1.6214947855626752e-06, "loss": 0.2397, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7650 }, { "epoch": 2.735758080838145, "grad_norm": 0.47786652916433087, "learning_rate": 1.6131410607096414e-06, "loss": 0.2424, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7660 }, { "epoch": 2.7393297220072625, "grad_norm": 0.4357313446728837, "learning_rate": 1.6048024612261076e-06, "loss": 0.2422, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7670 }, { "epoch": 2.7429013631763794, "grad_norm": 0.43258578669352915, "learning_rate": 1.5964790539556484e-06, "loss": 0.2467, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7680 }, { "epoch": 2.746473004345497, "grad_norm": 0.45798160471764066, "learning_rate": 1.5881709056200545e-06, "loss": 0.2451, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7690 }, { "epoch": 2.750044645514614, "grad_norm": 0.5046607545679609, "learning_rate": 1.579878082818802e-06, "loss": 0.2431, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7700 }, { "epoch": 2.753616286683731, "grad_norm": 0.4561696903127055, "learning_rate": 1.5716006520285138e-06, "loss": 0.2416, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7710 }, { "epoch": 2.7571879278528484, "grad_norm": 0.4582998703554197, "learning_rate": 1.5633386796024254e-06, "loss": 0.2481, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7720 }, { "epoch": 2.7607595690219657, "grad_norm": 0.4856769517926892, "learning_rate": 1.55509223176986e-06, "loss": 0.2428, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7730 }, { "epoch": 2.7643312101910826, "grad_norm": 0.4665717722179424, "learning_rate": 1.5468613746356885e-06, "loss": 0.246, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7740 }, { "epoch": 2.7679028513602, "grad_norm": 0.46949182547775437, "learning_rate": 1.538646174179806e-06, "loss": 0.2317, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7750 }, { "epoch": 2.7714744925293173, "grad_norm": 0.48292030377173556, "learning_rate": 1.5304466962566037e-06, "loss": 0.2396, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7760 }, { "epoch": 2.7750461336984342, "grad_norm": 0.4431681729827931, "learning_rate": 1.5222630065944332e-06, "loss": 0.2422, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7770 }, { "epoch": 2.7786177748675516, "grad_norm": 0.4660840922860918, "learning_rate": 1.5140951707950896e-06, "loss": 0.2407, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7780 }, { "epoch": 2.782189416036669, "grad_norm": 0.4322294580280508, "learning_rate": 1.5059432543332798e-06, "loss": 0.2406, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7790 }, { "epoch": 2.785761057205786, "grad_norm": 0.4534373647030015, "learning_rate": 1.4978073225560966e-06, "loss": 0.2375, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7800 }, { "epoch": 2.789332698374903, "grad_norm": 0.4509567338324847, "learning_rate": 1.489687440682497e-06, "loss": 0.2426, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7810 }, { "epoch": 2.7929043395440205, "grad_norm": 0.5264855086764537, "learning_rate": 1.4815836738027832e-06, "loss": 0.2265, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7820 }, { "epoch": 2.7964759807131374, "grad_norm": 0.46188466281106827, "learning_rate": 1.473496086878072e-06, "loss": 0.2455, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7830 }, { "epoch": 2.8000476218822548, "grad_norm": 0.4550270822364228, "learning_rate": 1.4654247447397833e-06, "loss": 0.2487, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7840 }, { "epoch": 2.803619263051372, "grad_norm": 0.4812751842028551, "learning_rate": 1.457369712089114e-06, "loss": 0.2432, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7850 }, { "epoch": 2.8071909042204894, "grad_norm": 0.4300511348882846, "learning_rate": 1.4493310534965194e-06, "loss": 0.2477, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7860 }, { "epoch": 2.810762545389607, "grad_norm": 0.44046616488515516, "learning_rate": 1.4413088334012023e-06, "loss": 0.2366, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7870 }, { "epoch": 2.8143341865587237, "grad_norm": 0.46437456660895376, "learning_rate": 1.4333031161105897e-06, "loss": 0.2356, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7880 }, { "epoch": 2.817905827727841, "grad_norm": 0.4753228834616267, "learning_rate": 1.4253139657998184e-06, "loss": 0.2353, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7890 }, { "epoch": 2.8214774688969584, "grad_norm": 0.4851133893845524, "learning_rate": 1.4173414465112214e-06, "loss": 0.2369, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7900 }, { "epoch": 2.8250491100660753, "grad_norm": 0.47493639702473806, "learning_rate": 1.4093856221538177e-06, "loss": 0.2401, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7910 }, { "epoch": 2.8286207512351926, "grad_norm": 0.4618709950690347, "learning_rate": 1.4014465565027934e-06, "loss": 0.2423, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7920 }, { "epoch": 2.83219239240431, "grad_norm": 0.43927959015901663, "learning_rate": 1.3935243131989971e-06, "loss": 0.2383, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7930 }, { "epoch": 2.835764033573427, "grad_norm": 0.4484129148789068, "learning_rate": 1.385618955748424e-06, "loss": 0.239, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7940 }, { "epoch": 2.8393356747425442, "grad_norm": 0.5001001943658824, "learning_rate": 1.3777305475217123e-06, "loss": 0.2391, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7950 }, { "epoch": 2.8429073159116616, "grad_norm": 0.4653892594419888, "learning_rate": 1.3698591517536295e-06, "loss": 0.2334, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7960 }, { "epoch": 2.8464789570807785, "grad_norm": 0.45427703489652743, "learning_rate": 1.3620048315425715e-06, "loss": 0.2347, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7970 }, { "epoch": 2.850050598249896, "grad_norm": 0.46031626412232396, "learning_rate": 1.3541676498500502e-06, "loss": 0.2309, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7980 }, { "epoch": 2.853622239419013, "grad_norm": 0.4781032434861627, "learning_rate": 1.3463476695001968e-06, "loss": 0.242, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 7990 }, { "epoch": 2.85719388058813, "grad_norm": 0.449958891165902, "learning_rate": 1.3385449531792503e-06, "loss": 0.242, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8000 }, { "epoch": 2.8607655217572474, "grad_norm": 0.4739933313720244, "learning_rate": 1.3307595634350577e-06, "loss": 0.2438, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8010 }, { "epoch": 2.8643371629263648, "grad_norm": 0.4777799159455414, "learning_rate": 1.3229915626765764e-06, "loss": 0.2338, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8020 }, { "epoch": 2.8679088040954817, "grad_norm": 0.4870133963570842, "learning_rate": 1.3152410131733706e-06, "loss": 0.2411, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8030 }, { "epoch": 2.871480445264599, "grad_norm": 0.46036598057169154, "learning_rate": 1.3075079770551103e-06, "loss": 0.2328, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8040 }, { "epoch": 2.8750520864337163, "grad_norm": 0.45474146002760457, "learning_rate": 1.299792516311075e-06, "loss": 0.2356, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8050 }, { "epoch": 2.8786237276028332, "grad_norm": 0.4650118946273859, "learning_rate": 1.2920946927896608e-06, "loss": 0.2367, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8060 }, { "epoch": 2.8821953687719506, "grad_norm": 0.4918407064463534, "learning_rate": 1.2844145681978763e-06, "loss": 0.2435, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8070 }, { "epoch": 2.885767009941068, "grad_norm": 0.45031935108227794, "learning_rate": 1.276752204100857e-06, "loss": 0.2342, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8080 }, { "epoch": 2.8893386511101853, "grad_norm": 0.4511846978714396, "learning_rate": 1.2691076619213641e-06, "loss": 0.2451, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8090 }, { "epoch": 2.892910292279302, "grad_norm": 0.49634809036650396, "learning_rate": 1.261481002939296e-06, "loss": 0.2397, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8100 }, { "epoch": 2.8964819334484195, "grad_norm": 0.4735229254072526, "learning_rate": 1.2538722882911975e-06, "loss": 0.2376, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8110 }, { "epoch": 2.900053574617537, "grad_norm": 0.4586289523437465, "learning_rate": 1.2462815789697694e-06, "loss": 0.2444, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8120 }, { "epoch": 2.9036252157866542, "grad_norm": 0.4689993067162779, "learning_rate": 1.2387089358233753e-06, "loss": 0.2312, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8130 }, { "epoch": 2.907196856955771, "grad_norm": 0.45127761849831977, "learning_rate": 1.2311544195555625e-06, "loss": 0.2345, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8140 }, { "epoch": 2.9107684981248885, "grad_norm": 0.4551924773089047, "learning_rate": 1.2236180907245657e-06, "loss": 0.2333, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8150 }, { "epoch": 2.914340139294006, "grad_norm": 0.4747350168287828, "learning_rate": 1.2161000097428276e-06, "loss": 0.2299, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8160 }, { "epoch": 2.9179117804631227, "grad_norm": 0.4789946852521992, "learning_rate": 1.2086002368765154e-06, "loss": 0.2309, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8170 }, { "epoch": 2.92148342163224, "grad_norm": 0.44972106394869, "learning_rate": 1.2011188322450313e-06, "loss": 0.2351, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8180 }, { "epoch": 2.9250550628013574, "grad_norm": 0.4752319084408135, "learning_rate": 1.1936558558205381e-06, "loss": 0.2316, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8190 }, { "epoch": 2.9286267039704743, "grad_norm": 0.4538625576215163, "learning_rate": 1.186211367427475e-06, "loss": 0.2395, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8200 }, { "epoch": 2.9321983451395917, "grad_norm": 0.43101707019643254, "learning_rate": 1.1787854267420764e-06, "loss": 0.2347, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8210 }, { "epoch": 2.935769986308709, "grad_norm": 0.4565611978219319, "learning_rate": 1.1713780932918943e-06, "loss": 0.2397, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8220 }, { "epoch": 2.939341627477826, "grad_norm": 0.4675562969199988, "learning_rate": 1.1639894264553263e-06, "loss": 0.2427, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8230 }, { "epoch": 2.9429132686469432, "grad_norm": 0.47564898045704657, "learning_rate": 1.1566194854611319e-06, "loss": 0.2395, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8240 }, { "epoch": 2.9464849098160606, "grad_norm": 0.4539496439942422, "learning_rate": 1.1492683293879607e-06, "loss": 0.2415, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8250 }, { "epoch": 2.9500565509851775, "grad_norm": 0.4425681909675462, "learning_rate": 1.1419360171638813e-06, "loss": 0.2328, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8260 }, { "epoch": 2.953628192154295, "grad_norm": 0.5071600153926035, "learning_rate": 1.1346226075659083e-06, "loss": 0.2321, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8270 }, { "epoch": 2.957199833323412, "grad_norm": 0.4678630904942017, "learning_rate": 1.1273281592195246e-06, "loss": 0.2407, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8280 }, { "epoch": 2.960771474492529, "grad_norm": 0.4622258694530203, "learning_rate": 1.1200527305982229e-06, "loss": 0.2372, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8290 }, { "epoch": 2.9643431156616464, "grad_norm": 0.4561377388624881, "learning_rate": 1.1127963800230261e-06, "loss": 0.2358, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8300 }, { "epoch": 2.967914756830764, "grad_norm": 0.45829660432747915, "learning_rate": 1.1055591656620247e-06, "loss": 0.2352, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8310 }, { "epoch": 2.9714863979998807, "grad_norm": 0.4557869621482418, "learning_rate": 1.0983411455299138e-06, "loss": 0.2376, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8320 }, { "epoch": 2.975058039168998, "grad_norm": 0.4681011674673779, "learning_rate": 1.09114237748752e-06, "loss": 0.2425, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8330 }, { "epoch": 2.9786296803381154, "grad_norm": 0.4732661149841976, "learning_rate": 1.0839629192413425e-06, "loss": 0.2349, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8340 }, { "epoch": 2.9822013215072327, "grad_norm": 0.4476543017032875, "learning_rate": 1.0768028283430959e-06, "loss": 0.237, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8350 }, { "epoch": 2.9857729626763496, "grad_norm": 0.4680863834511507, "learning_rate": 1.0696621621892357e-06, "loss": 0.2417, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8360 }, { "epoch": 2.989344603845467, "grad_norm": 0.4578896945136068, "learning_rate": 1.062540978020508e-06, "loss": 0.2385, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8370 }, { "epoch": 2.9929162450145843, "grad_norm": 0.4549383047870384, "learning_rate": 1.0554393329214897e-06, "loss": 0.2388, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8380 }, { "epoch": 2.9964878861837017, "grad_norm": 0.4693977536348272, "learning_rate": 1.0483572838201275e-06, "loss": 0.2387, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8390 }, { "epoch": 3.0, "grad_norm": 0.4767978710071707, "learning_rate": 1.0412948874872829e-06, "loss": 0.2462, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.43, "memory/max_mem_allocated(gib)": 42.86, "step": 8400 }, { "epoch": 3.0035716411691173, "grad_norm": 0.5082675672275083, "learning_rate": 1.0342522005362807e-06, "loss": 0.2459, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8410 }, { "epoch": 3.0071432823382342, "grad_norm": 0.4434911719606514, "learning_rate": 1.0272292794224478e-06, "loss": 0.2385, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8420 }, { "epoch": 3.0107149235073516, "grad_norm": 0.47493763800133504, "learning_rate": 1.0202261804426686e-06, "loss": 0.2345, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8430 }, { "epoch": 3.014286564676469, "grad_norm": 0.45270030835499836, "learning_rate": 1.013242959734929e-06, "loss": 0.2258, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8440 }, { "epoch": 3.017858205845586, "grad_norm": 0.4945126721474151, "learning_rate": 1.0062796732778665e-06, "loss": 0.2357, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8450 }, { "epoch": 3.021429847014703, "grad_norm": 0.4584599172625007, "learning_rate": 9.99336376890321e-07, "loss": 0.231, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8460 }, { "epoch": 3.0250014881838205, "grad_norm": 0.4741731531048592, "learning_rate": 9.924131262308933e-07, "loss": 0.234, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8470 }, { "epoch": 3.028573129352938, "grad_norm": 0.42977088432539023, "learning_rate": 9.85509976797491e-07, "loss": 0.2367, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8480 }, { "epoch": 3.0321447705220548, "grad_norm": 0.4549490544845592, "learning_rate": 9.78626983926886e-07, "loss": 0.2261, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8490 }, { "epoch": 3.035716411691172, "grad_norm": 0.43517186698776544, "learning_rate": 9.717642027942746e-07, "loss": 0.2368, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8500 }, { "epoch": 3.0392880528602895, "grad_norm": 0.48096149960619133, "learning_rate": 9.64921688412833e-07, "loss": 0.2339, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8510 }, { "epoch": 3.0428596940294064, "grad_norm": 0.4695639710779253, "learning_rate": 9.58099495633272e-07, "loss": 0.2397, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8520 }, { "epoch": 3.0464313351985237, "grad_norm": 0.46406480819585466, "learning_rate": 9.512976791434055e-07, "loss": 0.2367, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8530 }, { "epoch": 3.050002976367641, "grad_norm": 0.43549316441900676, "learning_rate": 9.445162934677041e-07, "loss": 0.2362, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8540 }, { "epoch": 3.053574617536758, "grad_norm": 0.4776504686587063, "learning_rate": 9.377553929668628e-07, "loss": 0.2357, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8550 }, { "epoch": 3.0571462587058753, "grad_norm": 0.4957631348783547, "learning_rate": 9.310150318373668e-07, "loss": 0.2317, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8560 }, { "epoch": 3.0607178998749927, "grad_norm": 0.5121306681504587, "learning_rate": 9.242952641110502e-07, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8570 }, { "epoch": 3.0642895410441096, "grad_norm": 0.4355193727970268, "learning_rate": 9.17596143654667e-07, "loss": 0.2301, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8580 }, { "epoch": 3.067861182213227, "grad_norm": 0.4570296964534929, "learning_rate": 9.109177241694655e-07, "loss": 0.2399, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8590 }, { "epoch": 3.0714328233823442, "grad_norm": 0.4830423290838575, "learning_rate": 9.042600591907455e-07, "loss": 0.2304, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8600 }, { "epoch": 3.0750044645514616, "grad_norm": 0.4429461104459678, "learning_rate": 8.976232020874352e-07, "loss": 0.2284, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8610 }, { "epoch": 3.0785761057205785, "grad_norm": 0.4705885398084829, "learning_rate": 8.910072060616686e-07, "loss": 0.2282, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8620 }, { "epoch": 3.082147746889696, "grad_norm": 0.48482873146757927, "learning_rate": 8.844121241483485e-07, "loss": 0.2351, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8630 }, { "epoch": 3.085719388058813, "grad_norm": 0.45871074100809267, "learning_rate": 8.778380092147283e-07, "loss": 0.222, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8640 }, { "epoch": 3.08929102922793, "grad_norm": 0.42580067592131254, "learning_rate": 8.712849139599894e-07, "loss": 0.2328, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8650 }, { "epoch": 3.0928626703970474, "grad_norm": 0.4683497978752016, "learning_rate": 8.647528909148116e-07, "loss": 0.2295, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8660 }, { "epoch": 3.0964343115661648, "grad_norm": 0.46173709254753664, "learning_rate": 8.582419924409602e-07, "loss": 0.2244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8670 }, { "epoch": 3.1000059527352817, "grad_norm": 0.41918963310164564, "learning_rate": 8.51752270730862e-07, "loss": 0.2267, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8680 }, { "epoch": 3.103577593904399, "grad_norm": 0.4216737296789738, "learning_rate": 8.452837778071847e-07, "loss": 0.2336, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8690 }, { "epoch": 3.1071492350735164, "grad_norm": 0.4641138956232256, "learning_rate": 8.388365655224239e-07, "loss": 0.2293, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8700 }, { "epoch": 3.1107208762426337, "grad_norm": 0.466554529547195, "learning_rate": 8.324106855584883e-07, "loss": 0.2186, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8710 }, { "epoch": 3.1142925174117506, "grad_norm": 0.45252478767112264, "learning_rate": 8.260061894262803e-07, "loss": 0.232, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8720 }, { "epoch": 3.117864158580868, "grad_norm": 0.453030371983167, "learning_rate": 8.196231284652864e-07, "loss": 0.2249, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8730 }, { "epoch": 3.1214357997499853, "grad_norm": 0.434980320791229, "learning_rate": 8.132615538431668e-07, "loss": 0.2208, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8740 }, { "epoch": 3.125007440919102, "grad_norm": 0.4504272817775109, "learning_rate": 8.069215165553445e-07, "loss": 0.2269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8750 }, { "epoch": 3.1285790820882196, "grad_norm": 0.4169338153102719, "learning_rate": 8.006030674245912e-07, "loss": 0.2272, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8760 }, { "epoch": 3.132150723257337, "grad_norm": 0.4381106314478239, "learning_rate": 7.9430625710063e-07, "loss": 0.2239, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8770 }, { "epoch": 3.135722364426454, "grad_norm": 0.46269561831814054, "learning_rate": 7.880311360597195e-07, "loss": 0.2267, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8780 }, { "epoch": 3.139294005595571, "grad_norm": 0.43404406950330415, "learning_rate": 7.817777546042548e-07, "loss": 0.223, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8790 }, { "epoch": 3.1428656467646885, "grad_norm": 0.45629754277622947, "learning_rate": 7.755461628623651e-07, "loss": 0.2266, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8800 }, { "epoch": 3.1464372879338054, "grad_norm": 0.48355699051102885, "learning_rate": 7.693364107875062e-07, "loss": 0.2175, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8810 }, { "epoch": 3.1500089291029227, "grad_norm": 0.4413863956425271, "learning_rate": 7.631485481580636e-07, "loss": 0.2221, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8820 }, { "epoch": 3.15358057027204, "grad_norm": 0.43695796833933653, "learning_rate": 7.569826245769594e-07, "loss": 0.2246, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8830 }, { "epoch": 3.157152211441157, "grad_norm": 0.4668782524426645, "learning_rate": 7.508386894712413e-07, "loss": 0.2301, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8840 }, { "epoch": 3.1607238526102743, "grad_norm": 0.45885921126660134, "learning_rate": 7.447167920916969e-07, "loss": 0.2332, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8850 }, { "epoch": 3.1642954937793917, "grad_norm": 0.42552829032477146, "learning_rate": 7.386169815124566e-07, "loss": 0.2322, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8860 }, { "epoch": 3.167867134948509, "grad_norm": 0.4633441040739579, "learning_rate": 7.325393066305984e-07, "loss": 0.2276, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8870 }, { "epoch": 3.171438776117626, "grad_norm": 0.48042671789935903, "learning_rate": 7.264838161657556e-07, "loss": 0.2276, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8880 }, { "epoch": 3.1750104172867433, "grad_norm": 0.4584169432718334, "learning_rate": 7.204505586597312e-07, "loss": 0.2344, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8890 }, { "epoch": 3.1785820584558606, "grad_norm": 0.4213870889501487, "learning_rate": 7.144395824761007e-07, "loss": 0.2274, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8900 }, { "epoch": 3.1821536996249775, "grad_norm": 0.43652677559804104, "learning_rate": 7.084509357998322e-07, "loss": 0.2244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8910 }, { "epoch": 3.185725340794095, "grad_norm": 0.44176850021681147, "learning_rate": 7.024846666368969e-07, "loss": 0.2293, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8920 }, { "epoch": 3.189296981963212, "grad_norm": 0.43039405198823494, "learning_rate": 6.965408228138819e-07, "loss": 0.231, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8930 }, { "epoch": 3.192868623132329, "grad_norm": 0.4680348567715748, "learning_rate": 6.906194519776098e-07, "loss": 0.224, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8940 }, { "epoch": 3.1964402643014465, "grad_norm": 0.43030595828491525, "learning_rate": 6.847206015947576e-07, "loss": 0.2213, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8950 }, { "epoch": 3.200011905470564, "grad_norm": 0.44816634746331696, "learning_rate": 6.788443189514727e-07, "loss": 0.2329, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8960 }, { "epoch": 3.203583546639681, "grad_norm": 0.438369297856931, "learning_rate": 6.72990651152998e-07, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8970 }, { "epoch": 3.207155187808798, "grad_norm": 0.4379336123678947, "learning_rate": 6.67159645123289e-07, "loss": 0.2244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8980 }, { "epoch": 3.2107268289779154, "grad_norm": 0.45049953635574025, "learning_rate": 6.613513476046447e-07, "loss": 0.2218, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 8990 }, { "epoch": 3.2142984701470327, "grad_norm": 0.4568418405772703, "learning_rate": 6.555658051573249e-07, "loss": 0.2288, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9000 }, { "epoch": 3.2178701113161496, "grad_norm": 0.4129714709714023, "learning_rate": 6.498030641591848e-07, "loss": 0.2224, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9010 }, { "epoch": 3.221441752485267, "grad_norm": 0.4770576540272976, "learning_rate": 6.440631708052969e-07, "loss": 0.2294, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9020 }, { "epoch": 3.2250133936543843, "grad_norm": 0.4508289702367081, "learning_rate": 6.383461711075827e-07, "loss": 0.2228, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9030 }, { "epoch": 3.2285850348235012, "grad_norm": 0.5081602228563172, "learning_rate": 6.32652110894448e-07, "loss": 0.2229, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9040 }, { "epoch": 3.2321566759926186, "grad_norm": 0.42729925975899286, "learning_rate": 6.269810358104077e-07, "loss": 0.2187, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9050 }, { "epoch": 3.235728317161736, "grad_norm": 0.455081900607026, "learning_rate": 6.213329913157267e-07, "loss": 0.2302, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9060 }, { "epoch": 3.239299958330853, "grad_norm": 0.4613482712305252, "learning_rate": 6.157080226860536e-07, "loss": 0.2338, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9070 }, { "epoch": 3.24287159949997, "grad_norm": 0.413583821978395, "learning_rate": 6.10106175012055e-07, "loss": 0.2269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9080 }, { "epoch": 3.2464432406690875, "grad_norm": 0.4256358641740858, "learning_rate": 6.045274931990558e-07, "loss": 0.227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9090 }, { "epoch": 3.2500148818382044, "grad_norm": 0.4410727602613116, "learning_rate": 5.98972021966683e-07, "loss": 0.2192, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9100 }, { "epoch": 3.2535865230073218, "grad_norm": 0.42510105742412124, "learning_rate": 5.934398058484991e-07, "loss": 0.2262, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9110 }, { "epoch": 3.257158164176439, "grad_norm": 0.4421749589580055, "learning_rate": 5.879308891916546e-07, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9120 }, { "epoch": 3.2607298053455565, "grad_norm": 0.4465359432618891, "learning_rate": 5.824453161565242e-07, "loss": 0.2189, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9130 }, { "epoch": 3.2643014465146734, "grad_norm": 0.4386778468888303, "learning_rate": 5.769831307163567e-07, "loss": 0.2212, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9140 }, { "epoch": 3.2678730876837907, "grad_norm": 0.43969024247923927, "learning_rate": 5.715443766569241e-07, "loss": 0.2211, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9150 }, { "epoch": 3.271444728852908, "grad_norm": 0.4841280042950919, "learning_rate": 5.661290975761674e-07, "loss": 0.2222, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9160 }, { "epoch": 3.275016370022025, "grad_norm": 0.43028249447306965, "learning_rate": 5.607373368838486e-07, "loss": 0.2225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9170 }, { "epoch": 3.2785880111911423, "grad_norm": 0.45903939584627335, "learning_rate": 5.553691378012004e-07, "loss": 0.2295, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9180 }, { "epoch": 3.2821596523602596, "grad_norm": 0.4387835891178062, "learning_rate": 5.500245433605858e-07, "loss": 0.2292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9190 }, { "epoch": 3.285731293529377, "grad_norm": 0.4390323394974518, "learning_rate": 5.447035964051456e-07, "loss": 0.2225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9200 }, { "epoch": 3.289302934698494, "grad_norm": 0.45468304875658144, "learning_rate": 5.394063395884604e-07, "loss": 0.2263, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9210 }, { "epoch": 3.2928745758676112, "grad_norm": 0.49857421082659625, "learning_rate": 5.34132815374206e-07, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9220 }, { "epoch": 3.2964462170367286, "grad_norm": 0.4995652805597166, "learning_rate": 5.288830660358142e-07, "loss": 0.2214, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9230 }, { "epoch": 3.3000178582058455, "grad_norm": 0.47464766483941245, "learning_rate": 5.23657133656133e-07, "loss": 0.2244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9240 }, { "epoch": 3.303589499374963, "grad_norm": 0.42707525290171827, "learning_rate": 5.184550601270905e-07, "loss": 0.2245, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9250 }, { "epoch": 3.30716114054408, "grad_norm": 0.45332045123115117, "learning_rate": 5.132768871493571e-07, "loss": 0.2235, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9260 }, { "epoch": 3.310732781713197, "grad_norm": 0.4455639041063865, "learning_rate": 5.081226562320148e-07, "loss": 0.2258, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9270 }, { "epoch": 3.3143044228823144, "grad_norm": 0.4324319313455736, "learning_rate": 5.0299240869222e-07, "loss": 0.2224, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9280 }, { "epoch": 3.3178760640514318, "grad_norm": 0.42414698673635143, "learning_rate": 4.978861856548734e-07, "loss": 0.2243, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9290 }, { "epoch": 3.3214477052205487, "grad_norm": 0.42707846365134206, "learning_rate": 4.928040280522948e-07, "loss": 0.2382, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9300 }, { "epoch": 3.325019346389666, "grad_norm": 0.4246313252911472, "learning_rate": 4.877459766238902e-07, "loss": 0.2205, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9310 }, { "epoch": 3.3285909875587834, "grad_norm": 0.43253448134906214, "learning_rate": 4.827120719158243e-07, "loss": 0.2296, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9320 }, { "epoch": 3.3321626287279003, "grad_norm": 0.43450078224608335, "learning_rate": 4.777023542807018e-07, "loss": 0.229, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9330 }, { "epoch": 3.3357342698970176, "grad_norm": 0.4363961978427439, "learning_rate": 4.7271686387723625e-07, "loss": 0.2225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9340 }, { "epoch": 3.339305911066135, "grad_norm": 0.45658061349483414, "learning_rate": 4.6775564066993293e-07, "loss": 0.2274, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9350 }, { "epoch": 3.3428775522352523, "grad_norm": 0.4509642094193982, "learning_rate": 4.6281872442876833e-07, "loss": 0.218, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9360 }, { "epoch": 3.346449193404369, "grad_norm": 0.4636424632943862, "learning_rate": 4.5790615472886965e-07, "loss": 0.2292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9370 }, { "epoch": 3.3500208345734865, "grad_norm": 0.44021097993476904, "learning_rate": 4.5301797095019686e-07, "loss": 0.2238, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9380 }, { "epoch": 3.353592475742604, "grad_norm": 0.437952090120706, "learning_rate": 4.4815421227723026e-07, "loss": 0.2241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9390 }, { "epoch": 3.357164116911721, "grad_norm": 0.4422016996505597, "learning_rate": 4.433149176986555e-07, "loss": 0.2262, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9400 }, { "epoch": 3.360735758080838, "grad_norm": 0.4416073497219435, "learning_rate": 4.385001260070462e-07, "loss": 0.2226, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9410 }, { "epoch": 3.3643073992499555, "grad_norm": 0.4566130926885388, "learning_rate": 4.3370987579856075e-07, "loss": 0.2265, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9420 }, { "epoch": 3.3678790404190724, "grad_norm": 0.43716008842107645, "learning_rate": 4.289442054726267e-07, "loss": 0.221, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9430 }, { "epoch": 3.3714506815881897, "grad_norm": 0.4295747135270123, "learning_rate": 4.2420315323163524e-07, "loss": 0.227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9440 }, { "epoch": 3.375022322757307, "grad_norm": 0.4344921163984427, "learning_rate": 4.194867570806372e-07, "loss": 0.2257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9450 }, { "epoch": 3.3785939639264244, "grad_norm": 0.41936065855618876, "learning_rate": 4.14795054827033e-07, "loss": 0.2281, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9460 }, { "epoch": 3.3821656050955413, "grad_norm": 0.42885109463625787, "learning_rate": 4.1012808408027487e-07, "loss": 0.2269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9470 }, { "epoch": 3.3857372462646587, "grad_norm": 0.42821304096655227, "learning_rate": 4.054858822515636e-07, "loss": 0.235, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9480 }, { "epoch": 3.389308887433776, "grad_norm": 0.46047010134247357, "learning_rate": 4.008684865535473e-07, "loss": 0.2148, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9490 }, { "epoch": 3.392880528602893, "grad_norm": 0.450023215014394, "learning_rate": 3.9627593400002247e-07, "loss": 0.2186, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9500 }, { "epoch": 3.3964521697720103, "grad_norm": 0.4628252521903469, "learning_rate": 3.917082614056427e-07, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9510 }, { "epoch": 3.4000238109411276, "grad_norm": 0.4377616291629553, "learning_rate": 3.871655053856167e-07, "loss": 0.2228, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9520 }, { "epoch": 3.4035954521102445, "grad_norm": 0.4204372477172826, "learning_rate": 3.8264770235541844e-07, "loss": 0.233, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9530 }, { "epoch": 3.407167093279362, "grad_norm": 0.45383296284555796, "learning_rate": 3.7815488853049534e-07, "loss": 0.2227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9540 }, { "epoch": 3.410738734448479, "grad_norm": 0.4256157176449563, "learning_rate": 3.736870999259782e-07, "loss": 0.2255, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9550 }, { "epoch": 3.414310375617596, "grad_norm": 0.42932735183625526, "learning_rate": 3.6924437235638823e-07, "loss": 0.2272, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9560 }, { "epoch": 3.4178820167867134, "grad_norm": 0.4395411809664829, "learning_rate": 3.648267414353572e-07, "loss": 0.2226, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9570 }, { "epoch": 3.421453657955831, "grad_norm": 0.4789395353295623, "learning_rate": 3.60434242575336e-07, "loss": 0.2194, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9580 }, { "epoch": 3.4250252991249477, "grad_norm": 0.46026844417244916, "learning_rate": 3.5606691098731145e-07, "loss": 0.229, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9590 }, { "epoch": 3.428596940294065, "grad_norm": 0.4548661544045963, "learning_rate": 3.5172478168052915e-07, "loss": 0.2269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9600 }, { "epoch": 3.4321685814631824, "grad_norm": 0.43693949940193977, "learning_rate": 3.47407889462206e-07, "loss": 0.2295, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9610 }, { "epoch": 3.4357402226322997, "grad_norm": 0.4358578582107585, "learning_rate": 3.431162689372535e-07, "loss": 0.2221, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9620 }, { "epoch": 3.4393118638014166, "grad_norm": 0.4277147932086469, "learning_rate": 3.3884995450800646e-07, "loss": 0.2322, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9630 }, { "epoch": 3.442883504970534, "grad_norm": 0.457090524162981, "learning_rate": 3.3460898037393604e-07, "loss": 0.2306, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9640 }, { "epoch": 3.4464551461396513, "grad_norm": 0.48622748466946314, "learning_rate": 3.3039338053138333e-07, "loss": 0.2257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9650 }, { "epoch": 3.450026787308768, "grad_norm": 0.447418853195856, "learning_rate": 3.2620318877328716e-07, "loss": 0.2285, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9660 }, { "epoch": 3.4535984284778856, "grad_norm": 0.4494933647456064, "learning_rate": 3.22038438688908e-07, "loss": 0.2154, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9670 }, { "epoch": 3.457170069647003, "grad_norm": 0.4386760172527179, "learning_rate": 3.1789916366356237e-07, "loss": 0.2337, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9680 }, { "epoch": 3.4607417108161203, "grad_norm": 0.4472559976730384, "learning_rate": 3.137853968783562e-07, "loss": 0.2257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9690 }, { "epoch": 3.464313351985237, "grad_norm": 0.4130599398053046, "learning_rate": 3.096971713099152e-07, "loss": 0.2307, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9700 }, { "epoch": 3.4678849931543545, "grad_norm": 0.44974189566383543, "learning_rate": 3.056345197301239e-07, "loss": 0.2293, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9710 }, { "epoch": 3.471456634323472, "grad_norm": 0.433647374405478, "learning_rate": 3.0159747470586114e-07, "loss": 0.227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9720 }, { "epoch": 3.4750282754925887, "grad_norm": 0.4573097157873816, "learning_rate": 2.9758606859873936e-07, "loss": 0.2255, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9730 }, { "epoch": 3.478599916661706, "grad_norm": 0.4429742982193052, "learning_rate": 2.9360033356484463e-07, "loss": 0.2278, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9740 }, { "epoch": 3.4821715578308234, "grad_norm": 0.45668296151320453, "learning_rate": 2.8964030155448055e-07, "loss": 0.2225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9750 }, { "epoch": 3.4857431989999403, "grad_norm": 0.44853320183811274, "learning_rate": 2.85706004311911e-07, "loss": 0.2332, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9760 }, { "epoch": 3.4893148401690577, "grad_norm": 0.4896496538364987, "learning_rate": 2.8179747337510424e-07, "loss": 0.2151, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9770 }, { "epoch": 3.492886481338175, "grad_norm": 0.45216838981680796, "learning_rate": 2.779147400754833e-07, "loss": 0.2284, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9780 }, { "epoch": 3.496458122507292, "grad_norm": 0.4446772622560835, "learning_rate": 2.740578355376736e-07, "loss": 0.2184, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9790 }, { "epoch": 3.5000297636764093, "grad_norm": 0.4468530070722961, "learning_rate": 2.7022679067925045e-07, "loss": 0.2292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9800 }, { "epoch": 3.5036014048455266, "grad_norm": 0.434552824796451, "learning_rate": 2.664216362104964e-07, "loss": 0.2206, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9810 }, { "epoch": 3.5071730460146435, "grad_norm": 0.4328854611895299, "learning_rate": 2.6264240263415113e-07, "loss": 0.2244, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9820 }, { "epoch": 3.510744687183761, "grad_norm": 0.4782923737873149, "learning_rate": 2.5888912024516743e-07, "loss": 0.2177, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9830 }, { "epoch": 3.514316328352878, "grad_norm": 0.4451929903668464, "learning_rate": 2.5516181913047134e-07, "loss": 0.2278, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9840 }, { "epoch": 3.517887969521995, "grad_norm": 0.461154244396955, "learning_rate": 2.5146052916871664e-07, "loss": 0.2269, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9850 }, { "epoch": 3.5214596106911125, "grad_norm": 0.44185695213818643, "learning_rate": 2.4778528003004823e-07, "loss": 0.2206, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9860 }, { "epoch": 3.52503125186023, "grad_norm": 0.44663018732053594, "learning_rate": 2.441361011758647e-07, "loss": 0.2211, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9870 }, { "epoch": 3.5286028930293467, "grad_norm": 0.4279233693907992, "learning_rate": 2.4051302185857915e-07, "loss": 0.2282, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9880 }, { "epoch": 3.532174534198464, "grad_norm": 0.5027365223037497, "learning_rate": 2.3691607112138646e-07, "loss": 0.2191, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9890 }, { "epoch": 3.5357461753675814, "grad_norm": 0.4283371243464985, "learning_rate": 2.3334527779803294e-07, "loss": 0.2231, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9900 }, { "epoch": 3.5393178165366987, "grad_norm": 0.451203596680216, "learning_rate": 2.2980067051258035e-07, "loss": 0.2208, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9910 }, { "epoch": 3.542889457705816, "grad_norm": 0.44280537011849175, "learning_rate": 2.2628227767917946e-07, "loss": 0.2168, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9920 }, { "epoch": 3.546461098874933, "grad_norm": 0.43243809254606863, "learning_rate": 2.2279012750184228e-07, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9930 }, { "epoch": 3.5500327400440503, "grad_norm": 0.4587688082684246, "learning_rate": 2.1932424797421556e-07, "loss": 0.2223, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9940 }, { "epoch": 3.5536043812131677, "grad_norm": 0.4638395466260308, "learning_rate": 2.1588466687935497e-07, "loss": 0.2275, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9950 }, { "epoch": 3.5571760223822846, "grad_norm": 0.4545490936757308, "learning_rate": 2.1247141178950523e-07, "loss": 0.2217, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9960 }, { "epoch": 3.560747663551402, "grad_norm": 0.47690321382830164, "learning_rate": 2.0908451006587663e-07, "loss": 0.2254, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9970 }, { "epoch": 3.5643193047205193, "grad_norm": 0.4392958225160946, "learning_rate": 2.0572398885842592e-07, "loss": 0.2148, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9980 }, { "epoch": 3.567890945889636, "grad_norm": 0.4239943589734696, "learning_rate": 2.0238987510564094e-07, "loss": 0.2238, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 9990 }, { "epoch": 3.5714625870587535, "grad_norm": 0.44038559554116197, "learning_rate": 1.99082195534322e-07, "loss": 0.2304, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10000 }, { "epoch": 3.575034228227871, "grad_norm": 0.4345253477997, "learning_rate": 1.9580097665936762e-07, "loss": 0.2287, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10010 }, { "epoch": 3.5786058693969878, "grad_norm": 0.4403790298956109, "learning_rate": 1.925462447835668e-07, "loss": 0.2268, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10020 }, { "epoch": 3.582177510566105, "grad_norm": 0.448263330638005, "learning_rate": 1.8931802599738046e-07, "loss": 0.2354, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10030 }, { "epoch": 3.5857491517352225, "grad_norm": 0.43699511153186094, "learning_rate": 1.8611634617873757e-07, "loss": 0.2273, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10040 }, { "epoch": 3.5893207929043394, "grad_norm": 0.4402260519814635, "learning_rate": 1.829412309928272e-07, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10050 }, { "epoch": 3.5928924340734567, "grad_norm": 0.44922009323567325, "learning_rate": 1.7979270589189111e-07, "loss": 0.2191, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10060 }, { "epoch": 3.596464075242574, "grad_norm": 0.4348279357761571, "learning_rate": 1.7667079611502073e-07, "loss": 0.2248, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10070 }, { "epoch": 3.600035716411691, "grad_norm": 0.4224705514098165, "learning_rate": 1.7357552668795528e-07, "loss": 0.2255, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10080 }, { "epoch": 3.6036073575808083, "grad_norm": 0.4516505048559037, "learning_rate": 1.7050692242287923e-07, "loss": 0.2283, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10090 }, { "epoch": 3.6071789987499256, "grad_norm": 0.43040590998867473, "learning_rate": 1.6746500791822649e-07, "loss": 0.2199, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10100 }, { "epoch": 3.6107506399190425, "grad_norm": 0.47700956208234957, "learning_rate": 1.6444980755848065e-07, "loss": 0.2191, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10110 }, { "epoch": 3.61432228108816, "grad_norm": 0.4546842042912744, "learning_rate": 1.6146134551398033e-07, "loss": 0.2178, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10120 }, { "epoch": 3.6178939222572772, "grad_norm": 0.44712219892480254, "learning_rate": 1.584996457407249e-07, "loss": 0.2275, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10130 }, { "epoch": 3.6214655634263946, "grad_norm": 0.42639223794691933, "learning_rate": 1.555647319801844e-07, "loss": 0.2284, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10140 }, { "epoch": 3.6250372045955115, "grad_norm": 0.445529806494021, "learning_rate": 1.526566277591065e-07, "loss": 0.2211, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10150 }, { "epoch": 3.628608845764629, "grad_norm": 0.43147908453278766, "learning_rate": 1.4977535638932914e-07, "loss": 0.2275, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10160 }, { "epoch": 3.632180486933746, "grad_norm": 0.4399996543390641, "learning_rate": 1.469209409675953e-07, "loss": 0.2281, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10170 }, { "epoch": 3.6357521281028635, "grad_norm": 0.4523298177857587, "learning_rate": 1.4409340437536543e-07, "loss": 0.231, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10180 }, { "epoch": 3.6393237692719804, "grad_norm": 0.4450819890687884, "learning_rate": 1.4129276927863366e-07, "loss": 0.2253, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10190 }, { "epoch": 3.6428954104410978, "grad_norm": 0.4445668634013846, "learning_rate": 1.3851905812774945e-07, "loss": 0.2219, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10200 }, { "epoch": 3.646467051610215, "grad_norm": 0.4393809602135747, "learning_rate": 1.3577229315723405e-07, "loss": 0.2227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10210 }, { "epoch": 3.650038692779332, "grad_norm": 0.4392939224137732, "learning_rate": 1.3305249638560396e-07, "loss": 0.2176, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10220 }, { "epoch": 3.6536103339484494, "grad_norm": 0.4550312708413194, "learning_rate": 1.3035968961519534e-07, "loss": 0.2304, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10230 }, { "epoch": 3.6571819751175667, "grad_norm": 0.44151330373786013, "learning_rate": 1.2769389443198603e-07, "loss": 0.2284, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10240 }, { "epoch": 3.6607536162866836, "grad_norm": 0.4168807111228616, "learning_rate": 1.2505513220542614e-07, "loss": 0.2294, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10250 }, { "epoch": 3.664325257455801, "grad_norm": 0.4464258397287314, "learning_rate": 1.2244342408826508e-07, "loss": 0.2313, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10260 }, { "epoch": 3.6678968986249183, "grad_norm": 0.45152627548033747, "learning_rate": 1.1985879101638148e-07, "loss": 0.2184, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10270 }, { "epoch": 3.671468539794035, "grad_norm": 0.47117490838138765, "learning_rate": 1.1730125370861632e-07, "loss": 0.2257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10280 }, { "epoch": 3.6750401809631525, "grad_norm": 0.439951465915027, "learning_rate": 1.1477083266660642e-07, "loss": 0.2328, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10290 }, { "epoch": 3.67861182213227, "grad_norm": 0.4489829456467373, "learning_rate": 1.122675481746207e-07, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10300 }, { "epoch": 3.682183463301387, "grad_norm": 0.44134052549882896, "learning_rate": 1.0979142029939548e-07, "loss": 0.2275, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10310 }, { "epoch": 3.685755104470504, "grad_norm": 0.46831622118801114, "learning_rate": 1.073424688899775e-07, "loss": 0.2237, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10320 }, { "epoch": 3.6893267456396215, "grad_norm": 0.45036094056520976, "learning_rate": 1.0492071357756033e-07, "loss": 0.2249, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10330 }, { "epoch": 3.6928983868087384, "grad_norm": 0.4659459059342654, "learning_rate": 1.0252617377533046e-07, "loss": 0.2284, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10340 }, { "epoch": 3.6964700279778557, "grad_norm": 0.4225585197970624, "learning_rate": 1.0015886867831115e-07, "loss": 0.2321, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10350 }, { "epoch": 3.700041669146973, "grad_norm": 0.44704385962317833, "learning_rate": 9.781881726320573e-08, "loss": 0.2256, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10360 }, { "epoch": 3.70361331031609, "grad_norm": 0.4549601644611548, "learning_rate": 9.550603828824816e-08, "loss": 0.2248, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10370 }, { "epoch": 3.7071849514852073, "grad_norm": 0.447148866393789, "learning_rate": 9.322055029305403e-08, "loss": 0.2286, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10380 }, { "epoch": 3.7107565926543247, "grad_norm": 0.45463473593785586, "learning_rate": 9.096237159846721e-08, "loss": 0.2227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10390 }, { "epoch": 3.714328233823442, "grad_norm": 0.4412149959468804, "learning_rate": 8.873152030641751e-08, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10400 }, { "epoch": 3.717899874992559, "grad_norm": 0.4495044254264032, "learning_rate": 8.652801429977353e-08, "loss": 0.2251, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10410 }, { "epoch": 3.7214715161616763, "grad_norm": 0.43845028047131007, "learning_rate": 8.435187124219878e-08, "loss": 0.225, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10420 }, { "epoch": 3.7250431573307936, "grad_norm": 0.4599594623404225, "learning_rate": 8.220310857801189e-08, "loss": 0.2242, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10430 }, { "epoch": 3.728614798499911, "grad_norm": 0.4584359385494083, "learning_rate": 8.008174353204584e-08, "loss": 0.2351, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10440 }, { "epoch": 3.732186439669028, "grad_norm": 0.43163046353373913, "learning_rate": 7.798779310950854e-08, "loss": 0.2261, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10450 }, { "epoch": 3.735758080838145, "grad_norm": 0.43734187492828513, "learning_rate": 7.592127409584915e-08, "loss": 0.2289, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10460 }, { "epoch": 3.7393297220072625, "grad_norm": 0.43309527476787557, "learning_rate": 7.388220305662163e-08, "loss": 0.2285, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10470 }, { "epoch": 3.7429013631763794, "grad_norm": 0.4344606568256094, "learning_rate": 7.187059633735193e-08, "loss": 0.2331, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10480 }, { "epoch": 3.746473004345497, "grad_norm": 0.43446478923577986, "learning_rate": 6.988647006340892e-08, "loss": 0.2317, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10490 }, { "epoch": 3.750044645514614, "grad_norm": 0.4356415389064807, "learning_rate": 6.792984013987308e-08, "loss": 0.2297, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10500 }, { "epoch": 3.753616286683731, "grad_norm": 0.44211853419334296, "learning_rate": 6.600072225140902e-08, "loss": 0.2283, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10510 }, { "epoch": 3.7571879278528484, "grad_norm": 0.436758452421375, "learning_rate": 6.409913186214078e-08, "loss": 0.2349, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10520 }, { "epoch": 3.7607595690219657, "grad_norm": 0.43053855087980014, "learning_rate": 6.222508421552785e-08, "loss": 0.2297, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10530 }, { "epoch": 3.7643312101910826, "grad_norm": 0.4558435875308646, "learning_rate": 6.037859433424203e-08, "loss": 0.233, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10540 }, { "epoch": 3.7679028513602, "grad_norm": 0.4389567806893566, "learning_rate": 5.855967702004844e-08, "loss": 0.2186, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10550 }, { "epoch": 3.7714744925293173, "grad_norm": 0.4360877408318116, "learning_rate": 5.676834685368515e-08, "loss": 0.2266, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10560 }, { "epoch": 3.7750461336984342, "grad_norm": 0.4356712867921081, "learning_rate": 5.500461819474694e-08, "loss": 0.2294, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10570 }, { "epoch": 3.7786177748675516, "grad_norm": 0.43833812335077477, "learning_rate": 5.326850518157111e-08, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10580 }, { "epoch": 3.782189416036669, "grad_norm": 0.44419001553486764, "learning_rate": 5.156002173112356e-08, "loss": 0.2278, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10590 }, { "epoch": 3.785761057205786, "grad_norm": 0.4533306175595154, "learning_rate": 4.9879181538886154e-08, "loss": 0.2249, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10600 }, { "epoch": 3.789332698374903, "grad_norm": 0.43562953329475035, "learning_rate": 4.8225998078749054e-08, "loss": 0.2299, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10610 }, { "epoch": 3.7929043395440205, "grad_norm": 0.4515448260405365, "learning_rate": 4.660048460290078e-08, "loss": 0.2139, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10620 }, { "epoch": 3.7964759807131374, "grad_norm": 0.445160563686723, "learning_rate": 4.50026541417225e-08, "loss": 0.2329, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10630 }, { "epoch": 3.8000476218822548, "grad_norm": 0.4327457321668427, "learning_rate": 4.343251950368465e-08, "loss": 0.2363, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10640 }, { "epoch": 3.803619263051372, "grad_norm": 0.445610865255777, "learning_rate": 4.1890093275242846e-08, "loss": 0.2306, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10650 }, { "epoch": 3.8071909042204894, "grad_norm": 0.4360425724072501, "learning_rate": 4.0375387820737574e-08, "loss": 0.2355, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10660 }, { "epoch": 3.810762545389607, "grad_norm": 0.45104819913425176, "learning_rate": 3.888841528229553e-08, "loss": 0.2242, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10670 }, { "epoch": 3.8143341865587237, "grad_norm": 0.43201411969010634, "learning_rate": 3.742918757973129e-08, "loss": 0.2233, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10680 }, { "epoch": 3.817905827727841, "grad_norm": 0.4330792005779704, "learning_rate": 3.5997716410452127e-08, "loss": 0.223, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10690 }, { "epoch": 3.8214774688969584, "grad_norm": 0.4642141392644615, "learning_rate": 3.4594013249365507e-08, "loss": 0.2247, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10700 }, { "epoch": 3.8250491100660753, "grad_norm": 0.46034792920824497, "learning_rate": 3.3218089348785065e-08, "loss": 0.2279, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10710 }, { "epoch": 3.8286207512351926, "grad_norm": 0.443689997182439, "learning_rate": 3.1869955738341225e-08, "loss": 0.2303, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10720 }, { "epoch": 3.83219239240431, "grad_norm": 0.42628378709969905, "learning_rate": 3.054962322489341e-08, "loss": 0.2262, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10730 }, { "epoch": 3.835764033573427, "grad_norm": 0.4395794254721828, "learning_rate": 2.9257102392442946e-08, "loss": 0.2271, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10740 }, { "epoch": 3.8393356747425442, "grad_norm": 0.44880114793329107, "learning_rate": 2.7992403602047632e-08, "loss": 0.227, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10750 }, { "epoch": 3.8429073159116616, "grad_norm": 0.44743156837785386, "learning_rate": 2.6755536991739714e-08, "loss": 0.2214, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10760 }, { "epoch": 3.8464789570807785, "grad_norm": 0.4468589013028941, "learning_rate": 2.5546512476444294e-08, "loss": 0.2228, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10770 }, { "epoch": 3.850050598249896, "grad_norm": 0.4567894351522827, "learning_rate": 2.4365339747898893e-08, "loss": 0.2192, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10780 }, { "epoch": 3.853622239419013, "grad_norm": 0.4532132712813433, "learning_rate": 2.321202827457769e-08, "loss": 0.2303, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10790 }, { "epoch": 3.85719388058813, "grad_norm": 0.4490186477309506, "learning_rate": 2.2086587301613393e-08, "loss": 0.2304, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10800 }, { "epoch": 3.8607655217572474, "grad_norm": 0.43279214372519503, "learning_rate": 2.0989025850724206e-08, "loss": 0.2321, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10810 }, { "epoch": 3.8643371629263648, "grad_norm": 0.43545063828161296, "learning_rate": 1.991935272014311e-08, "loss": 0.2223, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10820 }, { "epoch": 3.8679088040954817, "grad_norm": 0.416421287059464, "learning_rate": 1.8877576484544022e-08, "loss": 0.2295, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10830 }, { "epoch": 3.871480445264599, "grad_norm": 0.44598171266797837, "learning_rate": 1.7863705494975736e-08, "loss": 0.2214, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10840 }, { "epoch": 3.8750520864337163, "grad_norm": 0.4293547056423775, "learning_rate": 1.6877747878793536e-08, "loss": 0.2241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10850 }, { "epoch": 3.8786237276028332, "grad_norm": 0.44628839462942493, "learning_rate": 1.5919711539594706e-08, "loss": 0.2254, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10860 }, { "epoch": 3.8821953687719506, "grad_norm": 0.44532618818831393, "learning_rate": 1.4989604157154777e-08, "loss": 0.2322, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10870 }, { "epoch": 3.885767009941068, "grad_norm": 0.4620906666796364, "learning_rate": 1.4087433187367326e-08, "loss": 0.223, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10880 }, { "epoch": 3.8893386511101853, "grad_norm": 0.4519981254148374, "learning_rate": 1.3213205862181387e-08, "loss": 0.2339, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10890 }, { "epoch": 3.892910292279302, "grad_norm": 0.46209671155053056, "learning_rate": 1.2366929189546295e-08, "loss": 0.2286, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10900 }, { "epoch": 3.8964819334484195, "grad_norm": 0.4477262080879772, "learning_rate": 1.1548609953354549e-08, "loss": 0.2266, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10910 }, { "epoch": 3.900053574617537, "grad_norm": 0.41528706320813497, "learning_rate": 1.0758254713386262e-08, "loss": 0.2334, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10920 }, { "epoch": 3.9036252157866542, "grad_norm": 0.4380520934393717, "learning_rate": 9.995869805257852e-09, "loss": 0.2203, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10930 }, { "epoch": 3.907196856955771, "grad_norm": 0.44084027878315196, "learning_rate": 9.261461340372313e-09, "loss": 0.2237, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10940 }, { "epoch": 3.9107684981248885, "grad_norm": 0.4557862047592466, "learning_rate": 8.555035205867145e-09, "loss": 0.2226, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10950 }, { "epoch": 3.914340139294006, "grad_norm": 0.46256250332145743, "learning_rate": 7.876597064569668e-09, "loss": 0.2191, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10960 }, { "epoch": 3.9179117804631227, "grad_norm": 0.45890967205133926, "learning_rate": 7.226152354951165e-09, "loss": 0.2202, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10970 }, { "epoch": 3.92148342163224, "grad_norm": 0.44125688697910415, "learning_rate": 6.603706291082201e-09, "loss": 0.2245, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10980 }, { "epoch": 3.9250550628013574, "grad_norm": 0.43913591864768914, "learning_rate": 6.009263862592596e-09, "loss": 0.2209, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 10990 }, { "epoch": 3.9286267039704743, "grad_norm": 0.42891856210394647, "learning_rate": 5.442829834630236e-09, "loss": 0.229, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11000 }, { "epoch": 3.9321983451395917, "grad_norm": 0.43191273643462674, "learning_rate": 4.904408747821831e-09, "loss": 0.2241, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11010 }, { "epoch": 3.935769986308709, "grad_norm": 0.4321814680588804, "learning_rate": 4.3940049182394916e-09, "loss": 0.2292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11020 }, { "epoch": 3.939341627477826, "grad_norm": 0.4440803966779172, "learning_rate": 3.911622437363427e-09, "loss": 0.2324, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11030 }, { "epoch": 3.9429132686469432, "grad_norm": 0.44630919988961, "learning_rate": 3.4572651720508605e-09, "loss": 0.2291, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11040 }, { "epoch": 3.9464849098160606, "grad_norm": 0.4479542040467652, "learning_rate": 3.0309367645030004e-09, "loss": 0.2312, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11050 }, { "epoch": 3.9500565509851775, "grad_norm": 0.44081290560579367, "learning_rate": 2.632640632239003e-09, "loss": 0.2228, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11060 }, { "epoch": 3.953628192154295, "grad_norm": 0.47217543215125773, "learning_rate": 2.262379968064887e-09, "loss": 0.2219, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11070 }, { "epoch": 3.957199833323412, "grad_norm": 0.43867083497842563, "learning_rate": 1.920157740050998e-09, "loss": 0.2305, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11080 }, { "epoch": 3.960771474492529, "grad_norm": 0.4391386544004875, "learning_rate": 1.6059766915067496e-09, "loss": 0.2272, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11090 }, { "epoch": 3.9643431156616464, "grad_norm": 0.45329886069059977, "learning_rate": 1.319839340958473e-09, "loss": 0.2257, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11100 }, { "epoch": 3.967914756830764, "grad_norm": 0.4698347155108858, "learning_rate": 1.0617479821303787e-09, "loss": 0.2253, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11110 }, { "epoch": 3.9714863979998807, "grad_norm": 0.4503276770969569, "learning_rate": 8.317046839243503e-10, "loss": 0.2277, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11120 }, { "epoch": 3.975058039168998, "grad_norm": 0.44723000513320477, "learning_rate": 6.297112904051771e-10, "loss": 0.2327, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11130 }, { "epoch": 3.9786296803381154, "grad_norm": 0.4400304568539704, "learning_rate": 4.5576942078501267e-10, "loss": 0.2251, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11140 }, { "epoch": 3.9822013215072327, "grad_norm": 0.44837484863154975, "learning_rate": 3.0988046941055057e-10, "loss": 0.2273, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11150 }, { "epoch": 3.9857729626763496, "grad_norm": 0.4270143306404482, "learning_rate": 1.9204560575136753e-10, "loss": 0.232, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11160 }, { "epoch": 3.989344603845467, "grad_norm": 0.44880561675935815, "learning_rate": 1.0226577439059748e-10, "loss": 0.2289, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11170 }, { "epoch": 3.9929162450145843, "grad_norm": 0.43633117229278284, "learning_rate": 4.054169501832572e-11, "loss": 0.2292, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11180 }, { "epoch": 3.9964878861837017, "grad_norm": 0.4732141126000996, "learning_rate": 6.873862424983113e-12, "loss": 0.2291, "memory/device_mem_reserved(gib)": 49.07, "memory/max_mem_active(gib)": 43.76, "memory/max_mem_allocated(gib)": 42.86, "step": 11190 } ], "logging_steps": 10, "max_steps": 11196, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.217947929182208e+16, "train_batch_size": 6, "trial_name": null, "trial_params": null }