| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 13724, |
| "global_step": 68625, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0, |
| "eval_loss": 8.86070728302002, |
| "eval_runtime": 579.5301, |
| "eval_samples_per_second": 523.403, |
| "eval_steps_per_second": 43.618, |
| "memory/device_mem_reserved(gib)": 28.34, |
| "memory/max_mem_active(gib)": 28.24, |
| "memory/max_mem_allocated(gib)": 28.24, |
| "step": 0 |
| }, |
| { |
| "epoch": 0.0036430536075338347, |
| "grad_norm": 4.34375, |
| "learning_rate": 0.0001999999601433486, |
| "loss": 6.5752, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.007286107215067669, |
| "grad_norm": 3.015625, |
| "learning_rate": 0.00019999979243626065, |
| "loss": 5.425, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.010929160822601505, |
| "grad_norm": 3.609375, |
| "learning_rate": 0.0001999994937082425, |
| "loss": 5.221, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.014572214430135339, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.0001999990639596855, |
| "loss": 5.1478, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.018215268037669173, |
| "grad_norm": 2.375, |
| "learning_rate": 0.00019999850319115273, |
| "loss": 5.0803, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.02185832164520301, |
| "grad_norm": 3.921875, |
| "learning_rate": 0.0001999978114033789, |
| "loss": 5.0557, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.025501375252736845, |
| "grad_norm": 3.28125, |
| "learning_rate": 0.00019999698859727048, |
| "loss": 5.0235, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.029144428860270678, |
| "grad_norm": 3.453125, |
| "learning_rate": 0.0001999960347739054, |
| "loss": 4.9791, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.03278748246780452, |
| "grad_norm": 892.0, |
| "learning_rate": 0.00019999494993453346, |
| "loss": 4.9892, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.036430536075338346, |
| "grad_norm": 3.21875, |
| "learning_rate": 0.00019999373408057598, |
| "loss": 4.9557, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04007358968287218, |
| "grad_norm": 2.84375, |
| "learning_rate": 0.000199992387213626, |
| "loss": 4.9263, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04371664329040602, |
| "grad_norm": 1.8515625, |
| "learning_rate": 0.0001999909093354482, |
| "loss": 4.9129, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.047359696897939854, |
| "grad_norm": 3.046875, |
| "learning_rate": 0.00019998930044797897, |
| "loss": 4.9079, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.05100275050547369, |
| "grad_norm": 3.0, |
| "learning_rate": 0.0001999875605533262, |
| "loss": 4.9052, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.054645804113007526, |
| "grad_norm": 2.796875, |
| "learning_rate": 0.00019998568965376955, |
| "loss": 4.8854, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.058288857720541355, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00019998368775176034, |
| "loss": 4.8794, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.06193191132807519, |
| "grad_norm": 3.171875, |
| "learning_rate": 0.00019998155484992144, |
| "loss": 4.8675, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.06557496493560903, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00019997929095104744, |
| "loss": 4.8586, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06921801854314286, |
| "grad_norm": 4.65625, |
| "learning_rate": 0.0001999768960581045, |
| "loss": 4.8677, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.07286107215067669, |
| "grad_norm": 2.671875, |
| "learning_rate": 0.00019997437017423044, |
| "loss": 4.8616, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07650412575821053, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.0001999717133027347, |
| "loss": 4.859, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.08014717936574436, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00019996892544709834, |
| "loss": 4.8502, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0837902329732782, |
| "grad_norm": 2.65625, |
| "learning_rate": 0.0001999660066109741, |
| "loss": 4.8431, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.08743328658081204, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00019996295679818618, |
| "loss": 4.8409, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09107634018834587, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00019995977601273052, |
| "loss": 4.8311, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.09471939379587971, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00019995646425877467, |
| "loss": 4.8255, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.09836244740341354, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.0001999530215406577, |
| "loss": 4.8379, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.10200550101094738, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00019994944786289027, |
| "loss": 4.8341, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.10564855461848122, |
| "grad_norm": 3.328125, |
| "learning_rate": 0.0001999457432301547, |
| "loss": 4.8198, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.10929160822601505, |
| "grad_norm": 2.0, |
| "learning_rate": 0.0001999419076473048, |
| "loss": 4.8208, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11293466183354887, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00019993794111936603, |
| "loss": 4.811, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.11657771544108271, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.0001999338436515354, |
| "loss": 4.8103, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.12022076904861655, |
| "grad_norm": 2.25, |
| "learning_rate": 0.00019992961524918138, |
| "loss": 4.8211, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.12386382265615038, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.00019992525591784418, |
| "loss": 4.7974, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.12750687626368423, |
| "grad_norm": 4.90625, |
| "learning_rate": 0.00019992076566323537, |
| "loss": 4.8109, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.13114992987121807, |
| "grad_norm": 2.5, |
| "learning_rate": 0.00019991614449123816, |
| "loss": 4.8088, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.13479298347875188, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.00019991139240790727, |
| "loss": 4.8129, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1384360370862857, |
| "grad_norm": 2.5, |
| "learning_rate": 0.00019990650941946892, |
| "loss": 4.7992, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.14207909069381955, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.0001999014955323209, |
| "loss": 4.7891, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.14572214430135338, |
| "grad_norm": 3.015625, |
| "learning_rate": 0.00019989635075303244, |
| "loss": 4.807, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.14936519790888722, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00019989107508834426, |
| "loss": 4.7982, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.15300825151642106, |
| "grad_norm": 4.1875, |
| "learning_rate": 0.00019988566854516865, |
| "loss": 4.7876, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.1566513051239549, |
| "grad_norm": 2.296875, |
| "learning_rate": 0.00019988013113058931, |
| "loss": 4.781, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.16029435873148873, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00019987446285186144, |
| "loss": 4.7979, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.16393741233902256, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00019986866371641163, |
| "loss": 4.7943, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.1675804659465564, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.00019986273373183807, |
| "loss": 4.7746, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.17122351955409024, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00019985667290591024, |
| "loss": 4.7891, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.17486657316162407, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00019985048124656908, |
| "loss": 4.7855, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.1785096267691579, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00019984415876192705, |
| "loss": 4.7557, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.18215268037669174, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00019983770546026786, |
| "loss": 4.792, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.18579573398422558, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00019983112135004677, |
| "loss": 4.7808, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.18943878759175942, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00019982440643989035, |
| "loss": 4.7753, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.19308184119929325, |
| "grad_norm": 1.9921875, |
| "learning_rate": 0.0001998175607385965, |
| "loss": 4.7718, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.1967248948068271, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.00019981058425513464, |
| "loss": 4.7856, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.20036794841436092, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00019980347699864533, |
| "loss": 4.7822, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.20401100202189476, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.0001997962389784407, |
| "loss": 4.7734, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.2076540556294286, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00019978887020400402, |
| "loss": 4.7677, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.21129710923696243, |
| "grad_norm": 3.078125, |
| "learning_rate": 0.00019978137068498995, |
| "loss": 4.7827, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.21494016284449627, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00019977374043122446, |
| "loss": 4.76, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.2185832164520301, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00019976597945270478, |
| "loss": 4.7473, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2222262700595639, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00019975808775959947, |
| "loss": 4.7717, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.22586932366709775, |
| "grad_norm": 2.046875, |
| "learning_rate": 0.0001997500653622483, |
| "loss": 4.7739, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.22951237727463158, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00019974191227116234, |
| "loss": 4.7492, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.23315543088216542, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.00019973362849702383, |
| "loss": 4.7625, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.23679848448969926, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.00019972521405068626, |
| "loss": 4.7515, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.2404415380972331, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.0001997166689431744, |
| "loss": 4.7716, |
| "memory/device_mem_reserved(gib)": 63.02, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.24408459170476693, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.00019970799318568412, |
| "loss": 4.7701, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.24772764531230076, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.0001996991867895825, |
| "loss": 4.7435, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.2513706989198346, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.00019969024976640776, |
| "loss": 4.7664, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.25501375252736846, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00019968118212786935, |
| "loss": 4.7539, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2586568061349023, |
| "grad_norm": 1.984375, |
| "learning_rate": 0.00019967198388584775, |
| "loss": 4.7536, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.26229985974243614, |
| "grad_norm": 3.171875, |
| "learning_rate": 0.00019966265505239465, |
| "loss": 4.7535, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.26594291334996994, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00019965319563973276, |
| "loss": 4.7544, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.26958596695750375, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00019964360566025592, |
| "loss": 4.7627, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.2732290205650376, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.0001996338851265291, |
| "loss": 4.7506, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.2768720741725714, |
| "grad_norm": 2.375, |
| "learning_rate": 0.00019962403405128818, |
| "loss": 4.7488, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.2805151277801053, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.0001996140524474402, |
| "loss": 4.7349, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.2841581813876391, |
| "grad_norm": 2.484375, |
| "learning_rate": 0.00019960394032806313, |
| "loss": 4.74, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.28780123499517296, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00019959369770640605, |
| "loss": 4.7398, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.29144428860270677, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001995833245958889, |
| "loss": 4.7369, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.29508734221024063, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.00019957282101010275, |
| "loss": 4.7469, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.29873039581777444, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00019956218696280946, |
| "loss": 4.7504, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.3023734494253083, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00019955142246794188, |
| "loss": 4.7473, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.3060165030328421, |
| "grad_norm": 1.84375, |
| "learning_rate": 0.0001995405275396038, |
| "loss": 4.7453, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.309659556640376, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.0001995295021920699, |
| "loss": 4.7269, |
| "memory/device_mem_reserved(gib)": 63.21, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.3133026102479098, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.0001995183464397857, |
| "loss": 4.7336, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.31694566385544365, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00019950706029736758, |
| "loss": 4.7422, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.32058871746297746, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00019949564377960281, |
| "loss": 4.7503, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.3242317710705113, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.0001994840969014495, |
| "loss": 4.7461, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.32787482467804513, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00019947241967803637, |
| "loss": 4.7384, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.331517878285579, |
| "grad_norm": 1.1875, |
| "learning_rate": 0.0001994606121246632, |
| "loss": 4.7454, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.3351609318931128, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019944867425680034, |
| "loss": 4.7483, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.33880398550064666, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00019943660609008886, |
| "loss": 4.7328, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.3424470391081805, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00019942440764034075, |
| "loss": 4.7382, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.34609009271571434, |
| "grad_norm": 2.78125, |
| "learning_rate": 0.00019941207892353843, |
| "loss": 4.729, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.34973314632324815, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.0001993996199558352, |
| "loss": 4.7405, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.353376199930782, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00019938703075355496, |
| "loss": 4.7504, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.3570192535383158, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.0001993743113331922, |
| "loss": 4.7415, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.3606623071458496, |
| "grad_norm": 1.84375, |
| "learning_rate": 0.0001993614617114121, |
| "loss": 4.7293, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.3643053607533835, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00019934848190505036, |
| "loss": 4.7308, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.3679484143609173, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00019933537193111327, |
| "loss": 4.7361, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.37159146796845116, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.0001993221318067777, |
| "loss": 4.7288, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.37523452157598497, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.00019930876154939097, |
| "loss": 4.7409, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.37887757518351883, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00019929526117647104, |
| "loss": 4.7261, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.38252062879105264, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00019928163070570619, |
| "loss": 4.7163, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.3861636823985865, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.00019926787015495524, |
| "loss": 4.7419, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.3898067360061203, |
| "grad_norm": 2.578125, |
| "learning_rate": 0.00019925397954224746, |
| "loss": 4.723, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.3934497896136542, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00019923995888578242, |
| "loss": 4.7147, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.397092843221188, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00019922580820393027, |
| "loss": 4.7327, |
| "memory/device_mem_reserved(gib)": 63.25, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.40073589682872185, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.0001992115275152313, |
| "loss": 4.7336, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.40437895043625566, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00019919711683839627, |
| "loss": 4.7355, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.4080220040437895, |
| "grad_norm": 3.140625, |
| "learning_rate": 0.00019918257619230627, |
| "loss": 4.7275, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.41166505765132333, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00019916790559601258, |
| "loss": 4.7265, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.4153081112588572, |
| "grad_norm": 3.390625, |
| "learning_rate": 0.0001991531050687368, |
| "loss": 4.7165, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.418951164866391, |
| "grad_norm": 1.84375, |
| "learning_rate": 0.00019913817462987075, |
| "loss": 4.7273, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.42259421847392487, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.0001991231142989765, |
| "loss": 4.7246, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.4262372720814587, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00019910792409578624, |
| "loss": 4.7113, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.42988032568899254, |
| "grad_norm": 2.453125, |
| "learning_rate": 0.0001990926040402024, |
| "loss": 4.7308, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.43352337929652635, |
| "grad_norm": 2.375, |
| "learning_rate": 0.00019907715415229746, |
| "loss": 4.7258, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.4371664329040602, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.00019906157445231406, |
| "loss": 4.7347, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.440809486511594, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00019904586496066493, |
| "loss": 4.7186, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.4444525401191278, |
| "grad_norm": 1.9765625, |
| "learning_rate": 0.00019903002569793282, |
| "loss": 4.7277, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.4480955937266617, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.0001990140566848705, |
| "loss": 4.7184, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.4517386473341955, |
| "grad_norm": 1.375, |
| "learning_rate": 0.0001989979579424008, |
| "loss": 4.7278, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.45538170094172936, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00019898172949161648, |
| "loss": 4.7344, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.45902475454926317, |
| "grad_norm": 2.734375, |
| "learning_rate": 0.00019896537135378025, |
| "loss": 4.7325, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.46266780815679703, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00019894888355032468, |
| "loss": 4.7283, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.46631086176433084, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.0001989322661028524, |
| "loss": 4.7196, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.4699539153718647, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.0001989155190331357, |
| "loss": 4.7179, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.4735969689793985, |
| "grad_norm": 1.1171875, |
| "learning_rate": 0.00019889864236311683, |
| "loss": 4.7332, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.4772400225869324, |
| "grad_norm": 3.234375, |
| "learning_rate": 0.0001988816361149078, |
| "loss": 4.7166, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.4808830761944662, |
| "grad_norm": 1.875, |
| "learning_rate": 0.0001988645003107904, |
| "loss": 4.7201, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.48452612980200005, |
| "grad_norm": 1.84375, |
| "learning_rate": 0.00019884723497321617, |
| "loss": 4.7344, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.48816918340953386, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00019882984012480634, |
| "loss": 4.7158, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.4918122370170677, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00019881231578835185, |
| "loss": 4.7163, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.49545529062460153, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00019879466198681337, |
| "loss": 4.7198, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.4990983442321354, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00019877687874332104, |
| "loss": 4.7198, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.5027413978396692, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.0001987589660811747, |
| "loss": 4.7289, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.506384451447203, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.00019874092402384375, |
| "loss": 4.7214, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.5100275050547369, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00019872275259496709, |
| "loss": 4.7201, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.5136705586622707, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019870445181835317, |
| "loss": 4.7091, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.5173136122698045, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00019868602171797985, |
| "loss": 4.7131, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.5209566658773384, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.00019866746231799452, |
| "loss": 4.7132, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.5245997194848723, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019864877364271388, |
| "loss": 4.7088, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.5282427730924061, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.0001986299557166241, |
| "loss": 4.7075, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.5318858266999399, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.0001986110085643806, |
| "loss": 4.7243, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.5355288803074737, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00019859193221080824, |
| "loss": 4.7224, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.5391719339150075, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00019857272668090102, |
| "loss": 4.703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.5428149875225414, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.0001985533919998223, |
| "loss": 4.7123, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.5464580411300752, |
| "grad_norm": 2.546875, |
| "learning_rate": 0.00019853392819290457, |
| "loss": 4.7179, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.550101094737609, |
| "grad_norm": 2.625, |
| "learning_rate": 0.0001985143352856496, |
| "loss": 4.7139, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.5537441483451429, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.0001984946133037282, |
| "loss": 4.7175, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.5573872019526768, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00019847476227298038, |
| "loss": 4.7202, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.5610302555602106, |
| "grad_norm": 1.2734375, |
| "learning_rate": 0.00019845478221941517, |
| "loss": 4.7185, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.5646733091677444, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.0001984346731692107, |
| "loss": 4.7153, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.5683163627752782, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.0001984144351487141, |
| "loss": 4.723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.5719594163828121, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00019839406818444145, |
| "loss": 4.7072, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.5756024699903459, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00019837357230307776, |
| "loss": 4.706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.5792455235978797, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00019835294753147703, |
| "loss": 4.6992, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.5828885772054135, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019833219389666206, |
| "loss": 4.7252, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.5865316308129475, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00019831131142582453, |
| "loss": 4.6982, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.5901746844204813, |
| "grad_norm": 1.375, |
| "learning_rate": 0.0001982903001463249, |
| "loss": 4.7183, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.5938177380280151, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.0001982691600856924, |
| "loss": 4.7046, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.5974607916355489, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00019824789127162492, |
| "loss": 4.7002, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.6011038452430828, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.00019822649373198926, |
| "loss": 4.7226, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.6047468988506166, |
| "grad_norm": 1.1328125, |
| "learning_rate": 0.00019820496749482062, |
| "loss": 4.7044, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.6083899524581504, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00019818331258832298, |
| "loss": 4.7108, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.6120330060656842, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00019816152904086884, |
| "loss": 4.7146, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.6156760596732181, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00019813961688099925, |
| "loss": 4.7052, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.619319113280752, |
| "grad_norm": 1.375, |
| "learning_rate": 0.00019811757613742383, |
| "loss": 4.7054, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.6229621668882858, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.0001980954068390206, |
| "loss": 4.7186, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.6266052204958196, |
| "grad_norm": 3.515625, |
| "learning_rate": 0.00019807310901483608, |
| "loss": 4.7105, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.6302482741033534, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00019805068269408512, |
| "loss": 4.7019, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.6338913277108873, |
| "grad_norm": 2.359375, |
| "learning_rate": 0.0001980281279061509, |
| "loss": 4.7042, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.6375343813184211, |
| "grad_norm": 2.796875, |
| "learning_rate": 0.00019800544468058504, |
| "loss": 4.7089, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.6411774349259549, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00019798263304710739, |
| "loss": 4.6996, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.6448204885334887, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00019795969303560595, |
| "loss": 4.705, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.6484635421410226, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00019793662467613708, |
| "loss": 4.704, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.6521065957485565, |
| "grad_norm": 1.15625, |
| "learning_rate": 0.00019791342799892515, |
| "loss": 4.7013, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.6557496493560903, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.0001978901030343628, |
| "loss": 4.7052, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.6593927029636241, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019786664981301063, |
| "loss": 4.6984, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.663035756571158, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.00019784306836559732, |
| "loss": 4.7043, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.6666788101786918, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00019781935872301962, |
| "loss": 4.7079, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.6703218637862256, |
| "grad_norm": 1.09375, |
| "learning_rate": 0.00019779552091634214, |
| "loss": 4.6956, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.6739649173937594, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00019777155497679747, |
| "loss": 4.714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.6776079710012933, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.0001977474609357861, |
| "loss": 4.7094, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.6812510246088271, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00019772323882487632, |
| "loss": 4.7011, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.684894078216361, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.0001976988886758042, |
| "loss": 4.6987, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.6885371318238948, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00019767441052047363, |
| "loss": 4.7024, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.6921801854314287, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.00019764980439095618, |
| "loss": 4.7205, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.6958232390389625, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019762507031949108, |
| "loss": 4.7038, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.6994662926464963, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00019760020833848522, |
| "loss": 4.7058, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.7031093462540301, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00019757521848051308, |
| "loss": 4.7002, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.706752399861564, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00019755010077831666, |
| "loss": 4.7137, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.7103954534690978, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00019752485526480546, |
| "loss": 4.717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.7140385070766316, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.0001974994819730565, |
| "loss": 4.6963, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.7176815606841654, |
| "grad_norm": 2.296875, |
| "learning_rate": 0.0001974739809363141, |
| "loss": 4.7155, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.7213246142916993, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.00019744835218799009, |
| "loss": 4.7063, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.7249676678992332, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00019742259576166355, |
| "loss": 4.7001, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.728610721506767, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.00019739671169108082, |
| "loss": 4.7064, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.7322537751143008, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.0001973707000101556, |
| "loss": 4.7168, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.7358968287218346, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.00019734456075296862, |
| "loss": 4.7028, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.7395398823293685, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00019731829395376786, |
| "loss": 4.7101, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.7431829359369023, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.00019729189964696846, |
| "loss": 4.7066, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.7468259895444361, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.0001972653778671525, |
| "loss": 4.7244, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.7504690431519699, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00019723872864906917, |
| "loss": 4.7167, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.7541120967595039, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00019721195202763458, |
| "loss": 4.6993, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.7577551503670377, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00019718504803793176, |
| "loss": 4.7053, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.7613982039745715, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.0001971580167152107, |
| "loss": 4.7009, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.7650412575821053, |
| "grad_norm": 1.5859375, |
| "learning_rate": 0.00019713085809488812, |
| "loss": 4.6948, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.7686843111896392, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00019710357221254757, |
| "loss": 4.6921, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.772327364797173, |
| "grad_norm": 3.84375, |
| "learning_rate": 0.00019707615910393933, |
| "loss": 4.7048, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.7759704184047068, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00019704861880498042, |
| "loss": 4.7059, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.7796134720122406, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00019702095135175444, |
| "loss": 4.7035, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.7832565256197745, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019699315678051166, |
| "loss": 4.7176, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.7868995792273084, |
| "grad_norm": 2.296875, |
| "learning_rate": 0.00019696523512766884, |
| "loss": 4.686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.7905426328348422, |
| "grad_norm": 3.0, |
| "learning_rate": 0.0001969371864298092, |
| "loss": 4.7, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.794185686442376, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00019690901072368262, |
| "loss": 4.686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.7978287400499098, |
| "grad_norm": 1.875, |
| "learning_rate": 0.00019688070804620513, |
| "loss": 4.6894, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.8014717936574437, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00019685227843445926, |
| "loss": 4.7038, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.8051148472649775, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00019682372192569386, |
| "loss": 4.6949, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.8087579008725113, |
| "grad_norm": 1.2421875, |
| "learning_rate": 0.00019679503855732404, |
| "loss": 4.7035, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.8124009544800451, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00019676622836693102, |
| "loss": 4.6992, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.816044008087579, |
| "grad_norm": 4.0625, |
| "learning_rate": 0.00019673729139226229, |
| "loss": 4.6937, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.8196870616951129, |
| "grad_norm": 1.0859375, |
| "learning_rate": 0.00019670822767123142, |
| "loss": 4.6948, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.8233301153026467, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00019667903724191805, |
| "loss": 4.6816, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.8269731689101805, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019664972014256783, |
| "loss": 4.6848, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.8306162225177144, |
| "grad_norm": 1.15625, |
| "learning_rate": 0.0001966202764115924, |
| "loss": 4.6876, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.8342592761252482, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00019659070608756926, |
| "loss": 4.7076, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.837902329732782, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.0001965610092092418, |
| "loss": 4.6935, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.8415453833403158, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00019653118581551925, |
| "loss": 4.693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.8451884369478497, |
| "grad_norm": 4.375, |
| "learning_rate": 0.00019650123594547656, |
| "loss": 4.6968, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.8488314905553835, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00019647115963835444, |
| "loss": 4.6935, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.8524745441629173, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019644095693355915, |
| "loss": 4.6961, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.8561175977704512, |
| "grad_norm": 3.046875, |
| "learning_rate": 0.00019641062787066274, |
| "loss": 4.7102, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.8597606513779851, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019638017248940263, |
| "loss": 4.722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.8634037049855189, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.0001963495908296819, |
| "loss": 4.6987, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.8670467585930527, |
| "grad_norm": 2.53125, |
| "learning_rate": 0.00019631888293156896, |
| "loss": 4.6952, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.8706898122005865, |
| "grad_norm": 2.75, |
| "learning_rate": 0.00019628804883529765, |
| "loss": 4.6898, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.8743328658081204, |
| "grad_norm": 2.71875, |
| "learning_rate": 0.00019625708858126727, |
| "loss": 4.7085, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.8779759194156542, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00019622600221004218, |
| "loss": 4.6967, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.881618973023188, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.0001961947897623522, |
| "loss": 4.7088, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.8852620266307218, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00019616345127909226, |
| "loss": 4.6893, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.8889050802382557, |
| "grad_norm": 3.375, |
| "learning_rate": 0.0001961319868013224, |
| "loss": 4.6895, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.8925481338457896, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00019610039637026774, |
| "loss": 4.6961, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.8961911874533234, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00019606868002731845, |
| "loss": 4.7013, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.8998342410608572, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00019603683781402968, |
| "loss": 4.7018, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.903477294668391, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00019600486977212146, |
| "loss": 4.703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.9071203482759249, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00019597277594347875, |
| "loss": 4.704, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.9107634018834587, |
| "grad_norm": 3.625, |
| "learning_rate": 0.0001959405563701512, |
| "loss": 4.7039, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.9144064554909925, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019590821109435335, |
| "loss": 4.6941, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.9180495090985263, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00019587574015846433, |
| "loss": 4.7073, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.9216925627060603, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00019584314360502792, |
| "loss": 4.6859, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.9253356163135941, |
| "grad_norm": 2.640625, |
| "learning_rate": 0.00019581042147675257, |
| "loss": 4.7085, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.9289786699211279, |
| "grad_norm": 2.8125, |
| "learning_rate": 0.0001957775738165112, |
| "loss": 4.6923, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.9326217235286617, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00019574460066734123, |
| "loss": 4.7077, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.9362647771361956, |
| "grad_norm": 2.4375, |
| "learning_rate": 0.0001957115020724444, |
| "loss": 4.6849, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.9399078307437294, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00019567827807518692, |
| "loss": 4.6974, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.9435508843512632, |
| "grad_norm": 4.4375, |
| "learning_rate": 0.0001956449287190993, |
| "loss": 4.695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.947193937958797, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00019561145404787625, |
| "loss": 4.6926, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.950836991566331, |
| "grad_norm": 1.25, |
| "learning_rate": 0.0001955778541053767, |
| "loss": 4.6966, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.9544800451738648, |
| "grad_norm": 3.03125, |
| "learning_rate": 0.00019554412893562368, |
| "loss": 4.6981, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.9581230987813986, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.0001955102785828043, |
| "loss": 4.6908, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.9617661523889324, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00019547630309126973, |
| "loss": 4.6856, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.9654092059964663, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00019544220250553504, |
| "loss": 4.6967, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.9690522596040001, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00019540797687027928, |
| "loss": 4.7017, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.9726953132115339, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.0001953736262303452, |
| "loss": 4.6947, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.9763383668190677, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0001953391506307395, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.9799814204266015, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.0001953045501166325, |
| "loss": 4.6847, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.9836244740341354, |
| "grad_norm": 2.671875, |
| "learning_rate": 0.0001952698247333582, |
| "loss": 4.7012, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.9872675276416693, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.0001952349745264142, |
| "loss": 4.6827, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.9909105812492031, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00019519999954146174, |
| "loss": 4.7041, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.9945536348567369, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00019516489982432535, |
| "loss": 4.6852, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.9981966884642708, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019512967542099316, |
| "loss": 4.6928, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.999945354195887, |
| "eval_loss": 4.698671340942383, |
| "eval_runtime": 581.7351, |
| "eval_samples_per_second": 521.419, |
| "eval_steps_per_second": 43.453, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13724 |
| }, |
| { |
| "epoch": 1.001821526803767, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00019509432637761665, |
| "loss": 4.6839, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.0054645804113007, |
| "grad_norm": 1.875, |
| "learning_rate": 0.0001950588527405105, |
| "loss": 4.6792, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.0091076340188345, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00019502325455615267, |
| "loss": 4.683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.0127506876263683, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.0001949875318711844, |
| "loss": 4.6828, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.0163937412339024, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.00019495168473240994, |
| "loss": 4.6829, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.0200367948414362, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.0001949157131867967, |
| "loss": 4.6819, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.02367984844897, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.00019487961728147495, |
| "loss": 4.6945, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.0273229020565038, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.000194843397063738, |
| "loss": 4.6835, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.0309659556640376, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00019480705258104205, |
| "loss": 4.6919, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.0346090092715714, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00019477058388100605, |
| "loss": 4.6791, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.0382520628791052, |
| "grad_norm": 1.0, |
| "learning_rate": 0.00019473399101141176, |
| "loss": 4.6751, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.041895116486639, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00019469727402020358, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.045538170094173, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.0001946604329554885, |
| "loss": 4.6791, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.0491812237017069, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.0001946234678655362, |
| "loss": 4.6889, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.0528242773092407, |
| "grad_norm": 1.2421875, |
| "learning_rate": 0.00019458637879877876, |
| "loss": 4.687, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.0564673309167745, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00019454916580381075, |
| "loss": 4.6753, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.0601103845243083, |
| "grad_norm": 2.296875, |
| "learning_rate": 0.00019451182892938902, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.063753438131842, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00019447436822443286, |
| "loss": 4.671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.067396491739376, |
| "grad_norm": 2.875, |
| "learning_rate": 0.00019443678373802365, |
| "loss": 4.683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.0710395453469097, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00019439907551940512, |
| "loss": 4.6855, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.0746825989544435, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.000194361243617983, |
| "loss": 4.6963, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14750 |
| }, |
| { |
| "epoch": 1.0783256525619775, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.0001943232880833251, |
| "loss": 4.6866, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.0819687061695114, |
| "grad_norm": 1.203125, |
| "learning_rate": 0.00019428520896516122, |
| "loss": 4.6841, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14850 |
| }, |
| { |
| "epoch": 1.0856117597770452, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00019424700631338304, |
| "loss": 4.6809, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.089254813384579, |
| "grad_norm": 2.640625, |
| "learning_rate": 0.00019420868017804423, |
| "loss": 4.6902, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 14950 |
| }, |
| { |
| "epoch": 1.0928978669921128, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00019417023060936005, |
| "loss": 4.6618, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.0965409205996466, |
| "grad_norm": 2.796875, |
| "learning_rate": 0.00019413165765770765, |
| "loss": 4.6778, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15050 |
| }, |
| { |
| "epoch": 1.1001839742071804, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00019409296137362577, |
| "loss": 4.6947, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.1038270278147142, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00019405414180781469, |
| "loss": 4.6835, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15150 |
| }, |
| { |
| "epoch": 1.1074700814222482, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.00019401519901113634, |
| "loss": 4.6801, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.111113135029782, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00019397613303461403, |
| "loss": 4.6784, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15250 |
| }, |
| { |
| "epoch": 1.1147561886373158, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00019393694392943244, |
| "loss": 4.6779, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.1183992422448497, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00019389763174693764, |
| "loss": 4.6905, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15350 |
| }, |
| { |
| "epoch": 1.1220422958523835, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.0001938581965386369, |
| "loss": 4.6881, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.1256853494599173, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00019381863835619872, |
| "loss": 4.6866, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15450 |
| }, |
| { |
| "epoch": 1.129328403067451, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00019377895725145267, |
| "loss": 4.6806, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.132971456674985, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00019373915327638945, |
| "loss": 4.6933, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15550 |
| }, |
| { |
| "epoch": 1.1366145102825187, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.0001936992264831607, |
| "loss": 4.6747, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.1402575638900527, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00019365917692407898, |
| "loss": 4.6811, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15650 |
| }, |
| { |
| "epoch": 1.1439006174975865, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.0001936190046516177, |
| "loss": 4.6794, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.1475436711051203, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00019357870971841104, |
| "loss": 4.6911, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.1511867247126542, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00019353829217725398, |
| "loss": 4.6904, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.154829778320188, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00019349775208110198, |
| "loss": 4.6759, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.1584728319277218, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.00019345708948307117, |
| "loss": 4.6844, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.1621158855352556, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00019341630443643824, |
| "loss": 4.6956, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.1657589391427896, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00019337539699464018, |
| "loss": 4.6799, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.1694019927503234, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00019333436721127444, |
| "loss": 4.6869, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.1730450463578572, |
| "grad_norm": 2.875, |
| "learning_rate": 0.00019329321514009875, |
| "loss": 4.6809, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.176688099965391, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00019325194083503103, |
| "loss": 4.6703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.1803311535729248, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.00019321054435014935, |
| "loss": 4.6802, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.1839742071804586, |
| "grad_norm": 3.03125, |
| "learning_rate": 0.0001931690257396919, |
| "loss": 4.689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.1876172607879925, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00019312738505805691, |
| "loss": 4.6841, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.1912603143955263, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00019308562235980243, |
| "loss": 4.684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.19490336800306, |
| "grad_norm": 2.859375, |
| "learning_rate": 0.0001930437376996465, |
| "loss": 4.6814, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.1985464216105939, |
| "grad_norm": 2.046875, |
| "learning_rate": 0.00019300173113246687, |
| "loss": 4.6885, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.202189475218128, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00019295960271330104, |
| "loss": 4.6973, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.2058325288256617, |
| "grad_norm": 2.609375, |
| "learning_rate": 0.00019291735249734622, |
| "loss": 4.6803, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.2094755824331955, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00019287498053995915, |
| "loss": 4.6959, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.2131186360407293, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.000192832486896656, |
| "loss": 4.6853, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.2167616896482631, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00019278987162311254, |
| "loss": 4.6695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.220404743255797, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00019274713477516377, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.2240477968633308, |
| "grad_norm": 1.375, |
| "learning_rate": 0.00019270427640880405, |
| "loss": 4.6926, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.2276908504708648, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00019266129658018687, |
| "loss": 4.6798, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.2313339040783986, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.000192618195345625, |
| "loss": 4.6903, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.2349769576859324, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.0001925749727615901, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.2386200112934662, |
| "grad_norm": 1.8515625, |
| "learning_rate": 0.00019253162888471304, |
| "loss": 4.6905, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.242263064901, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00019248816377178337, |
| "loss": 4.6912, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.2459061185085338, |
| "grad_norm": 1.2734375, |
| "learning_rate": 0.00019244457747974968, |
| "loss": 4.6748, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.2495491721160676, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00019240087006571922, |
| "loss": 4.6893, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.2531922257236014, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00019235704158695796, |
| "loss": 4.6905, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.2568352793311353, |
| "grad_norm": 3.3125, |
| "learning_rate": 0.00019231309210089053, |
| "loss": 4.6715, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.260478332938669, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00019226902166510006, |
| "loss": 4.6798, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.264121386546203, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.00019222483033732815, |
| "loss": 4.6959, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.267764440153737, |
| "grad_norm": 1.125, |
| "learning_rate": 0.00019218051817547483, |
| "loss": 4.6907, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.2714074937612707, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00019213608523759842, |
| "loss": 4.6718, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.2750505473688045, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.00019209153158191553, |
| "loss": 4.691, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.2786936009763383, |
| "grad_norm": 3.109375, |
| "learning_rate": 0.00019204685726680084, |
| "loss": 4.6792, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.2823366545838721, |
| "grad_norm": 2.75, |
| "learning_rate": 0.00019200206235078717, |
| "loss": 4.6572, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.2859797081914062, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.0001919571468925654, |
| "loss": 4.6803, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.28962276179894, |
| "grad_norm": 2.78125, |
| "learning_rate": 0.00019191211095098424, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.2932658154064738, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001918669545850504, |
| "loss": 4.6788, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.2969088690140076, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00019182167785392827, |
| "loss": 4.6925, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.3005519226215414, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00019177628081693993, |
| "loss": 4.683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.3041949762290752, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00019173076353356513, |
| "loss": 4.6842, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.307838029836609, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.0001916851260634412, |
| "loss": 4.6797, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.3114810834441428, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00019163936846636293, |
| "loss": 4.6547, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.3151241370516766, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00019159349080228238, |
| "loss": 4.6845, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.3187671906592104, |
| "grad_norm": 1.1171875, |
| "learning_rate": 0.0001915474931313091, |
| "loss": 4.6925, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.3224102442667443, |
| "grad_norm": 3.71875, |
| "learning_rate": 0.0001915013755137097, |
| "loss": 4.6897, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.3260532978742783, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.0001914551380099081, |
| "loss": 4.6832, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.329696351481812, |
| "grad_norm": 1.0859375, |
| "learning_rate": 0.00019140878068048527, |
| "loss": 4.6879, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.333339405089346, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00019136230358617908, |
| "loss": 4.6782, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.3369824586968797, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.00019131570678788438, |
| "loss": 4.6916, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.3406255123044135, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00019126899034665286, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.3442685659119473, |
| "grad_norm": 2.671875, |
| "learning_rate": 0.000191222154323693, |
| "loss": 4.6781, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18450 |
| }, |
| { |
| "epoch": 1.3479116195194814, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00019117519878036988, |
| "loss": 4.678, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.3515546731270152, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.0001911281237782052, |
| "loss": 4.696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18550 |
| }, |
| { |
| "epoch": 1.355197726734549, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00019108092937887726, |
| "loss": 4.6942, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.3588407803420828, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00019103361564422067, |
| "loss": 4.6789, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18650 |
| }, |
| { |
| "epoch": 1.3624838339496166, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.00019098618263622649, |
| "loss": 4.6765, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18700 |
| }, |
| { |
| "epoch": 1.3661268875571504, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.000190938630417042, |
| "loss": 4.6836, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18750 |
| }, |
| { |
| "epoch": 1.3697699411646842, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.0001908909590489707, |
| "loss": 4.6751, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.373412994772218, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00019084316859447218, |
| "loss": 4.6856, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18850 |
| }, |
| { |
| "epoch": 1.3770560483797518, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00019079525911616207, |
| "loss": 4.6787, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.3806991019872856, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00019074723067681192, |
| "loss": 4.6701, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 18950 |
| }, |
| { |
| "epoch": 1.3843421555948197, |
| "grad_norm": 1.3828125, |
| "learning_rate": 0.00019069908333934924, |
| "loss": 4.6798, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.3879852092023535, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00019065081716685718, |
| "loss": 4.6789, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19050 |
| }, |
| { |
| "epoch": 1.3916282628098873, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00019060243222257468, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19100 |
| }, |
| { |
| "epoch": 1.395271316417421, |
| "grad_norm": 2.5, |
| "learning_rate": 0.00019055392856989628, |
| "loss": 4.6803, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19150 |
| }, |
| { |
| "epoch": 1.398914370024955, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00019050530627237207, |
| "loss": 4.6862, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.4025574236324887, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.0001904565653937075, |
| "loss": 4.6842, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19250 |
| }, |
| { |
| "epoch": 1.4062004772400225, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00019040770599776355, |
| "loss": 4.6885, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19300 |
| }, |
| { |
| "epoch": 1.4098435308475565, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.0001903587281485563, |
| "loss": 4.6772, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19350 |
| }, |
| { |
| "epoch": 1.4134865844550903, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00019030963191025716, |
| "loss": 4.6779, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.4171296380626242, |
| "grad_norm": 1.2421875, |
| "learning_rate": 0.00019026041734719265, |
| "loss": 4.6701, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19450 |
| }, |
| { |
| "epoch": 1.420772691670158, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.0001902110845238442, |
| "loss": 4.6867, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.4244157452776918, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00019016163350484832, |
| "loss": 4.6724, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19550 |
| }, |
| { |
| "epoch": 1.4280587988852256, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00019011206435499633, |
| "loss": 4.6723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.4317018524927594, |
| "grad_norm": 2.703125, |
| "learning_rate": 0.0001900623771392343, |
| "loss": 4.681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19650 |
| }, |
| { |
| "epoch": 1.4353449061002932, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00019001257192266305, |
| "loss": 4.6846, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19700 |
| }, |
| { |
| "epoch": 1.438987959707827, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.00018996264877053792, |
| "loss": 4.6883, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19750 |
| }, |
| { |
| "epoch": 1.4426310133153608, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.0001899126077482689, |
| "loss": 4.6778, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.4462740669228948, |
| "grad_norm": 3.78125, |
| "learning_rate": 0.00018986244892142025, |
| "loss": 4.6831, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19850 |
| }, |
| { |
| "epoch": 1.4499171205304286, |
| "grad_norm": 2.546875, |
| "learning_rate": 0.00018981217235571076, |
| "loss": 4.6752, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19900 |
| }, |
| { |
| "epoch": 1.4535601741379625, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00018976177811701336, |
| "loss": 4.6834, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 19950 |
| }, |
| { |
| "epoch": 1.4572032277454963, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.0001897112662713551, |
| "loss": 4.6962, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.46084628135303, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00018966063688491736, |
| "loss": 4.6861, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20050 |
| }, |
| { |
| "epoch": 1.4644893349605639, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00018960989002403525, |
| "loss": 4.684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.468132388568098, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.00018955902575519797, |
| "loss": 4.671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20150 |
| }, |
| { |
| "epoch": 1.4717754421756317, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00018950804414504847, |
| "loss": 4.6864, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.4754184957831655, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.0001894569452603835, |
| "loss": 4.6884, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20250 |
| }, |
| { |
| "epoch": 1.4790615493906993, |
| "grad_norm": 1.1484375, |
| "learning_rate": 0.00018940572916815342, |
| "loss": 4.6838, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20300 |
| }, |
| { |
| "epoch": 1.4827046029982331, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00018935439593546218, |
| "loss": 4.6925, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20350 |
| }, |
| { |
| "epoch": 1.486347656605767, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.0001893029456295672, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.4899907102133008, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00018925137831787923, |
| "loss": 4.6744, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20450 |
| }, |
| { |
| "epoch": 1.4936337638208346, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00018919969406796242, |
| "loss": 4.6821, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.4972768174283684, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00018914789294753414, |
| "loss": 4.6783, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20550 |
| }, |
| { |
| "epoch": 1.5009198710359022, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00018909597502446478, |
| "loss": 4.6858, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.504562924643436, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018904394036677787, |
| "loss": 4.6858, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20650 |
| }, |
| { |
| "epoch": 1.5082059782509698, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.0001889917890426498, |
| "loss": 4.683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.5118490318585038, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.0001889395211204099, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20750 |
| }, |
| { |
| "epoch": 1.5154920854660376, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00018888713666854022, |
| "loss": 4.6756, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.5191351390735714, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.0001888346357556755, |
| "loss": 4.6734, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20850 |
| }, |
| { |
| "epoch": 1.5227781926811053, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.00018878201845060305, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.5264212462886393, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00018872928482226273, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 20950 |
| }, |
| { |
| "epoch": 1.530064299896173, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00018867643493974674, |
| "loss": 4.6821, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.533707353503707, |
| "grad_norm": 1.984375, |
| "learning_rate": 0.00018862346887229961, |
| "loss": 4.6826, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21050 |
| }, |
| { |
| "epoch": 1.5373504071112407, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.0001885703866893182, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.5409934607187745, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00018851718846035133, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21150 |
| }, |
| { |
| "epoch": 1.5446365143263083, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00018846387425510005, |
| "loss": 4.6719, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.5482795679338421, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.0001884104441434172, |
| "loss": 4.689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21250 |
| }, |
| { |
| "epoch": 1.551922621541376, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00018835689819530758, |
| "loss": 4.6893, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.5555656751489098, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00018830323648092777, |
| "loss": 4.6814, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21350 |
| }, |
| { |
| "epoch": 1.5592087287564436, |
| "grad_norm": 1.25, |
| "learning_rate": 0.00018824945907058591, |
| "loss": 4.6849, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.5628517823639774, |
| "grad_norm": 1.8203125, |
| "learning_rate": 0.0001881955660347419, |
| "loss": 4.6842, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21450 |
| }, |
| { |
| "epoch": 1.5664948359715112, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00018814155744400698, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.5701378895790452, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.0001880874333691439, |
| "loss": 4.6883, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21550 |
| }, |
| { |
| "epoch": 1.573780943186579, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.0001880331938810666, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.5774239967941128, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.0001879788390508404, |
| "loss": 4.6846, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21650 |
| }, |
| { |
| "epoch": 1.5810670504016466, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00018792436894968164, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.5847101040091804, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00018786978364895761, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21750 |
| }, |
| { |
| "epoch": 1.5883531576167145, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00018781508322018674, |
| "loss": 4.6844, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.5919962112242483, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.0001877602677350381, |
| "loss": 4.6767, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21850 |
| }, |
| { |
| "epoch": 1.595639264831782, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00018770533726533172, |
| "loss": 4.6668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.599282318439316, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.000187650291883038, |
| "loss": 4.6855, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 21950 |
| }, |
| { |
| "epoch": 1.6029253720468497, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.00018759513166027817, |
| "loss": 4.6807, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.6065684256543835, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.0001875398566693238, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22050 |
| }, |
| { |
| "epoch": 1.6102114792619173, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.0001874844669825968, |
| "loss": 4.689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.6138545328694511, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.00018742896267266948, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.617497586476985, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018737334381226418, |
| "loss": 4.6747, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.6211406400845187, |
| "grad_norm": 3.5625, |
| "learning_rate": 0.00018731761047425347, |
| "loss": 4.6828, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.6247836936920526, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.0001872617627316598, |
| "loss": 4.6878, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.6284267472995864, |
| "grad_norm": 1.25, |
| "learning_rate": 0.00018720580065765554, |
| "loss": 4.6709, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.6320698009071204, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.0001871497243255629, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.6357128545146542, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.0001870935338088537, |
| "loss": 4.6825, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.639355908122188, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00018703722918114954, |
| "loss": 4.6736, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.6429989617297218, |
| "grad_norm": 3.515625, |
| "learning_rate": 0.00018698081051622136, |
| "loss": 4.6625, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.6466420153372558, |
| "grad_norm": 2.78125, |
| "learning_rate": 0.00018692427788798957, |
| "loss": 4.6777, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.6502850689447897, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.0001868676313705239, |
| "loss": 4.6809, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.6539281225523235, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00018681087103804332, |
| "loss": 4.6657, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.6575711761598573, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018675399696491587, |
| "loss": 4.6701, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.661214229767391, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00018669700922565864, |
| "loss": 4.6763, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.664857283374925, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.0001866399078949377, |
| "loss": 4.6824, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.6685003369824587, |
| "grad_norm": 3.4375, |
| "learning_rate": 0.00018658269304756784, |
| "loss": 4.6686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.6721433905899925, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00018652536475851272, |
| "loss": 4.6808, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.6757864441975263, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00018646792310288447, |
| "loss": 4.687, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.6794294978050601, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00018641036815594388, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23050 |
| }, |
| { |
| "epoch": 1.683072551412594, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001863526999931001, |
| "loss": 4.678, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.6867156050201277, |
| "grad_norm": 2.609375, |
| "learning_rate": 0.00018629491868991073, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23150 |
| }, |
| { |
| "epoch": 1.6903586586276615, |
| "grad_norm": 3.15625, |
| "learning_rate": 0.00018623702432208144, |
| "loss": 4.6866, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.6940017122351956, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00018617901696546616, |
| "loss": 4.6763, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23250 |
| }, |
| { |
| "epoch": 1.6976447658427294, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00018612089669606683, |
| "loss": 4.6808, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23300 |
| }, |
| { |
| "epoch": 1.7012878194502632, |
| "grad_norm": 1.5859375, |
| "learning_rate": 0.00018606266359003331, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23350 |
| }, |
| { |
| "epoch": 1.704930873057797, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00018600431772366335, |
| "loss": 4.6846, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.708573926665331, |
| "grad_norm": 3.3125, |
| "learning_rate": 0.00018594585917340237, |
| "loss": 4.6857, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23450 |
| }, |
| { |
| "epoch": 1.7122169802728648, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00018588728801584347, |
| "loss": 4.6678, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.7158600338803986, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00018582860432772732, |
| "loss": 4.6897, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23550 |
| }, |
| { |
| "epoch": 1.7195030874879325, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00018576980818594193, |
| "loss": 4.6843, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.7231461410954663, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00018571089966752278, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23650 |
| }, |
| { |
| "epoch": 1.726789194703, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00018565187884965248, |
| "loss": 4.6805, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.7304322483105339, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00018559274580966082, |
| "loss": 4.6872, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23750 |
| }, |
| { |
| "epoch": 1.7340753019180677, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.0001855335006250246, |
| "loss": 4.6871, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.7377183555256015, |
| "grad_norm": 2.453125, |
| "learning_rate": 0.00018547414337336763, |
| "loss": 4.6775, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23850 |
| }, |
| { |
| "epoch": 1.7413614091331353, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00018541467413246045, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23900 |
| }, |
| { |
| "epoch": 1.7450044627406691, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00018535509298022037, |
| "loss": 4.6936, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 23950 |
| }, |
| { |
| "epoch": 1.748647516348203, |
| "grad_norm": 2.484375, |
| "learning_rate": 0.00018529539999471138, |
| "loss": 4.6869, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.752290569955737, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.0001852355952541439, |
| "loss": 4.6812, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24050 |
| }, |
| { |
| "epoch": 1.7559336235632708, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.0001851756788368748, |
| "loss": 4.6919, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24100 |
| }, |
| { |
| "epoch": 1.7595766771708046, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00018511565082140736, |
| "loss": 4.6749, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24150 |
| }, |
| { |
| "epoch": 1.7632197307783384, |
| "grad_norm": 2.859375, |
| "learning_rate": 0.00018505551128639096, |
| "loss": 4.663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.7668627843858722, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00018499526031062115, |
| "loss": 4.6771, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24250 |
| }, |
| { |
| "epoch": 1.7705058379934062, |
| "grad_norm": 2.515625, |
| "learning_rate": 0.0001849348979730395, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.77414889160094, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00018487442435273345, |
| "loss": 4.6793, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24350 |
| }, |
| { |
| "epoch": 1.7777919452084738, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00018481383952893629, |
| "loss": 4.6899, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1.7814349988160076, |
| "grad_norm": 2.4375, |
| "learning_rate": 0.00018475314358102693, |
| "loss": 4.6881, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24450 |
| }, |
| { |
| "epoch": 1.7850780524235415, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00018469233658852997, |
| "loss": 4.6776, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.7887211060310753, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.0001846314186311155, |
| "loss": 4.6622, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24550 |
| }, |
| { |
| "epoch": 1.792364159638609, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00018457038978859886, |
| "loss": 4.6624, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.7960072132461429, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018450925014094086, |
| "loss": 4.6705, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24650 |
| }, |
| { |
| "epoch": 1.7996502668536767, |
| "grad_norm": 2.578125, |
| "learning_rate": 0.00018444799976824737, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24700 |
| }, |
| { |
| "epoch": 1.8032933204612105, |
| "grad_norm": 1.046875, |
| "learning_rate": 0.00018438663875076938, |
| "loss": 4.6814, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24750 |
| }, |
| { |
| "epoch": 1.8069363740687443, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.0001843251671689028, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1.810579427676278, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.0001842635851031885, |
| "loss": 4.6754, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24850 |
| }, |
| { |
| "epoch": 1.8142224812838121, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00018420189263431195, |
| "loss": 4.6738, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.817865534891346, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00018414008984310345, |
| "loss": 4.6806, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 24950 |
| }, |
| { |
| "epoch": 1.8215085884988798, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00018407817681053768, |
| "loss": 4.6575, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.8251516421064136, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.0001840161536177339, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25050 |
| }, |
| { |
| "epoch": 1.8287946957139476, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00018395402034595567, |
| "loss": 4.6556, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25100 |
| }, |
| { |
| "epoch": 1.8324377493214814, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00018389177707661065, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25150 |
| }, |
| { |
| "epoch": 1.8360808029290152, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00018382942389125079, |
| "loss": 4.6796, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1.839723856536549, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.00018376696087157198, |
| "loss": 4.6607, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25250 |
| }, |
| { |
| "epoch": 1.8433669101440828, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018370438809941402, |
| "loss": 4.681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25300 |
| }, |
| { |
| "epoch": 1.8470099637516166, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.0001836417056567605, |
| "loss": 4.6719, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25350 |
| }, |
| { |
| "epoch": 1.8506530173591504, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00018357891362573864, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25400 |
| }, |
| { |
| "epoch": 1.8542960709666843, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00018351601208861944, |
| "loss": 4.682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25450 |
| }, |
| { |
| "epoch": 1.857939124574218, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.0001834530011278172, |
| "loss": 4.7023, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.8615821781817519, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00018338988082588958, |
| "loss": 4.6791, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25550 |
| }, |
| { |
| "epoch": 1.8652252317892857, |
| "grad_norm": 2.359375, |
| "learning_rate": 0.0001833266512655376, |
| "loss": 4.6788, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25600 |
| }, |
| { |
| "epoch": 1.8688682853968195, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00018326331252960544, |
| "loss": 4.6757, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25650 |
| }, |
| { |
| "epoch": 1.8725113390043533, |
| "grad_norm": 2.75, |
| "learning_rate": 0.00018319986470108023, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25700 |
| }, |
| { |
| "epoch": 1.8761543926118873, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00018313630786309206, |
| "loss": 4.6772, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25750 |
| }, |
| { |
| "epoch": 1.8797974462194211, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00018307264209891389, |
| "loss": 4.6804, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1.883440499826955, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00018300886749196134, |
| "loss": 4.685, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25850 |
| }, |
| { |
| "epoch": 1.8870835534344887, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.0001829449841257927, |
| "loss": 4.6618, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25900 |
| }, |
| { |
| "epoch": 1.8907266070420228, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00018288099208410872, |
| "loss": 4.6782, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 25950 |
| }, |
| { |
| "epoch": 1.8943696606495566, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00018281689145075252, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.8980127142570904, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.00018275268230970955, |
| "loss": 4.6855, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26050 |
| }, |
| { |
| "epoch": 1.9016557678646242, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.00018268836474510738, |
| "loss": 4.6776, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26100 |
| }, |
| { |
| "epoch": 1.905298821472158, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00018262393884121566, |
| "loss": 4.6828, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26150 |
| }, |
| { |
| "epoch": 1.9089418750796918, |
| "grad_norm": 3.078125, |
| "learning_rate": 0.00018255940468244597, |
| "loss": 4.6805, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26200 |
| }, |
| { |
| "epoch": 1.9125849286872256, |
| "grad_norm": 3.125, |
| "learning_rate": 0.00018249476235335176, |
| "loss": 4.6794, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26250 |
| }, |
| { |
| "epoch": 1.9162279822947594, |
| "grad_norm": 1.875, |
| "learning_rate": 0.0001824300119386282, |
| "loss": 4.695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26300 |
| }, |
| { |
| "epoch": 1.9198710359022932, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.000182365153523112, |
| "loss": 4.6675, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26350 |
| }, |
| { |
| "epoch": 1.923514089509827, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.0001823001871917815, |
| "loss": 4.6864, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1.9271571431173609, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00018223511302975636, |
| "loss": 4.6744, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26450 |
| }, |
| { |
| "epoch": 1.9308001967248947, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00018216993112229747, |
| "loss": 4.6792, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.9344432503324285, |
| "grad_norm": 2.890625, |
| "learning_rate": 0.00018210464155480707, |
| "loss": 4.681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26550 |
| }, |
| { |
| "epoch": 1.9380863039399625, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00018203924441282822, |
| "loss": 4.6741, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1.9417293575474963, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00018197373978204507, |
| "loss": 4.6715, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26650 |
| }, |
| { |
| "epoch": 1.9453724111550301, |
| "grad_norm": 2.25, |
| "learning_rate": 0.0001819081277482826, |
| "loss": 4.6725, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26700 |
| }, |
| { |
| "epoch": 1.949015464762564, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00018184240839750647, |
| "loss": 4.669, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26750 |
| }, |
| { |
| "epoch": 1.952658518370098, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.00018177658181582295, |
| "loss": 4.6894, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1.9563015719776318, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00018171064808947883, |
| "loss": 4.679, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26850 |
| }, |
| { |
| "epoch": 1.9599446255851656, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001816446073048613, |
| "loss": 4.666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26900 |
| }, |
| { |
| "epoch": 1.9635876791926994, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00018157845954849778, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 26950 |
| }, |
| { |
| "epoch": 1.9672307328002332, |
| "grad_norm": 1.1875, |
| "learning_rate": 0.0001815122049070558, |
| "loss": 4.673, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.970873786407767, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00018144584346734308, |
| "loss": 4.6832, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27050 |
| }, |
| { |
| "epoch": 1.9745168400153008, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00018137937531630707, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27100 |
| }, |
| { |
| "epoch": 1.9781598936228346, |
| "grad_norm": 1.1640625, |
| "learning_rate": 0.00018131280054103522, |
| "loss": 4.6781, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27150 |
| }, |
| { |
| "epoch": 1.9818029472303684, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.0001812461192287546, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1.9854460008379022, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.0001811793314668318, |
| "loss": 4.6739, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27250 |
| }, |
| { |
| "epoch": 1.989089054445436, |
| "grad_norm": 2.703125, |
| "learning_rate": 0.000181112437342773, |
| "loss": 4.6668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27300 |
| }, |
| { |
| "epoch": 1.9927321080529699, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00018104543694422368, |
| "loss": 4.6842, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27350 |
| }, |
| { |
| "epoch": 1.9963751616605039, |
| "grad_norm": 3.28125, |
| "learning_rate": 0.00018097833035896858, |
| "loss": 4.667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1.9998724931237364, |
| "eval_loss": 4.683139801025391, |
| "eval_runtime": 582.0603, |
| "eval_samples_per_second": 521.128, |
| "eval_steps_per_second": 43.428, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27448 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00018091111767493153, |
| "loss": 4.6719, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27450 |
| }, |
| { |
| "epoch": 2.003643053607534, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00018084379898017543, |
| "loss": 4.6549, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.0072861072150676, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00018077637436290198, |
| "loss": 4.67, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27550 |
| }, |
| { |
| "epoch": 2.0109291608226014, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.0001807088439114518, |
| "loss": 4.6681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.0145722144301352, |
| "grad_norm": 2.140625, |
| "learning_rate": 0.00018064120771430403, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27650 |
| }, |
| { |
| "epoch": 2.018215268037669, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.0001805734658600765, |
| "loss": 4.6603, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27700 |
| }, |
| { |
| "epoch": 2.021858321645203, |
| "grad_norm": 2.203125, |
| "learning_rate": 0.00018050561843752533, |
| "loss": 4.6783, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27750 |
| }, |
| { |
| "epoch": 2.0255013752527367, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00018043766553554506, |
| "loss": 4.6753, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27800 |
| }, |
| { |
| "epoch": 2.0291444288602705, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00018036960724316842, |
| "loss": 4.6606, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27850 |
| }, |
| { |
| "epoch": 2.0327874824678047, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00018030144364956615, |
| "loss": 4.6776, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27900 |
| }, |
| { |
| "epoch": 2.0364305360753385, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00018023317484404708, |
| "loss": 4.6566, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 27950 |
| }, |
| { |
| "epoch": 2.0400735896828723, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00018016480091605778, |
| "loss": 4.66, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.043716643290406, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.0001800963219551826, |
| "loss": 4.6631, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28050 |
| }, |
| { |
| "epoch": 2.04735969689794, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00018002773805114348, |
| "loss": 4.6639, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28100 |
| }, |
| { |
| "epoch": 2.0510027505054738, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00017995904929379988, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28150 |
| }, |
| { |
| "epoch": 2.0546458041130076, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00017989025577314866, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28200 |
| }, |
| { |
| "epoch": 2.0582888577205414, |
| "grad_norm": 1.203125, |
| "learning_rate": 0.0001798213575793239, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28250 |
| }, |
| { |
| "epoch": 2.061931911328075, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00017975235480259684, |
| "loss": 4.656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28300 |
| }, |
| { |
| "epoch": 2.065574964935609, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00017968324753337575, |
| "loss": 4.6532, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28350 |
| }, |
| { |
| "epoch": 2.069218018543143, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00017961403586220582, |
| "loss": 4.6784, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28400 |
| }, |
| { |
| "epoch": 2.0728610721506766, |
| "grad_norm": 3.328125, |
| "learning_rate": 0.000179544719879769, |
| "loss": 4.6751, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28450 |
| }, |
| { |
| "epoch": 2.0765041257582104, |
| "grad_norm": 1.3828125, |
| "learning_rate": 0.0001794752996768839, |
| "loss": 4.6772, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.0801471793657442, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00017940577534450574, |
| "loss": 4.6652, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28550 |
| }, |
| { |
| "epoch": 2.083790232973278, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00017933614697372615, |
| "loss": 4.6699, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28600 |
| }, |
| { |
| "epoch": 2.087433286580812, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00017926641465577302, |
| "loss": 4.6686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28650 |
| }, |
| { |
| "epoch": 2.091076340188346, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.00017919657848201046, |
| "loss": 4.6531, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28700 |
| }, |
| { |
| "epoch": 2.09471939379588, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.0001791266385439387, |
| "loss": 4.6573, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28750 |
| }, |
| { |
| "epoch": 2.0983624474034137, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00017905659493319388, |
| "loss": 4.6785, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28800 |
| }, |
| { |
| "epoch": 2.1020055010109475, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00017898644774154798, |
| "loss": 4.6767, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28850 |
| }, |
| { |
| "epoch": 2.1056485546184813, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.0001789161970609087, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28900 |
| }, |
| { |
| "epoch": 2.109291608226015, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.0001788458429833193, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 28950 |
| }, |
| { |
| "epoch": 2.112934661833549, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.0001787753856009586, |
| "loss": 4.6631, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.1165777154410828, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.0001787048250061406, |
| "loss": 4.6646, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29050 |
| }, |
| { |
| "epoch": 2.1202207690486166, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00017863416129131475, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29100 |
| }, |
| { |
| "epoch": 2.1238638226561504, |
| "grad_norm": 1.9765625, |
| "learning_rate": 0.00017856339454906542, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29150 |
| }, |
| { |
| "epoch": 2.127506876263684, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.0001784925248721121, |
| "loss": 4.6741, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29200 |
| }, |
| { |
| "epoch": 2.131149929871218, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00017842155235330904, |
| "loss": 4.6723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29250 |
| }, |
| { |
| "epoch": 2.134792983478752, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00017835047708564537, |
| "loss": 4.6765, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29300 |
| }, |
| { |
| "epoch": 2.1384360370862856, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.0001782792991622447, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29350 |
| }, |
| { |
| "epoch": 2.1420790906938194, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00017820801867636518, |
| "loss": 4.6592, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29400 |
| }, |
| { |
| "epoch": 2.145722144301353, |
| "grad_norm": 2.46875, |
| "learning_rate": 0.0001781366357213994, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29450 |
| }, |
| { |
| "epoch": 2.149365197908887, |
| "grad_norm": 2.96875, |
| "learning_rate": 0.00017806515039087416, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29500 |
| }, |
| { |
| "epoch": 2.153008251516421, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.0001779935627784504, |
| "loss": 4.663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29550 |
| }, |
| { |
| "epoch": 2.156651305123955, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00017792187297792307, |
| "loss": 4.6608, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29600 |
| }, |
| { |
| "epoch": 2.160294358731489, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00017785008108322102, |
| "loss": 4.6793, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29650 |
| }, |
| { |
| "epoch": 2.1639374123390227, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.0001777781871884068, |
| "loss": 4.6803, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29700 |
| }, |
| { |
| "epoch": 2.1675804659465565, |
| "grad_norm": 2.71875, |
| "learning_rate": 0.0001777061913876767, |
| "loss": 4.6578, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29750 |
| }, |
| { |
| "epoch": 2.1712235195540903, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.0001776340937753605, |
| "loss": 4.6784, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29800 |
| }, |
| { |
| "epoch": 2.174866573161624, |
| "grad_norm": 3.078125, |
| "learning_rate": 0.00017756189444592132, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29850 |
| }, |
| { |
| "epoch": 2.178509626769158, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.0001774895934939556, |
| "loss": 4.6417, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29900 |
| }, |
| { |
| "epoch": 2.1821526803766917, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00017741719101419293, |
| "loss": 4.6829, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 29950 |
| }, |
| { |
| "epoch": 2.1857957339842256, |
| "grad_norm": 1.8671875, |
| "learning_rate": 0.00017734468710149589, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.1894387875917594, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00017727208185086, |
| "loss": 4.671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30050 |
| }, |
| { |
| "epoch": 2.193081841199293, |
| "grad_norm": 1.984375, |
| "learning_rate": 0.00017719937535741354, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30100 |
| }, |
| { |
| "epoch": 2.196724894806827, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00017712656771641739, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30150 |
| }, |
| { |
| "epoch": 2.200367948414361, |
| "grad_norm": 3.171875, |
| "learning_rate": 0.00017705365902326498, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30200 |
| }, |
| { |
| "epoch": 2.2040110020218946, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.00017698064937348224, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30250 |
| }, |
| { |
| "epoch": 2.2076540556294284, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00017690753886272727, |
| "loss": 4.6691, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30300 |
| }, |
| { |
| "epoch": 2.2112971092369627, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.00017683432758679025, |
| "loss": 4.688, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30350 |
| }, |
| { |
| "epoch": 2.2149401628444965, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.00017676101564159357, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30400 |
| }, |
| { |
| "epoch": 2.2185832164520303, |
| "grad_norm": 2.703125, |
| "learning_rate": 0.00017668760312319142, |
| "loss": 4.6481, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30450 |
| }, |
| { |
| "epoch": 2.222226270059564, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.0001766140901277697, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30500 |
| }, |
| { |
| "epoch": 2.225869323667098, |
| "grad_norm": 3.125, |
| "learning_rate": 0.0001765404767516461, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30550 |
| }, |
| { |
| "epoch": 2.2295123772746317, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00017646676309126966, |
| "loss": 4.6588, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30600 |
| }, |
| { |
| "epoch": 2.2331554308821655, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.000176392949243221, |
| "loss": 4.6726, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30650 |
| }, |
| { |
| "epoch": 2.2367984844896993, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00017631903530421181, |
| "loss": 4.66, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30700 |
| }, |
| { |
| "epoch": 2.240441538097233, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00017624502137108508, |
| "loss": 4.6832, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30750 |
| }, |
| { |
| "epoch": 2.244084591704767, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00017617090754081476, |
| "loss": 4.6839, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30800 |
| }, |
| { |
| "epoch": 2.2477276453123007, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00017609669391050568, |
| "loss": 4.658, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30850 |
| }, |
| { |
| "epoch": 2.2513706989198345, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00017602238057739343, |
| "loss": 4.6806, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30900 |
| }, |
| { |
| "epoch": 2.2550137525273684, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.0001759479676388442, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 30950 |
| }, |
| { |
| "epoch": 2.258656806134902, |
| "grad_norm": 1.1640625, |
| "learning_rate": 0.00017587345519235473, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.262299859742436, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00017579884333555215, |
| "loss": 4.6699, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31050 |
| }, |
| { |
| "epoch": 2.26594291334997, |
| "grad_norm": 2.578125, |
| "learning_rate": 0.0001757241321661938, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31100 |
| }, |
| { |
| "epoch": 2.2695859669575036, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00017564932178216713, |
| "loss": 4.6814, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31150 |
| }, |
| { |
| "epoch": 2.2732290205650374, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.0001755744122814896, |
| "loss": 4.6668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31200 |
| }, |
| { |
| "epoch": 2.276872074172571, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.0001754994037623086, |
| "loss": 4.6684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31250 |
| }, |
| { |
| "epoch": 2.2805151277801055, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00017542429632290115, |
| "loss": 4.6533, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31300 |
| }, |
| { |
| "epoch": 2.2841581813876393, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00017534909006167393, |
| "loss": 4.6619, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31350 |
| }, |
| { |
| "epoch": 2.287801234995173, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00017527378507716304, |
| "loss": 4.6627, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31400 |
| }, |
| { |
| "epoch": 2.291444288602707, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00017519838146803405, |
| "loss": 4.6611, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31450 |
| }, |
| { |
| "epoch": 2.2950873422102407, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00017512287933308166, |
| "loss": 4.6721, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.2987303958177745, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.0001750472787712296, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31550 |
| }, |
| { |
| "epoch": 2.3023734494253083, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00017497157988153074, |
| "loss": 4.673, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31600 |
| }, |
| { |
| "epoch": 2.306016503032842, |
| "grad_norm": 2.546875, |
| "learning_rate": 0.0001748957827631666, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31650 |
| }, |
| { |
| "epoch": 2.309659556640376, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00017481988751544752, |
| "loss": 4.6532, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31700 |
| }, |
| { |
| "epoch": 2.3133026102479097, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00017474389423781234, |
| "loss": 4.6598, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31750 |
| }, |
| { |
| "epoch": 2.3169456638554435, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00017466780302982836, |
| "loss": 4.6708, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31800 |
| }, |
| { |
| "epoch": 2.3205887174629773, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00017459161399119123, |
| "loss": 4.6808, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31850 |
| }, |
| { |
| "epoch": 2.324231771070511, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001745153272217247, |
| "loss": 4.677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31900 |
| }, |
| { |
| "epoch": 2.327874824678045, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00017443894282138066, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 31950 |
| }, |
| { |
| "epoch": 2.331517878285579, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.00017436246089023888, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.335160931893113, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001742858815285068, |
| "loss": 4.6805, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32050 |
| }, |
| { |
| "epoch": 2.338803985500647, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.00017420920483651972, |
| "loss": 4.6645, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32100 |
| }, |
| { |
| "epoch": 2.3424470391081806, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00017413243091474035, |
| "loss": 4.6703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32150 |
| }, |
| { |
| "epoch": 2.3460900927157144, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.00017405555986375881, |
| "loss": 4.6626, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32200 |
| }, |
| { |
| "epoch": 2.3497331463232483, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00017397859178429242, |
| "loss": 4.675, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32250 |
| }, |
| { |
| "epoch": 2.353376199930782, |
| "grad_norm": 1.8203125, |
| "learning_rate": 0.00017390152677718574, |
| "loss": 4.6866, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32300 |
| }, |
| { |
| "epoch": 2.357019253538316, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00017382436494341023, |
| "loss": 4.6791, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32350 |
| }, |
| { |
| "epoch": 2.3606623071458497, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.0001737471063840643, |
| "loss": 4.6648, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32400 |
| }, |
| { |
| "epoch": 2.3643053607533835, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00017366975120037298, |
| "loss": 4.6664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32450 |
| }, |
| { |
| "epoch": 2.3679484143609173, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00017359229949368802, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32500 |
| }, |
| { |
| "epoch": 2.371591467968451, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.0001735147513654875, |
| "loss": 4.6649, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32550 |
| }, |
| { |
| "epoch": 2.375234521575985, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00017343710691737602, |
| "loss": 4.6788, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32600 |
| }, |
| { |
| "epoch": 2.3788775751835187, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00017335936625108422, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32650 |
| }, |
| { |
| "epoch": 2.3825206287910525, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.0001732815294684688, |
| "loss": 4.6543, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32700 |
| }, |
| { |
| "epoch": 2.3861636823985863, |
| "grad_norm": 1.1875, |
| "learning_rate": 0.00017320359667151252, |
| "loss": 4.6802, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32750 |
| }, |
| { |
| "epoch": 2.38980673600612, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00017312556796232387, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32800 |
| }, |
| { |
| "epoch": 2.393449789613654, |
| "grad_norm": 2.3125, |
| "learning_rate": 0.00017304744344313693, |
| "loss": 4.6545, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32850 |
| }, |
| { |
| "epoch": 2.3970928432211878, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00017296922321631146, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32900 |
| }, |
| { |
| "epoch": 2.400735896828722, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.0001728909073843325, |
| "loss": 4.674, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 32950 |
| }, |
| { |
| "epoch": 2.404378950436256, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.0001728124960498104, |
| "loss": 4.6781, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.4080220040437896, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00017273398931548064, |
| "loss": 4.6719, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33050 |
| }, |
| { |
| "epoch": 2.4116650576513234, |
| "grad_norm": 2.359375, |
| "learning_rate": 0.0001726553872842037, |
| "loss": 4.6721, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33100 |
| }, |
| { |
| "epoch": 2.4153081112588572, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00017257669005896485, |
| "loss": 4.6609, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33150 |
| }, |
| { |
| "epoch": 2.418951164866391, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.00017249789774287423, |
| "loss": 4.67, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33200 |
| }, |
| { |
| "epoch": 2.422594218473925, |
| "grad_norm": 2.3125, |
| "learning_rate": 0.00017241901043916645, |
| "loss": 4.6684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33250 |
| }, |
| { |
| "epoch": 2.4262372720814587, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.00017234002825120053, |
| "loss": 4.6529, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33300 |
| }, |
| { |
| "epoch": 2.4298803256889925, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00017226095128245998, |
| "loss": 4.6753, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33350 |
| }, |
| { |
| "epoch": 2.4335233792965263, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00017218177963655232, |
| "loss": 4.6712, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33400 |
| }, |
| { |
| "epoch": 2.43716643290406, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00017210251341720926, |
| "loss": 4.6821, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33450 |
| }, |
| { |
| "epoch": 2.440809486511594, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.0001720231527282863, |
| "loss": 4.6645, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33500 |
| }, |
| { |
| "epoch": 2.4444525401191277, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00017194369767376281, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33550 |
| }, |
| { |
| "epoch": 2.4480955937266615, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.0001718641483577417, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33600 |
| }, |
| { |
| "epoch": 2.4517386473341953, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00017178450488444954, |
| "loss": 4.6735, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33650 |
| }, |
| { |
| "epoch": 2.4553817009417296, |
| "grad_norm": 1.1015625, |
| "learning_rate": 0.00017170476735823604, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33700 |
| }, |
| { |
| "epoch": 2.4590247545492634, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00017162493588357433, |
| "loss": 4.6804, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33750 |
| }, |
| { |
| "epoch": 2.462667808156797, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.0001715450105650606, |
| "loss": 4.6773, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33800 |
| }, |
| { |
| "epoch": 2.466310861764331, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.0001714649915074139, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33850 |
| }, |
| { |
| "epoch": 2.469953915371865, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.0001713848788154762, |
| "loss": 4.6661, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33900 |
| }, |
| { |
| "epoch": 2.4735969689793986, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00017130467259421212, |
| "loss": 4.6834, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 33950 |
| }, |
| { |
| "epoch": 2.4772400225869324, |
| "grad_norm": 4.03125, |
| "learning_rate": 0.0001712243729487088, |
| "loss": 4.6664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2.4808830761944662, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.00017114397998417577, |
| "loss": 4.6697, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34050 |
| }, |
| { |
| "epoch": 2.484526129802, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00017106349380594492, |
| "loss": 4.686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34100 |
| }, |
| { |
| "epoch": 2.488169183409534, |
| "grad_norm": 2.8125, |
| "learning_rate": 0.00017098291451947013, |
| "loss": 4.6663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34150 |
| }, |
| { |
| "epoch": 2.4918122370170677, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00017090224223032741, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34200 |
| }, |
| { |
| "epoch": 2.4954552906246015, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00017082147704421455, |
| "loss": 4.6702, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34250 |
| }, |
| { |
| "epoch": 2.4990983442321353, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00017074061906695109, |
| "loss": 4.6702, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34300 |
| }, |
| { |
| "epoch": 2.502741397839669, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.00017065966840447807, |
| "loss": 4.6797, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34350 |
| }, |
| { |
| "epoch": 2.506384451447203, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.0001705786251628581, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34400 |
| }, |
| { |
| "epoch": 2.5100275050547367, |
| "grad_norm": 1.2421875, |
| "learning_rate": 0.00017049748944827494, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34450 |
| }, |
| { |
| "epoch": 2.5136705586622705, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00017041626136703357, |
| "loss": 4.6617, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34500 |
| }, |
| { |
| "epoch": 2.5173136122698043, |
| "grad_norm": 2.046875, |
| "learning_rate": 0.00017033494102556006, |
| "loss": 4.6664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34550 |
| }, |
| { |
| "epoch": 2.520956665877338, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.0001702535285304012, |
| "loss": 4.6672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34600 |
| }, |
| { |
| "epoch": 2.5245997194848724, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00017017202398822477, |
| "loss": 4.6616, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34650 |
| }, |
| { |
| "epoch": 2.528242773092406, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00017009042750581888, |
| "loss": 4.6608, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34700 |
| }, |
| { |
| "epoch": 2.53188582669994, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.00017000873919009225, |
| "loss": 4.6797, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34750 |
| }, |
| { |
| "epoch": 2.535528880307474, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00016992695914807394, |
| "loss": 4.6768, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34800 |
| }, |
| { |
| "epoch": 2.5391719339150076, |
| "grad_norm": 1.1484375, |
| "learning_rate": 0.00016984508748691307, |
| "loss": 4.6575, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34850 |
| }, |
| { |
| "epoch": 2.5428149875225414, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00016976312431387893, |
| "loss": 4.6685, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34900 |
| }, |
| { |
| "epoch": 2.5464580411300752, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00016968106973636062, |
| "loss": 4.6746, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 34950 |
| }, |
| { |
| "epoch": 2.550101094737609, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00016959892386186705, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.553744148345143, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00016951668679802677, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35050 |
| }, |
| { |
| "epoch": 2.5573872019526767, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.0001694343586525877, |
| "loss": 4.6767, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35100 |
| }, |
| { |
| "epoch": 2.5610302555602105, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.00016935193953341722, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35150 |
| }, |
| { |
| "epoch": 2.5646733091677443, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00016926942954850182, |
| "loss": 4.6739, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35200 |
| }, |
| { |
| "epoch": 2.568316362775278, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00016918682880594707, |
| "loss": 4.6814, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35250 |
| }, |
| { |
| "epoch": 2.5719594163828123, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.0001691041374139775, |
| "loss": 4.6628, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35300 |
| }, |
| { |
| "epoch": 2.575602469990346, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.0001690213554809363, |
| "loss": 4.6624, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35350 |
| }, |
| { |
| "epoch": 2.57924552359788, |
| "grad_norm": 2.421875, |
| "learning_rate": 0.00016893848311528542, |
| "loss": 4.6547, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35400 |
| }, |
| { |
| "epoch": 2.5828885772054138, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00016885552042560518, |
| "loss": 4.6846, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35450 |
| }, |
| { |
| "epoch": 2.5865316308129476, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0001687724675205943, |
| "loss": 4.6568, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35500 |
| }, |
| { |
| "epoch": 2.5901746844204814, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.0001686893245090697, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35550 |
| }, |
| { |
| "epoch": 2.593817738028015, |
| "grad_norm": 2.40625, |
| "learning_rate": 0.0001686060914999664, |
| "loss": 4.6646, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35600 |
| }, |
| { |
| "epoch": 2.597460791635549, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00016852276860233724, |
| "loss": 4.6611, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35650 |
| }, |
| { |
| "epoch": 2.601103845243083, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.0001684393559253529, |
| "loss": 4.6841, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35700 |
| }, |
| { |
| "epoch": 2.6047468988506166, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00016835585357830162, |
| "loss": 4.6643, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35750 |
| }, |
| { |
| "epoch": 2.6083899524581504, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.0001682722616705892, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35800 |
| }, |
| { |
| "epoch": 2.6120330060656842, |
| "grad_norm": 3.953125, |
| "learning_rate": 0.00016818858031173887, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35850 |
| }, |
| { |
| "epoch": 2.615676059673218, |
| "grad_norm": 3.578125, |
| "learning_rate": 0.00016810480961139087, |
| "loss": 4.6653, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35900 |
| }, |
| { |
| "epoch": 2.619319113280752, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00016802094967930252, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 35950 |
| }, |
| { |
| "epoch": 2.6229621668882857, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00016793700062534816, |
| "loss": 4.6802, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.6266052204958195, |
| "grad_norm": 2.296875, |
| "learning_rate": 0.00016785296255951887, |
| "loss": 4.6725, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36050 |
| }, |
| { |
| "epoch": 2.6302482741033533, |
| "grad_norm": 1.2734375, |
| "learning_rate": 0.00016776883559192234, |
| "loss": 4.6635, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36100 |
| }, |
| { |
| "epoch": 2.633891327710887, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00016768461983278273, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36150 |
| }, |
| { |
| "epoch": 2.637534381318421, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00016760031539244047, |
| "loss": 4.6712, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36200 |
| }, |
| { |
| "epoch": 2.6411774349259547, |
| "grad_norm": 2.640625, |
| "learning_rate": 0.00016751592238135234, |
| "loss": 4.6612, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36250 |
| }, |
| { |
| "epoch": 2.6448204885334885, |
| "grad_norm": 2.46875, |
| "learning_rate": 0.00016743144091009105, |
| "loss": 4.6673, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36300 |
| }, |
| { |
| "epoch": 2.6484635421410228, |
| "grad_norm": 1.3828125, |
| "learning_rate": 0.0001673468710893452, |
| "loss": 4.6663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36350 |
| }, |
| { |
| "epoch": 2.6521065957485566, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.00016726221302991925, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36400 |
| }, |
| { |
| "epoch": 2.6557496493560904, |
| "grad_norm": 2.71875, |
| "learning_rate": 0.0001671774668427332, |
| "loss": 4.6669, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36450 |
| }, |
| { |
| "epoch": 2.659392702963624, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00016709263263882244, |
| "loss": 4.6605, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36500 |
| }, |
| { |
| "epoch": 2.663035756571158, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00016700771052933786, |
| "loss": 4.668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36550 |
| }, |
| { |
| "epoch": 2.666678810178692, |
| "grad_norm": 1.375, |
| "learning_rate": 0.00016692270062554539, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36600 |
| }, |
| { |
| "epoch": 2.6703218637862256, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.00016683760303882602, |
| "loss": 4.6592, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36650 |
| }, |
| { |
| "epoch": 2.6739649173937594, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00016675241788067564, |
| "loss": 4.677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36700 |
| }, |
| { |
| "epoch": 2.677607971001293, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.0001666671452627049, |
| "loss": 4.6738, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36750 |
| }, |
| { |
| "epoch": 2.681251024608827, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00016658178529663892, |
| "loss": 4.6657, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36800 |
| }, |
| { |
| "epoch": 2.684894078216361, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00016649633809431746, |
| "loss": 4.6625, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36850 |
| }, |
| { |
| "epoch": 2.6885371318238946, |
| "grad_norm": 3.28125, |
| "learning_rate": 0.0001664108037676944, |
| "loss": 4.6672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36900 |
| }, |
| { |
| "epoch": 2.692180185431429, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00016632518242883787, |
| "loss": 4.6855, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 36950 |
| }, |
| { |
| "epoch": 2.6958232390389627, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00016623947418993, |
| "loss": 4.6676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37000 |
| }, |
| { |
| "epoch": 2.6994662926464965, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00016615367916326672, |
| "loss": 4.6699, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37050 |
| }, |
| { |
| "epoch": 2.7031093462540303, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.0001660677974612577, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37100 |
| }, |
| { |
| "epoch": 2.706752399861564, |
| "grad_norm": 2.515625, |
| "learning_rate": 0.0001659818291964262, |
| "loss": 4.6789, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37150 |
| }, |
| { |
| "epoch": 2.710395453469098, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00016589577448140888, |
| "loss": 4.6829, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37200 |
| }, |
| { |
| "epoch": 2.7140385070766317, |
| "grad_norm": 2.8125, |
| "learning_rate": 0.00016580963342895563, |
| "loss": 4.6611, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37250 |
| }, |
| { |
| "epoch": 2.7176815606841656, |
| "grad_norm": 2.046875, |
| "learning_rate": 0.00016572340615192952, |
| "loss": 4.6801, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37300 |
| }, |
| { |
| "epoch": 2.7213246142916994, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00016563709276330656, |
| "loss": 4.6716, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37350 |
| }, |
| { |
| "epoch": 2.724967667899233, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00016555069337617558, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37400 |
| }, |
| { |
| "epoch": 2.728610721506767, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00016546420810373809, |
| "loss": 4.6735, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37450 |
| }, |
| { |
| "epoch": 2.732253775114301, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.00016537763705930816, |
| "loss": 4.6842, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37500 |
| }, |
| { |
| "epoch": 2.7358968287218346, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.0001652909803563122, |
| "loss": 4.6689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37550 |
| }, |
| { |
| "epoch": 2.7395398823293684, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00016520423810828888, |
| "loss": 4.6773, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37600 |
| }, |
| { |
| "epoch": 2.743182935936902, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.0001651174104288889, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37650 |
| }, |
| { |
| "epoch": 2.746825989544436, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00016503049743187494, |
| "loss": 4.6915, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37700 |
| }, |
| { |
| "epoch": 2.75046904315197, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.0001649434992311215, |
| "loss": 4.684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37750 |
| }, |
| { |
| "epoch": 2.7541120967595036, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.0001648564159406146, |
| "loss": 4.6677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37800 |
| }, |
| { |
| "epoch": 2.7577551503670374, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00016476924767445182, |
| "loss": 4.6729, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37850 |
| }, |
| { |
| "epoch": 2.7613982039745713, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00016468199454684214, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37900 |
| }, |
| { |
| "epoch": 2.765041257582105, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00016459465667210558, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 37950 |
| }, |
| { |
| "epoch": 2.7686843111896393, |
| "grad_norm": 2.5, |
| "learning_rate": 0.00016450723416467332, |
| "loss": 4.6599, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38000 |
| }, |
| { |
| "epoch": 2.772327364797173, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00016441972713908737, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38050 |
| }, |
| { |
| "epoch": 2.775970418404707, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00016433213571000047, |
| "loss": 4.6749, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38100 |
| }, |
| { |
| "epoch": 2.7796134720122407, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00016424445999217602, |
| "loss": 4.673, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38150 |
| }, |
| { |
| "epoch": 2.7832565256197745, |
| "grad_norm": 2.625, |
| "learning_rate": 0.0001641567001004877, |
| "loss": 4.6883, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38200 |
| }, |
| { |
| "epoch": 2.7868995792273084, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.00016406885614991964, |
| "loss": 4.6538, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38250 |
| }, |
| { |
| "epoch": 2.790542632834842, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00016398092825556604, |
| "loss": 4.6694, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38300 |
| }, |
| { |
| "epoch": 2.794185686442376, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00016389291653263108, |
| "loss": 4.6546, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38350 |
| }, |
| { |
| "epoch": 2.79782874004991, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00016380482109642878, |
| "loss": 4.6594, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38400 |
| }, |
| { |
| "epoch": 2.8014717936574436, |
| "grad_norm": 1.2421875, |
| "learning_rate": 0.0001637166420623828, |
| "loss": 4.6741, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38450 |
| }, |
| { |
| "epoch": 2.8051148472649774, |
| "grad_norm": 3.25, |
| "learning_rate": 0.00016362837954602642, |
| "loss": 4.6649, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38500 |
| }, |
| { |
| "epoch": 2.808757900872511, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00016354003366300223, |
| "loss": 4.6734, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38550 |
| }, |
| { |
| "epoch": 2.812400954480045, |
| "grad_norm": 2.40625, |
| "learning_rate": 0.00016345160452906207, |
| "loss": 4.6686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38600 |
| }, |
| { |
| "epoch": 2.8160440080875793, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00016336309226006688, |
| "loss": 4.663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38650 |
| }, |
| { |
| "epoch": 2.819687061695113, |
| "grad_norm": 2.15625, |
| "learning_rate": 0.0001632744969719864, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38700 |
| }, |
| { |
| "epoch": 2.823330115302647, |
| "grad_norm": 2.140625, |
| "learning_rate": 0.0001631858187808994, |
| "loss": 4.6513, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38750 |
| }, |
| { |
| "epoch": 2.8269731689101807, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00016309705780299298, |
| "loss": 4.6545, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38800 |
| }, |
| { |
| "epoch": 2.8306162225177145, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.00016300821415456292, |
| "loss": 4.6579, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38850 |
| }, |
| { |
| "epoch": 2.8342592761252483, |
| "grad_norm": 2.203125, |
| "learning_rate": 0.00016291928795201318, |
| "loss": 4.6785, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38900 |
| }, |
| { |
| "epoch": 2.837902329732782, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00016283027931185594, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 38950 |
| }, |
| { |
| "epoch": 2.841545383340316, |
| "grad_norm": 2.84375, |
| "learning_rate": 0.00016274118835071146, |
| "loss": 4.6633, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39000 |
| }, |
| { |
| "epoch": 2.8451884369478497, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.0001626520151853077, |
| "loss": 4.6687, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39050 |
| }, |
| { |
| "epoch": 2.8488314905553835, |
| "grad_norm": 1.140625, |
| "learning_rate": 0.00016256275993248052, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39100 |
| }, |
| { |
| "epoch": 2.8524745441629173, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00016247342270917309, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39150 |
| }, |
| { |
| "epoch": 2.856117597770451, |
| "grad_norm": 3.953125, |
| "learning_rate": 0.0001623840036324362, |
| "loss": 4.6834, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39200 |
| }, |
| { |
| "epoch": 2.859760651377985, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001622945028194278, |
| "loss": 4.6953, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39250 |
| }, |
| { |
| "epoch": 2.8634037049855188, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00016220492038741292, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39300 |
| }, |
| { |
| "epoch": 2.8670467585930526, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00016211525645376353, |
| "loss": 4.6674, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39350 |
| }, |
| { |
| "epoch": 2.8706898122005864, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.0001620255111359584, |
| "loss": 4.6619, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39400 |
| }, |
| { |
| "epoch": 2.87433286580812, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.0001619356845515829, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39450 |
| }, |
| { |
| "epoch": 2.877975919415654, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00016184577681832893, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39500 |
| }, |
| { |
| "epoch": 2.881618973023188, |
| "grad_norm": 1.8671875, |
| "learning_rate": 0.0001617557880539947, |
| "loss": 4.6815, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39550 |
| }, |
| { |
| "epoch": 2.8852620266307216, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.0001616657183764845, |
| "loss": 4.6618, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39600 |
| }, |
| { |
| "epoch": 2.8889050802382554, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00016157556790380882, |
| "loss": 4.6625, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39650 |
| }, |
| { |
| "epoch": 2.8925481338457897, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00016148533675408377, |
| "loss": 4.6692, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39700 |
| }, |
| { |
| "epoch": 2.8961911874533235, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.00016139502504553135, |
| "loss": 4.6749, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39750 |
| }, |
| { |
| "epoch": 2.8998342410608573, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00016130463289647907, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39800 |
| }, |
| { |
| "epoch": 2.903477294668391, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00016121416042535973, |
| "loss": 4.6761, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39850 |
| }, |
| { |
| "epoch": 2.907120348275925, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00016112360775071154, |
| "loss": 4.6777, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39900 |
| }, |
| { |
| "epoch": 2.9107634018834587, |
| "grad_norm": 1.375, |
| "learning_rate": 0.0001610329749911776, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 39950 |
| }, |
| { |
| "epoch": 2.9144064554909925, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00016094226226550618, |
| "loss": 4.6697, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.9180495090985263, |
| "grad_norm": 2.25, |
| "learning_rate": 0.00016085146969255004, |
| "loss": 4.6815, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40050 |
| }, |
| { |
| "epoch": 2.92169256270606, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.0001607605973912668, |
| "loss": 4.6589, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40100 |
| }, |
| { |
| "epoch": 2.925335616313594, |
| "grad_norm": 5.1875, |
| "learning_rate": 0.00016066964548071838, |
| "loss": 4.6829, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40150 |
| }, |
| { |
| "epoch": 2.9289786699211278, |
| "grad_norm": 1.875, |
| "learning_rate": 0.00016057861408007114, |
| "loss": 4.6657, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40200 |
| }, |
| { |
| "epoch": 2.9326217235286616, |
| "grad_norm": 2.90625, |
| "learning_rate": 0.00016048750330859544, |
| "loss": 4.6819, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40250 |
| }, |
| { |
| "epoch": 2.936264777136196, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.0001603963132856657, |
| "loss": 4.6581, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40300 |
| }, |
| { |
| "epoch": 2.9399078307437296, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.00016030504413076032, |
| "loss": 4.6718, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40350 |
| }, |
| { |
| "epoch": 2.9435508843512634, |
| "grad_norm": 2.578125, |
| "learning_rate": 0.00016021369596346108, |
| "loss": 4.6695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40400 |
| }, |
| { |
| "epoch": 2.9471939379587972, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00016012226890345352, |
| "loss": 4.6671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40450 |
| }, |
| { |
| "epoch": 2.950836991566331, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00016003076307052644, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40500 |
| }, |
| { |
| "epoch": 2.954480045173865, |
| "grad_norm": 2.734375, |
| "learning_rate": 0.00015993917858457194, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40550 |
| }, |
| { |
| "epoch": 2.9581230987813987, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00015984751556558506, |
| "loss": 4.6665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40600 |
| }, |
| { |
| "epoch": 2.9617661523889325, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.00015975577413366386, |
| "loss": 4.661, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40650 |
| }, |
| { |
| "epoch": 2.9654092059964663, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00015966395440900896, |
| "loss": 4.673, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40700 |
| }, |
| { |
| "epoch": 2.969052259604, |
| "grad_norm": 1.25, |
| "learning_rate": 0.00015957205651192377, |
| "loss": 4.6778, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40750 |
| }, |
| { |
| "epoch": 2.972695313211534, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00015948008056281395, |
| "loss": 4.67, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40800 |
| }, |
| { |
| "epoch": 2.9763383668190677, |
| "grad_norm": 1.1015625, |
| "learning_rate": 0.00015938802668218752, |
| "loss": 4.6577, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40850 |
| }, |
| { |
| "epoch": 2.9799814204266015, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00015929589499065458, |
| "loss": 4.6602, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40900 |
| }, |
| { |
| "epoch": 2.9836244740341353, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00015920368560892723, |
| "loss": 4.676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 40950 |
| }, |
| { |
| "epoch": 2.987267527641669, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.0001591113986578192, |
| "loss": 4.658, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.990910581249203, |
| "grad_norm": 3.234375, |
| "learning_rate": 0.00015901903425824605, |
| "loss": 4.6804, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41050 |
| }, |
| { |
| "epoch": 2.9945536348567368, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.0001589265925312247, |
| "loss": 4.6605, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41100 |
| }, |
| { |
| "epoch": 2.9981966884642706, |
| "grad_norm": 1.75, |
| "learning_rate": 0.0001588340735978734, |
| "loss": 4.6688, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41150 |
| }, |
| { |
| "epoch": 2.9997996320515856, |
| "eval_loss": 4.678781032562256, |
| "eval_runtime": 582.4467, |
| "eval_samples_per_second": 520.782, |
| "eval_steps_per_second": 43.4, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41172 |
| }, |
| { |
| "epoch": 3.001821526803767, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.0001587414775794116, |
| "loss": 4.6603, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41200 |
| }, |
| { |
| "epoch": 3.0054645804113007, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00015864880459715968, |
| "loss": 4.6561, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41250 |
| }, |
| { |
| "epoch": 3.0091076340188345, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00015855605477253893, |
| "loss": 4.6607, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41300 |
| }, |
| { |
| "epoch": 3.0127506876263683, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00015846322822707124, |
| "loss": 4.6587, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41350 |
| }, |
| { |
| "epoch": 3.016393741233902, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.0001583703250823791, |
| "loss": 4.66, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41400 |
| }, |
| { |
| "epoch": 3.020036794841436, |
| "grad_norm": 2.71875, |
| "learning_rate": 0.00015827734546018535, |
| "loss": 4.6593, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41450 |
| }, |
| { |
| "epoch": 3.0236798484489698, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00015818428948231297, |
| "loss": 4.6726, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41500 |
| }, |
| { |
| "epoch": 3.0273229020565036, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00015809115727068504, |
| "loss": 4.6615, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41550 |
| }, |
| { |
| "epoch": 3.030965955664038, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.0001579979489473245, |
| "loss": 4.6702, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41600 |
| }, |
| { |
| "epoch": 3.0346090092715716, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00015790466463435395, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41650 |
| }, |
| { |
| "epoch": 3.0382520628791054, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00015781130445399573, |
| "loss": 4.6531, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41700 |
| }, |
| { |
| "epoch": 3.0418951164866392, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00015771786852857142, |
| "loss": 4.654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41750 |
| }, |
| { |
| "epoch": 3.045538170094173, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00015762435698050186, |
| "loss": 4.6573, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41800 |
| }, |
| { |
| "epoch": 3.049181223701707, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.00015753076993230706, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41850 |
| }, |
| { |
| "epoch": 3.0528242773092407, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00015743710750660588, |
| "loss": 4.666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41900 |
| }, |
| { |
| "epoch": 3.0564673309167745, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00015734336982611594, |
| "loss": 4.6541, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 41950 |
| }, |
| { |
| "epoch": 3.0601103845243083, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.0001572495570136535, |
| "loss": 4.6601, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42000 |
| }, |
| { |
| "epoch": 3.063753438131842, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.0001571556691921332, |
| "loss": 4.6492, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42050 |
| }, |
| { |
| "epoch": 3.067396491739376, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.000157061706484568, |
| "loss": 4.6617, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42100 |
| }, |
| { |
| "epoch": 3.0710395453469097, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.000156967669014069, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42150 |
| }, |
| { |
| "epoch": 3.0746825989544435, |
| "grad_norm": 1.1640625, |
| "learning_rate": 0.0001568735569038452, |
| "loss": 4.6767, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42200 |
| }, |
| { |
| "epoch": 3.0783256525619773, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00015677937027720344, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42250 |
| }, |
| { |
| "epoch": 3.081968706169511, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00015668510925754813, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42300 |
| }, |
| { |
| "epoch": 3.085611759777045, |
| "grad_norm": 2.4375, |
| "learning_rate": 0.0001565907739683812, |
| "loss": 4.6605, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42350 |
| }, |
| { |
| "epoch": 3.0892548133845787, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.00015649636453330193, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42400 |
| }, |
| { |
| "epoch": 3.092897866992113, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00015640188107600664, |
| "loss": 4.6404, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42450 |
| }, |
| { |
| "epoch": 3.096540920599647, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00015630732372028873, |
| "loss": 4.6577, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42500 |
| }, |
| { |
| "epoch": 3.1001839742071806, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00015621269259003835, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42550 |
| }, |
| { |
| "epoch": 3.1038270278147144, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.00015611798780924236, |
| "loss": 4.6652, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42600 |
| }, |
| { |
| "epoch": 3.1074700814222482, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.0001560232095019841, |
| "loss": 4.6604, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42650 |
| }, |
| { |
| "epoch": 3.111113135029782, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00015592835779244327, |
| "loss": 4.6584, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42700 |
| }, |
| { |
| "epoch": 3.114756188637316, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.0001558334328048957, |
| "loss": 4.6588, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42750 |
| }, |
| { |
| "epoch": 3.1183992422448497, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00015573843466371324, |
| "loss": 4.6702, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42800 |
| }, |
| { |
| "epoch": 3.1220422958523835, |
| "grad_norm": 2.53125, |
| "learning_rate": 0.00015564336349336362, |
| "loss": 4.668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42850 |
| }, |
| { |
| "epoch": 3.1256853494599173, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00015554821941841023, |
| "loss": 4.6675, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42900 |
| }, |
| { |
| "epoch": 3.129328403067451, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00015545300256351193, |
| "loss": 4.6616, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 42950 |
| }, |
| { |
| "epoch": 3.132971456674985, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00015535771305342307, |
| "loss": 4.6741, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43000 |
| }, |
| { |
| "epoch": 3.1366145102825187, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00015526235101299304, |
| "loss": 4.6554, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43050 |
| }, |
| { |
| "epoch": 3.1402575638900525, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00015516691656716636, |
| "loss": 4.6621, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43100 |
| }, |
| { |
| "epoch": 3.1439006174975863, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00015507140984098242, |
| "loss": 4.66, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43150 |
| }, |
| { |
| "epoch": 3.14754367110512, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00015497583095957522, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43200 |
| }, |
| { |
| "epoch": 3.1511867247126544, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.0001548801800481734, |
| "loss": 4.6719, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43250 |
| }, |
| { |
| "epoch": 3.154829778320188, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.0001547844572320999, |
| "loss": 4.6563, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43300 |
| }, |
| { |
| "epoch": 3.158472831927722, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.00015468866263677194, |
| "loss": 4.6653, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43350 |
| }, |
| { |
| "epoch": 3.162115885535256, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00015459279638770077, |
| "loss": 4.6787, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43400 |
| }, |
| { |
| "epoch": 3.1657589391427896, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00015449685861049144, |
| "loss": 4.6616, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43450 |
| }, |
| { |
| "epoch": 3.1694019927503234, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00015440084943084282, |
| "loss": 4.6695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43500 |
| }, |
| { |
| "epoch": 3.1730450463578572, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00015430476897454725, |
| "loss": 4.6631, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43550 |
| }, |
| { |
| "epoch": 3.176688099965391, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00015420861736749057, |
| "loss": 4.6522, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43600 |
| }, |
| { |
| "epoch": 3.180331153572925, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00015411239473565172, |
| "loss": 4.662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43650 |
| }, |
| { |
| "epoch": 3.1839742071804586, |
| "grad_norm": 1.8515625, |
| "learning_rate": 0.00015401610120510274, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43700 |
| }, |
| { |
| "epoch": 3.1876172607879925, |
| "grad_norm": 1.9921875, |
| "learning_rate": 0.0001539197369020086, |
| "loss": 4.666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43750 |
| }, |
| { |
| "epoch": 3.1912603143955263, |
| "grad_norm": 1.2734375, |
| "learning_rate": 0.00015382330195262697, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43800 |
| }, |
| { |
| "epoch": 3.19490336800306, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.00015372679648330807, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43850 |
| }, |
| { |
| "epoch": 3.198546421610594, |
| "grad_norm": 2.453125, |
| "learning_rate": 0.00015363022062049453, |
| "loss": 4.6707, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43900 |
| }, |
| { |
| "epoch": 3.2021894752181277, |
| "grad_norm": 2.625, |
| "learning_rate": 0.00015353357449072118, |
| "loss": 4.6808, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 43950 |
| }, |
| { |
| "epoch": 3.2058325288256615, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00015343685822061498, |
| "loss": 4.6628, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44000 |
| }, |
| { |
| "epoch": 3.2094755824331953, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00015334007193689475, |
| "loss": 4.6789, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44050 |
| }, |
| { |
| "epoch": 3.213118636040729, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00015324321576637098, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44100 |
| }, |
| { |
| "epoch": 3.2167616896482634, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00015314628983594588, |
| "loss": 4.6518, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44150 |
| }, |
| { |
| "epoch": 3.220404743255797, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.0001530492942726129, |
| "loss": 4.6576, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44200 |
| }, |
| { |
| "epoch": 3.224047796863331, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00015295222920345681, |
| "loss": 4.6761, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44250 |
| }, |
| { |
| "epoch": 3.227690850470865, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00015285509475565344, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44300 |
| }, |
| { |
| "epoch": 3.2313339040783986, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00015275789105646952, |
| "loss": 4.6736, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44350 |
| }, |
| { |
| "epoch": 3.2349769576859324, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.0001526606182332625, |
| "loss": 4.6553, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44400 |
| }, |
| { |
| "epoch": 3.238620011293466, |
| "grad_norm": 2.109375, |
| "learning_rate": 0.00015256327641348036, |
| "loss": 4.6735, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44450 |
| }, |
| { |
| "epoch": 3.242263064901, |
| "grad_norm": 2.3125, |
| "learning_rate": 0.0001524658657246616, |
| "loss": 4.6757, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44500 |
| }, |
| { |
| "epoch": 3.245906118508534, |
| "grad_norm": 1.3828125, |
| "learning_rate": 0.0001523683862944348, |
| "loss": 4.6585, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44550 |
| }, |
| { |
| "epoch": 3.2495491721160676, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00015227083825051875, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44600 |
| }, |
| { |
| "epoch": 3.2531922257236014, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.000152173221720722, |
| "loss": 4.6732, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44650 |
| }, |
| { |
| "epoch": 3.2568352793311353, |
| "grad_norm": 2.671875, |
| "learning_rate": 0.00015207553683294298, |
| "loss": 4.6556, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44700 |
| }, |
| { |
| "epoch": 3.260478332938669, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.0001519777837151695, |
| "loss": 4.6633, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44750 |
| }, |
| { |
| "epoch": 3.264121386546203, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.000151879962495479, |
| "loss": 4.6799, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44800 |
| }, |
| { |
| "epoch": 3.2677644401537367, |
| "grad_norm": 1.4296875, |
| "learning_rate": 0.0001517820733020379, |
| "loss": 4.6737, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44850 |
| }, |
| { |
| "epoch": 3.271407493761271, |
| "grad_norm": 3.1875, |
| "learning_rate": 0.00015168411626310184, |
| "loss": 4.6544, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44900 |
| }, |
| { |
| "epoch": 3.2750505473688047, |
| "grad_norm": 2.484375, |
| "learning_rate": 0.00015158609150701537, |
| "loss": 4.6729, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 44950 |
| }, |
| { |
| "epoch": 3.2786936009763386, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.0001514879991622116, |
| "loss": 4.6628, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45000 |
| }, |
| { |
| "epoch": 3.2823366545838724, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.0001513898393572124, |
| "loss": 4.6395, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45050 |
| }, |
| { |
| "epoch": 3.285979708191406, |
| "grad_norm": 2.203125, |
| "learning_rate": 0.00015129161222062783, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45100 |
| }, |
| { |
| "epoch": 3.28962276179894, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00015119331788115633, |
| "loss": 4.6565, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45150 |
| }, |
| { |
| "epoch": 3.293265815406474, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.0001510949564675843, |
| "loss": 4.6621, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45200 |
| }, |
| { |
| "epoch": 3.2969088690140076, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.0001509965281087861, |
| "loss": 4.677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45250 |
| }, |
| { |
| "epoch": 3.3005519226215414, |
| "grad_norm": 1.375, |
| "learning_rate": 0.00015089803293372365, |
| "loss": 4.6671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45300 |
| }, |
| { |
| "epoch": 3.304194976229075, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00015079947107144657, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45350 |
| }, |
| { |
| "epoch": 3.307838029836609, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00015070084265109176, |
| "loss": 4.6641, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45400 |
| }, |
| { |
| "epoch": 3.311481083444143, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00015060214780188345, |
| "loss": 4.6374, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45450 |
| }, |
| { |
| "epoch": 3.3151241370516766, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00015050338665313276, |
| "loss": 4.6689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45500 |
| }, |
| { |
| "epoch": 3.3187671906592104, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.0001504045593342377, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45550 |
| }, |
| { |
| "epoch": 3.3224102442667443, |
| "grad_norm": 2.359375, |
| "learning_rate": 0.00015030566597468305, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45600 |
| }, |
| { |
| "epoch": 3.326053297874278, |
| "grad_norm": 2.046875, |
| "learning_rate": 0.0001502067067040401, |
| "loss": 4.6676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45650 |
| }, |
| { |
| "epoch": 3.329696351481812, |
| "grad_norm": 1.0625, |
| "learning_rate": 0.00015010768165196647, |
| "loss": 4.6737, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45700 |
| }, |
| { |
| "epoch": 3.3333394050893457, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00015000859094820593, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45750 |
| }, |
| { |
| "epoch": 3.3369824586968795, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00014990943472258832, |
| "loss": 4.6773, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45800 |
| }, |
| { |
| "epoch": 3.3406255123044137, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00014981021310502937, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45850 |
| }, |
| { |
| "epoch": 3.3442685659119475, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00014971092622553038, |
| "loss": 4.6628, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45900 |
| }, |
| { |
| "epoch": 3.3479116195194814, |
| "grad_norm": 2.46875, |
| "learning_rate": 0.00014961157421417823, |
| "loss": 4.6629, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 45950 |
| }, |
| { |
| "epoch": 3.351554673127015, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00014951215720114514, |
| "loss": 4.6821, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46000 |
| }, |
| { |
| "epoch": 3.355197726734549, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.00014941267531668845, |
| "loss": 4.6796, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46050 |
| }, |
| { |
| "epoch": 3.3588407803420828, |
| "grad_norm": 1.96875, |
| "learning_rate": 0.00014931312869115052, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46100 |
| }, |
| { |
| "epoch": 3.3624838339496166, |
| "grad_norm": 1.9765625, |
| "learning_rate": 0.00014921351745495858, |
| "loss": 4.6605, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46150 |
| }, |
| { |
| "epoch": 3.3661268875571504, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00014911384173862445, |
| "loss": 4.6685, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46200 |
| }, |
| { |
| "epoch": 3.369769941164684, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00014901410167274438, |
| "loss": 4.6604, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46250 |
| }, |
| { |
| "epoch": 3.373412994772218, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.0001489142973879991, |
| "loss": 4.6723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46300 |
| }, |
| { |
| "epoch": 3.377056048379752, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00014881442901515333, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46350 |
| }, |
| { |
| "epoch": 3.3806991019872856, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00014871449668505586, |
| "loss": 4.6548, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46400 |
| }, |
| { |
| "epoch": 3.3843421555948194, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.00014861450052863914, |
| "loss": 4.6652, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46450 |
| }, |
| { |
| "epoch": 3.3879852092023532, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00014851444067691944, |
| "loss": 4.6641, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46500 |
| }, |
| { |
| "epoch": 3.3916282628098875, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00014841431726099632, |
| "loss": 4.6579, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46550 |
| }, |
| { |
| "epoch": 3.3952713164174213, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00014831413041205272, |
| "loss": 4.666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46600 |
| }, |
| { |
| "epoch": 3.398914370024955, |
| "grad_norm": 1.1484375, |
| "learning_rate": 0.0001482138802613546, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46650 |
| }, |
| { |
| "epoch": 3.402557423632489, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00014811356694025097, |
| "loss": 4.6704, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46700 |
| }, |
| { |
| "epoch": 3.4062004772400227, |
| "grad_norm": 1.203125, |
| "learning_rate": 0.00014801319058017348, |
| "loss": 4.6745, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46750 |
| }, |
| { |
| "epoch": 3.4098435308475565, |
| "grad_norm": 2.8125, |
| "learning_rate": 0.00014791275131263652, |
| "loss": 4.6627, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46800 |
| }, |
| { |
| "epoch": 3.4134865844550903, |
| "grad_norm": 1.625, |
| "learning_rate": 0.0001478122492692368, |
| "loss": 4.6639, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46850 |
| }, |
| { |
| "epoch": 3.417129638062624, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.0001477116845816533, |
| "loss": 4.6558, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46900 |
| }, |
| { |
| "epoch": 3.420772691670158, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00014761105738164706, |
| "loss": 4.6723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 46950 |
| }, |
| { |
| "epoch": 3.4244157452776918, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.0001475103678010611, |
| "loss": 4.659, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47000 |
| }, |
| { |
| "epoch": 3.4280587988852256, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.00014740961597182006, |
| "loss": 4.6581, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47050 |
| }, |
| { |
| "epoch": 3.4317018524927594, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00014730880202593023, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47100 |
| }, |
| { |
| "epoch": 3.435344906100293, |
| "grad_norm": 3.75, |
| "learning_rate": 0.00014720792609547928, |
| "loss": 4.6708, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47150 |
| }, |
| { |
| "epoch": 3.438987959707827, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00014710698831263595, |
| "loss": 4.6745, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47200 |
| }, |
| { |
| "epoch": 3.442631013315361, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.0001470059888096503, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47250 |
| }, |
| { |
| "epoch": 3.4462740669228946, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.000146904927718853, |
| "loss": 4.6691, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47300 |
| }, |
| { |
| "epoch": 3.4499171205304284, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.00014680380517265556, |
| "loss": 4.6609, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47350 |
| }, |
| { |
| "epoch": 3.4535601741379622, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00014670262130354983, |
| "loss": 4.6701, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47400 |
| }, |
| { |
| "epoch": 3.457203227745496, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.00014660137624410827, |
| "loss": 4.6833, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47450 |
| }, |
| { |
| "epoch": 3.4608462813530303, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00014650007012698333, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47500 |
| }, |
| { |
| "epoch": 3.464489334960564, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.0001463987030849075, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47550 |
| }, |
| { |
| "epoch": 3.468132388568098, |
| "grad_norm": 1.3203125, |
| "learning_rate": 0.00014629727525069307, |
| "loss": 4.6575, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47600 |
| }, |
| { |
| "epoch": 3.4717754421756317, |
| "grad_norm": 2.90625, |
| "learning_rate": 0.00014619578675723201, |
| "loss": 4.6738, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47650 |
| }, |
| { |
| "epoch": 3.4754184957831655, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.00014609423773749583, |
| "loss": 4.675, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47700 |
| }, |
| { |
| "epoch": 3.4790615493906993, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00014599262832453523, |
| "loss": 4.6703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47750 |
| }, |
| { |
| "epoch": 3.482704602998233, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00014589095865148006, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47800 |
| }, |
| { |
| "epoch": 3.486347656605767, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00014578922885153916, |
| "loss": 4.6597, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47850 |
| }, |
| { |
| "epoch": 3.4899907102133008, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.0001456874390580002, |
| "loss": 4.6612, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47900 |
| }, |
| { |
| "epoch": 3.4936337638208346, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.0001455855894042293, |
| "loss": 4.6689, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 47950 |
| }, |
| { |
| "epoch": 3.4972768174283684, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00014548368002367118, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48000 |
| }, |
| { |
| "epoch": 3.500919871035902, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00014538171104984868, |
| "loss": 4.6726, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48050 |
| }, |
| { |
| "epoch": 3.504562924643436, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00014527968261636277, |
| "loss": 4.6724, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48100 |
| }, |
| { |
| "epoch": 3.50820597825097, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00014517759485689236, |
| "loss": 4.6692, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48150 |
| }, |
| { |
| "epoch": 3.511849031858504, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00014507544790519407, |
| "loss": 4.6592, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48200 |
| }, |
| { |
| "epoch": 3.515492085466038, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00014497324189510208, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48250 |
| }, |
| { |
| "epoch": 3.5191351390735717, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00014487097696052784, |
| "loss": 4.6607, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48300 |
| }, |
| { |
| "epoch": 3.5227781926811055, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00014476865323546017, |
| "loss": 4.6601, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48350 |
| }, |
| { |
| "epoch": 3.5264212462886393, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00014466627085396485, |
| "loss": 4.6596, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48400 |
| }, |
| { |
| "epoch": 3.530064299896173, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00014456382995018448, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48450 |
| }, |
| { |
| "epoch": 3.533707353503707, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.0001444613306583384, |
| "loss": 4.6711, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48500 |
| }, |
| { |
| "epoch": 3.5373504071112407, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.00014435877311272234, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48550 |
| }, |
| { |
| "epoch": 3.5409934607187745, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.0001442561574477085, |
| "loss": 4.6584, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48600 |
| }, |
| { |
| "epoch": 3.5446365143263083, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00014415348379774514, |
| "loss": 4.6599, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48650 |
| }, |
| { |
| "epoch": 3.548279567933842, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.0001440507522973565, |
| "loss": 4.6766, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48700 |
| }, |
| { |
| "epoch": 3.551922621541376, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00014394796308114262, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48750 |
| }, |
| { |
| "epoch": 3.5555656751489098, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00014384511628377918, |
| "loss": 4.668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48800 |
| }, |
| { |
| "epoch": 3.5592087287564436, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.00014374221204001728, |
| "loss": 4.6725, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48850 |
| }, |
| { |
| "epoch": 3.5628517823639774, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.00014363925048468335, |
| "loss": 4.6722, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48900 |
| }, |
| { |
| "epoch": 3.566494835971511, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.00014353623175267875, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 48950 |
| }, |
| { |
| "epoch": 3.570137889579045, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00014343315597897997, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49000 |
| }, |
| { |
| "epoch": 3.573780943186579, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00014333002329863808, |
| "loss": 4.6455, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49050 |
| }, |
| { |
| "epoch": 3.5774239967941126, |
| "grad_norm": 2.53125, |
| "learning_rate": 0.00014322683384677875, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49100 |
| }, |
| { |
| "epoch": 3.5810670504016464, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00014312358775860203, |
| "loss": 4.6595, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49150 |
| }, |
| { |
| "epoch": 3.58471010400918, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00014302028516938224, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49200 |
| }, |
| { |
| "epoch": 3.5883531576167145, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00014291692621446763, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49250 |
| }, |
| { |
| "epoch": 3.5919962112242483, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00014281351102928032, |
| "loss": 4.6648, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49300 |
| }, |
| { |
| "epoch": 3.595639264831782, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00014271003974931622, |
| "loss": 4.6548, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49350 |
| }, |
| { |
| "epoch": 3.599282318439316, |
| "grad_norm": 1.953125, |
| "learning_rate": 0.00014260651251014458, |
| "loss": 4.6738, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49400 |
| }, |
| { |
| "epoch": 3.6029253720468497, |
| "grad_norm": 2.125, |
| "learning_rate": 0.000142502929447408, |
| "loss": 4.6686, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49450 |
| }, |
| { |
| "epoch": 3.6065684256543835, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00014239929069682233, |
| "loss": 4.66, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49500 |
| }, |
| { |
| "epoch": 3.6102114792619173, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00014229559639417628, |
| "loss": 4.6783, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49550 |
| }, |
| { |
| "epoch": 3.613854532869451, |
| "grad_norm": 1.15625, |
| "learning_rate": 0.00014219184667533136, |
| "loss": 4.6547, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49600 |
| }, |
| { |
| "epoch": 3.617497586476985, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0001420880416762217, |
| "loss": 4.6647, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49650 |
| }, |
| { |
| "epoch": 3.6211406400845187, |
| "grad_norm": 1.8203125, |
| "learning_rate": 0.00014198418153285385, |
| "loss": 4.6716, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49700 |
| }, |
| { |
| "epoch": 3.6247836936920526, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00014188026638130668, |
| "loss": 4.6764, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49750 |
| }, |
| { |
| "epoch": 3.6284267472995864, |
| "grad_norm": 1.9765625, |
| "learning_rate": 0.00014177629635773098, |
| "loss": 4.6583, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49800 |
| }, |
| { |
| "epoch": 3.6320698009071206, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00014167227159834955, |
| "loss": 4.6629, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49850 |
| }, |
| { |
| "epoch": 3.6357128545146544, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00014156819223945695, |
| "loss": 4.6711, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49900 |
| }, |
| { |
| "epoch": 3.6393559081221882, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00014146405841741907, |
| "loss": 4.6629, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 49950 |
| }, |
| { |
| "epoch": 3.642998961729722, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00014135987026867345, |
| "loss": 4.6513, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50000 |
| }, |
| { |
| "epoch": 3.646642015337256, |
| "grad_norm": 2.3125, |
| "learning_rate": 0.00014125562792972857, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50050 |
| }, |
| { |
| "epoch": 3.6502850689447897, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00014115133153716402, |
| "loss": 4.67, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50100 |
| }, |
| { |
| "epoch": 3.6539281225523235, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.00014104698122763013, |
| "loss": 4.6534, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50150 |
| }, |
| { |
| "epoch": 3.6575711761598573, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00014094257713784803, |
| "loss": 4.6594, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50200 |
| }, |
| { |
| "epoch": 3.661214229767391, |
| "grad_norm": 2.421875, |
| "learning_rate": 0.00014083811940460915, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50250 |
| }, |
| { |
| "epoch": 3.664857283374925, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00014073360816477528, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50300 |
| }, |
| { |
| "epoch": 3.6685003369824587, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.0001406290435552783, |
| "loss": 4.6579, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50350 |
| }, |
| { |
| "epoch": 3.6721433905899925, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.00014052442571312, |
| "loss": 4.6711, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50400 |
| }, |
| { |
| "epoch": 3.6757864441975263, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00014041975477537198, |
| "loss": 4.6767, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50450 |
| }, |
| { |
| "epoch": 3.67942949780506, |
| "grad_norm": 2.421875, |
| "learning_rate": 0.00014031503087917532, |
| "loss": 4.6554, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50500 |
| }, |
| { |
| "epoch": 3.683072551412594, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00014021025416174055, |
| "loss": 4.668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50550 |
| }, |
| { |
| "epoch": 3.6867156050201277, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00014010542476034735, |
| "loss": 4.6641, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50600 |
| }, |
| { |
| "epoch": 3.6903586586276615, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00014000054281234454, |
| "loss": 4.6771, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50650 |
| }, |
| { |
| "epoch": 3.6940017122351954, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00013989560845514964, |
| "loss": 4.6664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50700 |
| }, |
| { |
| "epoch": 3.697644765842729, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00013979062182624893, |
| "loss": 4.6692, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50750 |
| }, |
| { |
| "epoch": 3.701287819450263, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00013968558306319716, |
| "loss": 4.6558, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50800 |
| }, |
| { |
| "epoch": 3.704930873057797, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00013958049230361732, |
| "loss": 4.6734, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50850 |
| }, |
| { |
| "epoch": 3.708573926665331, |
| "grad_norm": 2.40625, |
| "learning_rate": 0.00013947534968520065, |
| "loss": 4.676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50900 |
| }, |
| { |
| "epoch": 3.712216980272865, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00013937015534570629, |
| "loss": 4.6575, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 50950 |
| }, |
| { |
| "epoch": 3.7158600338803986, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.0001392649094229611, |
| "loss": 4.6795, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51000 |
| }, |
| { |
| "epoch": 3.7195030874879325, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.0001391596120548595, |
| "loss": 4.6737, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51050 |
| }, |
| { |
| "epoch": 3.7231461410954663, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00013905426337936346, |
| "loss": 4.6628, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51100 |
| }, |
| { |
| "epoch": 3.726789194703, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00013894886353450203, |
| "loss": 4.6703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51150 |
| }, |
| { |
| "epoch": 3.730432248310534, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.00013884341265837135, |
| "loss": 4.678, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51200 |
| }, |
| { |
| "epoch": 3.7340753019180677, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00013873791088913446, |
| "loss": 4.6771, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51250 |
| }, |
| { |
| "epoch": 3.7377183555256015, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.000138632358365021, |
| "loss": 4.6677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51300 |
| }, |
| { |
| "epoch": 3.7413614091331353, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00013852675522432718, |
| "loss": 4.6697, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51350 |
| }, |
| { |
| "epoch": 3.745004462740669, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.0001384211016054155, |
| "loss": 4.6837, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51400 |
| }, |
| { |
| "epoch": 3.748647516348203, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.00013831539764671465, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51450 |
| }, |
| { |
| "epoch": 3.752290569955737, |
| "grad_norm": 1.90625, |
| "learning_rate": 0.00013820964348671915, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51500 |
| }, |
| { |
| "epoch": 3.755933623563271, |
| "grad_norm": 1.625, |
| "learning_rate": 0.0001381038392639894, |
| "loss": 4.6822, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51550 |
| }, |
| { |
| "epoch": 3.759576677170805, |
| "grad_norm": 2.609375, |
| "learning_rate": 0.00013799798511715137, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51600 |
| }, |
| { |
| "epoch": 3.7632197307783386, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00013789208118489648, |
| "loss": 4.6527, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51650 |
| }, |
| { |
| "epoch": 3.7668627843858724, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00013778612760598124, |
| "loss": 4.667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51700 |
| }, |
| { |
| "epoch": 3.770505837993406, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00013768012451922736, |
| "loss": 4.6622, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51750 |
| }, |
| { |
| "epoch": 3.77414889160094, |
| "grad_norm": 2.53125, |
| "learning_rate": 0.00013757407206352136, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51800 |
| }, |
| { |
| "epoch": 3.777791945208474, |
| "grad_norm": 1.921875, |
| "learning_rate": 0.00013746797037781448, |
| "loss": 4.6799, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51850 |
| }, |
| { |
| "epoch": 3.7814349988160076, |
| "grad_norm": 3.21875, |
| "learning_rate": 0.00013736181960112234, |
| "loss": 4.6782, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51900 |
| }, |
| { |
| "epoch": 3.7850780524235415, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00013725561987252497, |
| "loss": 4.6676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 51950 |
| }, |
| { |
| "epoch": 3.7887211060310753, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.0001371493713311666, |
| "loss": 4.6522, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52000 |
| }, |
| { |
| "epoch": 3.792364159638609, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.0001370430741162553, |
| "loss": 4.6524, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52050 |
| }, |
| { |
| "epoch": 3.796007213246143, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.000136936728367063, |
| "loss": 4.6601, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52100 |
| }, |
| { |
| "epoch": 3.7996502668536767, |
| "grad_norm": 2.921875, |
| "learning_rate": 0.0001368303342229251, |
| "loss": 4.6668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52150 |
| }, |
| { |
| "epoch": 3.8032933204612105, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00013672389182324058, |
| "loss": 4.672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52200 |
| }, |
| { |
| "epoch": 3.8069363740687443, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.00013661740130747145, |
| "loss": 4.6639, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52250 |
| }, |
| { |
| "epoch": 3.810579427676278, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.000136510862815143, |
| "loss": 4.6658, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52300 |
| }, |
| { |
| "epoch": 3.814222481283812, |
| "grad_norm": 2.4375, |
| "learning_rate": 0.00013640427648584308, |
| "loss": 4.6641, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52350 |
| }, |
| { |
| "epoch": 3.8178655348913457, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00013629764245922253, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52400 |
| }, |
| { |
| "epoch": 3.8215085884988795, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.0001361909608749944, |
| "loss": 4.6476, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52450 |
| }, |
| { |
| "epoch": 3.8251516421064133, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.0001360842318729343, |
| "loss": 4.6566, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52500 |
| }, |
| { |
| "epoch": 3.8287946957139476, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.0001359774555928798, |
| "loss": 4.6454, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52550 |
| }, |
| { |
| "epoch": 3.8324377493214814, |
| "grad_norm": 2.25, |
| "learning_rate": 0.00013587063217473046, |
| "loss": 4.6705, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52600 |
| }, |
| { |
| "epoch": 3.836080802929015, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00013576376175844764, |
| "loss": 4.6695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52650 |
| }, |
| { |
| "epoch": 3.839723856536549, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00013565684448405417, |
| "loss": 4.6508, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52700 |
| }, |
| { |
| "epoch": 3.843366910144083, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.00013554988049163444, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52750 |
| }, |
| { |
| "epoch": 3.8470099637516166, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.0001354428699213339, |
| "loss": 4.663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52800 |
| }, |
| { |
| "epoch": 3.8506530173591504, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.00013533581291335912, |
| "loss": 4.6642, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52850 |
| }, |
| { |
| "epoch": 3.8542960709666843, |
| "grad_norm": 3.03125, |
| "learning_rate": 0.00013522870960797743, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52900 |
| }, |
| { |
| "epoch": 3.857939124574218, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.00013512156014551692, |
| "loss": 4.6938, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 52950 |
| }, |
| { |
| "epoch": 3.861582178181752, |
| "grad_norm": 2.375, |
| "learning_rate": 0.00013501436466636607, |
| "loss": 4.6695, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53000 |
| }, |
| { |
| "epoch": 3.8652252317892857, |
| "grad_norm": 2.515625, |
| "learning_rate": 0.00013490712331097374, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53050 |
| }, |
| { |
| "epoch": 3.8688682853968195, |
| "grad_norm": 1.9375, |
| "learning_rate": 0.00013479983621984877, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53100 |
| }, |
| { |
| "epoch": 3.8725113390043533, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00013469250353356, |
| "loss": 4.6726, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53150 |
| }, |
| { |
| "epoch": 3.8761543926118875, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.0001345851253927361, |
| "loss": 4.6674, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53200 |
| }, |
| { |
| "epoch": 3.8797974462194214, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00013447770193806508, |
| "loss": 4.6709, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53250 |
| }, |
| { |
| "epoch": 3.883440499826955, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00013437023331029454, |
| "loss": 4.676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53300 |
| }, |
| { |
| "epoch": 3.887083553434489, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00013426271965023107, |
| "loss": 4.6524, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53350 |
| }, |
| { |
| "epoch": 3.8907266070420228, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00013415516109874047, |
| "loss": 4.6691, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53400 |
| }, |
| { |
| "epoch": 3.8943696606495566, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00013404755779674715, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53450 |
| }, |
| { |
| "epoch": 3.8980127142570904, |
| "grad_norm": 1.8515625, |
| "learning_rate": 0.00013393990988523434, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53500 |
| }, |
| { |
| "epoch": 3.901655767864624, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00013383221750524354, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53550 |
| }, |
| { |
| "epoch": 3.905298821472158, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00013372448079787465, |
| "loss": 4.674, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53600 |
| }, |
| { |
| "epoch": 3.908941875079692, |
| "grad_norm": 2.515625, |
| "learning_rate": 0.0001336166999042856, |
| "loss": 4.6718, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53650 |
| }, |
| { |
| "epoch": 3.9125849286872256, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00013350887496569217, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53700 |
| }, |
| { |
| "epoch": 3.9162279822947594, |
| "grad_norm": 2.359375, |
| "learning_rate": 0.00013340100612336799, |
| "loss": 4.6874, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53750 |
| }, |
| { |
| "epoch": 3.9198710359022932, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00013329309351864396, |
| "loss": 4.6584, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53800 |
| }, |
| { |
| "epoch": 3.923514089509827, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00013318513729290862, |
| "loss": 4.6773, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53850 |
| }, |
| { |
| "epoch": 3.927157143117361, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00013307713758760746, |
| "loss": 4.6655, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53900 |
| }, |
| { |
| "epoch": 3.9308001967248947, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.00013296909454424297, |
| "loss": 4.6706, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 53950 |
| }, |
| { |
| "epoch": 3.9344432503324285, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00013286100830437445, |
| "loss": 4.6729, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54000 |
| }, |
| { |
| "epoch": 3.9380863039399623, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.0001327528790096178, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54050 |
| }, |
| { |
| "epoch": 3.941729357547496, |
| "grad_norm": 1.5859375, |
| "learning_rate": 0.00013264470680164533, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54100 |
| }, |
| { |
| "epoch": 3.94537241115503, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00013253649182218556, |
| "loss": 4.6643, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54150 |
| }, |
| { |
| "epoch": 3.9490154647625637, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00013242823421302308, |
| "loss": 4.6606, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54200 |
| }, |
| { |
| "epoch": 3.952658518370098, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00013231993411599828, |
| "loss": 4.6812, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54250 |
| }, |
| { |
| "epoch": 3.9563015719776318, |
| "grad_norm": 2.5, |
| "learning_rate": 0.00013221159167300725, |
| "loss": 4.6707, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54300 |
| }, |
| { |
| "epoch": 3.9599446255851656, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00013210320702600157, |
| "loss": 4.6572, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54350 |
| }, |
| { |
| "epoch": 3.9635876791926994, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.0001319947803169881, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54400 |
| }, |
| { |
| "epoch": 3.967230732800233, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.00013188631168802883, |
| "loss": 4.6647, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54450 |
| }, |
| { |
| "epoch": 3.970873786407767, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00013177780128124065, |
| "loss": 4.6754, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54500 |
| }, |
| { |
| "epoch": 3.974516840015301, |
| "grad_norm": 1.5390625, |
| "learning_rate": 0.00013166924923879521, |
| "loss": 4.6546, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54550 |
| }, |
| { |
| "epoch": 3.9781598936228346, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.0001315606557029187, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54600 |
| }, |
| { |
| "epoch": 3.9818029472303684, |
| "grad_norm": 2.09375, |
| "learning_rate": 0.00013145202081589168, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54650 |
| }, |
| { |
| "epoch": 3.9854460008379022, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00013134334472004886, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54700 |
| }, |
| { |
| "epoch": 3.989089054445436, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00013123462755777897, |
| "loss": 4.6588, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54750 |
| }, |
| { |
| "epoch": 3.99273210805297, |
| "grad_norm": 2.265625, |
| "learning_rate": 0.0001311258694715246, |
| "loss": 4.6771, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54800 |
| }, |
| { |
| "epoch": 3.996375161660504, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.00013101707060378186, |
| "loss": 4.6592, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54850 |
| }, |
| { |
| "epoch": 3.999726770979435, |
| "eval_loss": 4.676952838897705, |
| "eval_runtime": 579.4088, |
| "eval_samples_per_second": 523.513, |
| "eval_steps_per_second": 43.627, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54896 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00013090823109710028, |
| "loss": 4.6635, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54900 |
| }, |
| { |
| "epoch": 4.003643053607534, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00013079935109408278, |
| "loss": 4.6468, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 54950 |
| }, |
| { |
| "epoch": 4.007286107215068, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.0001306904307373852, |
| "loss": 4.6623, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55000 |
| }, |
| { |
| "epoch": 4.010929160822601, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00013058147016971637, |
| "loss": 4.6599, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55050 |
| }, |
| { |
| "epoch": 4.014572214430135, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00013047246953383764, |
| "loss": 4.6579, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55100 |
| }, |
| { |
| "epoch": 4.018215268037669, |
| "grad_norm": 1.8046875, |
| "learning_rate": 0.00013036342897256297, |
| "loss": 4.6526, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55150 |
| }, |
| { |
| "epoch": 4.021858321645203, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00013025434862875865, |
| "loss": 4.6709, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55200 |
| }, |
| { |
| "epoch": 4.025501375252737, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00013014522864534308, |
| "loss": 4.6681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55250 |
| }, |
| { |
| "epoch": 4.0291444288602705, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.0001300360691652865, |
| "loss": 4.6531, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55300 |
| }, |
| { |
| "epoch": 4.032787482467804, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00012992687033161102, |
| "loss": 4.67, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55350 |
| }, |
| { |
| "epoch": 4.036430536075338, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00012981763228739029, |
| "loss": 4.6491, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55400 |
| }, |
| { |
| "epoch": 4.040073589682872, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.0001297083551757492, |
| "loss": 4.6525, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55450 |
| }, |
| { |
| "epoch": 4.043716643290406, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00012959903913986408, |
| "loss": 4.6554, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55500 |
| }, |
| { |
| "epoch": 4.0473596968979395, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.000129489684322962, |
| "loss": 4.6561, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55550 |
| }, |
| { |
| "epoch": 4.051002750505473, |
| "grad_norm": 2.140625, |
| "learning_rate": 0.00012938029086832106, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55600 |
| }, |
| { |
| "epoch": 4.054645804113007, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00012927085891926976, |
| "loss": 4.6559, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55650 |
| }, |
| { |
| "epoch": 4.058288857720541, |
| "grad_norm": 1.2578125, |
| "learning_rate": 0.00012916138861918726, |
| "loss": 4.6572, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55700 |
| }, |
| { |
| "epoch": 4.061931911328076, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.0001290518801115028, |
| "loss": 4.6476, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55750 |
| }, |
| { |
| "epoch": 4.065574964935609, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.00012894233353969575, |
| "loss": 4.6452, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55800 |
| }, |
| { |
| "epoch": 4.069218018543143, |
| "grad_norm": 1.9765625, |
| "learning_rate": 0.00012883274904729538, |
| "loss": 4.6708, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55850 |
| }, |
| { |
| "epoch": 4.072861072150677, |
| "grad_norm": 2.28125, |
| "learning_rate": 0.00012872312677788056, |
| "loss": 4.6679, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55900 |
| }, |
| { |
| "epoch": 4.076504125758211, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00012861346687507974, |
| "loss": 4.6709, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 55950 |
| }, |
| { |
| "epoch": 4.080147179365745, |
| "grad_norm": 2.34375, |
| "learning_rate": 0.00012850376948257068, |
| "loss": 4.658, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56000 |
| }, |
| { |
| "epoch": 4.0837902329732785, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00012839403474408016, |
| "loss": 4.6636, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56050 |
| }, |
| { |
| "epoch": 4.087433286580812, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.00012828426280338392, |
| "loss": 4.6613, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56100 |
| }, |
| { |
| "epoch": 4.091076340188346, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.0001281744538043066, |
| "loss": 4.646, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56150 |
| }, |
| { |
| "epoch": 4.09471939379588, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.00012806460789072116, |
| "loss": 4.6497, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56200 |
| }, |
| { |
| "epoch": 4.098362447403414, |
| "grad_norm": 1.125, |
| "learning_rate": 0.00012795472520654908, |
| "loss": 4.6718, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56250 |
| }, |
| { |
| "epoch": 4.1020055010109475, |
| "grad_norm": 3.328125, |
| "learning_rate": 0.00012784480589575996, |
| "loss": 4.6702, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56300 |
| }, |
| { |
| "epoch": 4.105648554618481, |
| "grad_norm": 1.734375, |
| "learning_rate": 0.0001277348501023714, |
| "loss": 4.6594, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56350 |
| }, |
| { |
| "epoch": 4.109291608226015, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00012762485797044882, |
| "loss": 4.6645, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56400 |
| }, |
| { |
| "epoch": 4.112934661833549, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00012751482964410525, |
| "loss": 4.6562, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56450 |
| }, |
| { |
| "epoch": 4.116577715441083, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001274047652675011, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56500 |
| }, |
| { |
| "epoch": 4.120220769048617, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.000127294664984844, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56550 |
| }, |
| { |
| "epoch": 4.12386382265615, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.00012718452894038874, |
| "loss": 4.6503, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56600 |
| }, |
| { |
| "epoch": 4.127506876263684, |
| "grad_norm": 1.46875, |
| "learning_rate": 0.00012707435727843687, |
| "loss": 4.6677, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56650 |
| }, |
| { |
| "epoch": 4.131149929871218, |
| "grad_norm": 1.703125, |
| "learning_rate": 0.0001269641501433366, |
| "loss": 4.666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56700 |
| }, |
| { |
| "epoch": 4.134792983478752, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00012685390767948267, |
| "loss": 4.6699, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56750 |
| }, |
| { |
| "epoch": 4.138436037086286, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.000126743630031316, |
| "loss": 4.6572, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56800 |
| }, |
| { |
| "epoch": 4.142079090693819, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.0001266333173433238, |
| "loss": 4.6519, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56850 |
| }, |
| { |
| "epoch": 4.145722144301353, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00012652296976003907, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56900 |
| }, |
| { |
| "epoch": 4.149365197908887, |
| "grad_norm": 3.4375, |
| "learning_rate": 0.0001264125874260405, |
| "loss": 4.6704, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 56950 |
| }, |
| { |
| "epoch": 4.153008251516421, |
| "grad_norm": 1.8359375, |
| "learning_rate": 0.00012630217048595233, |
| "loss": 4.6562, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57000 |
| }, |
| { |
| "epoch": 4.156651305123955, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00012619171908444417, |
| "loss": 4.6538, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57050 |
| }, |
| { |
| "epoch": 4.1602943587314885, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00012608123336623083, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57100 |
| }, |
| { |
| "epoch": 4.163937412339022, |
| "grad_norm": 1.375, |
| "learning_rate": 0.000125970713476072, |
| "loss": 4.6743, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57150 |
| }, |
| { |
| "epoch": 4.167580465946556, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.00012586015955877214, |
| "loss": 4.6516, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57200 |
| }, |
| { |
| "epoch": 4.17122351955409, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.00012574957175918032, |
| "loss": 4.6724, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57250 |
| }, |
| { |
| "epoch": 4.174866573161624, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00012563895022219004, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57300 |
| }, |
| { |
| "epoch": 4.1785096267691575, |
| "grad_norm": 1.359375, |
| "learning_rate": 0.00012552829509273898, |
| "loss": 4.6346, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57350 |
| }, |
| { |
| "epoch": 4.182152680376692, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00012541760651580875, |
| "loss": 4.6762, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57400 |
| }, |
| { |
| "epoch": 4.185795733984226, |
| "grad_norm": 1.984375, |
| "learning_rate": 0.00012530688463642493, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57450 |
| }, |
| { |
| "epoch": 4.18943878759176, |
| "grad_norm": 2.640625, |
| "learning_rate": 0.0001251961295996566, |
| "loss": 4.6646, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57500 |
| }, |
| { |
| "epoch": 4.193081841199294, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.0001250853415506164, |
| "loss": 4.6602, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57550 |
| }, |
| { |
| "epoch": 4.196724894806827, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.00012497452063446013, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57600 |
| }, |
| { |
| "epoch": 4.200367948414361, |
| "grad_norm": 2.453125, |
| "learning_rate": 0.00012486366699638666, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57650 |
| }, |
| { |
| "epoch": 4.204011002021895, |
| "grad_norm": 1.6328125, |
| "learning_rate": 0.0001247527807816378, |
| "loss": 4.6672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57700 |
| }, |
| { |
| "epoch": 4.207654055629429, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.000124641862135498, |
| "loss": 4.6627, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57750 |
| }, |
| { |
| "epoch": 4.211297109236963, |
| "grad_norm": 1.640625, |
| "learning_rate": 0.00012453091120329416, |
| "loss": 4.6823, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57800 |
| }, |
| { |
| "epoch": 4.2149401628444965, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00012441992813039555, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57850 |
| }, |
| { |
| "epoch": 4.21858321645203, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00012430891306221354, |
| "loss": 4.6422, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57900 |
| }, |
| { |
| "epoch": 4.222226270059564, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.0001241978661442014, |
| "loss": 4.6743, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 57950 |
| }, |
| { |
| "epoch": 4.225869323667098, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00012408678752185418, |
| "loss": 4.6765, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58000 |
| }, |
| { |
| "epoch": 4.229512377274632, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00012397567734070836, |
| "loss": 4.6528, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58050 |
| }, |
| { |
| "epoch": 4.2331554308821655, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00012386453574634183, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58100 |
| }, |
| { |
| "epoch": 4.236798484489699, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.00012375336288437372, |
| "loss": 4.6534, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58150 |
| }, |
| { |
| "epoch": 4.240441538097233, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00012364215890046395, |
| "loss": 4.6772, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58200 |
| }, |
| { |
| "epoch": 4.244084591704767, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00012353092394031337, |
| "loss": 4.6779, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58250 |
| }, |
| { |
| "epoch": 4.247727645312301, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.0001234196581496634, |
| "loss": 4.6523, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58300 |
| }, |
| { |
| "epoch": 4.2513706989198345, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00012330836167429577, |
| "loss": 4.6743, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58350 |
| }, |
| { |
| "epoch": 4.255013752527368, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.0001231970346600324, |
| "loss": 4.664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58400 |
| }, |
| { |
| "epoch": 4.258656806134902, |
| "grad_norm": 1.2109375, |
| "learning_rate": 0.00012308567725273544, |
| "loss": 4.6635, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58450 |
| }, |
| { |
| "epoch": 4.262299859742436, |
| "grad_norm": 2.921875, |
| "learning_rate": 0.00012297428959830655, |
| "loss": 4.6636, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58500 |
| }, |
| { |
| "epoch": 4.26594291334997, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00012286287184268727, |
| "loss": 4.6659, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58550 |
| }, |
| { |
| "epoch": 4.269585966957504, |
| "grad_norm": 1.8203125, |
| "learning_rate": 0.00012275142413185842, |
| "loss": 4.6752, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58600 |
| }, |
| { |
| "epoch": 4.273229020565037, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00012263994661184017, |
| "loss": 4.6608, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58650 |
| }, |
| { |
| "epoch": 4.276872074172571, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00012252843942869173, |
| "loss": 4.6629, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58700 |
| }, |
| { |
| "epoch": 4.280515127780105, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00012241690272851109, |
| "loss": 4.6469, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58750 |
| }, |
| { |
| "epoch": 4.284158181387639, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.000122305336657435, |
| "loss": 4.6554, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58800 |
| }, |
| { |
| "epoch": 4.287801234995173, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00012219374136163865, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58850 |
| }, |
| { |
| "epoch": 4.291444288602706, |
| "grad_norm": 2.53125, |
| "learning_rate": 0.0001220821169873356, |
| "loss": 4.6552, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58900 |
| }, |
| { |
| "epoch": 4.29508734221024, |
| "grad_norm": 2.921875, |
| "learning_rate": 0.00012197046368077738, |
| "loss": 4.6664, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 58950 |
| }, |
| { |
| "epoch": 4.298730395817774, |
| "grad_norm": 1.8828125, |
| "learning_rate": 0.00012185878158825356, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59000 |
| }, |
| { |
| "epoch": 4.302373449425309, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001217470708560913, |
| "loss": 4.6672, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59050 |
| }, |
| { |
| "epoch": 4.306016503032842, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00012163533163065535, |
| "loss": 4.6674, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59100 |
| }, |
| { |
| "epoch": 4.309659556640376, |
| "grad_norm": 1.890625, |
| "learning_rate": 0.00012152356405834782, |
| "loss": 4.6471, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59150 |
| }, |
| { |
| "epoch": 4.31330261024791, |
| "grad_norm": 1.7890625, |
| "learning_rate": 0.00012141176828560787, |
| "loss": 4.6535, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59200 |
| }, |
| { |
| "epoch": 4.316945663855444, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00012129994445891177, |
| "loss": 4.6649, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59250 |
| }, |
| { |
| "epoch": 4.320588717462978, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00012118809272477231, |
| "loss": 4.6748, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59300 |
| }, |
| { |
| "epoch": 4.324231771070512, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.0001210762132297391, |
| "loss": 4.671, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59350 |
| }, |
| { |
| "epoch": 4.327874824678045, |
| "grad_norm": 1.765625, |
| "learning_rate": 0.00012096430612039791, |
| "loss": 4.6624, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59400 |
| }, |
| { |
| "epoch": 4.331517878285579, |
| "grad_norm": 2.59375, |
| "learning_rate": 0.00012085237154337083, |
| "loss": 4.6709, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59450 |
| }, |
| { |
| "epoch": 4.335160931893113, |
| "grad_norm": 1.984375, |
| "learning_rate": 0.00012074040964531588, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59500 |
| }, |
| { |
| "epoch": 4.338803985500647, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00012062842057292685, |
| "loss": 4.6591, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59550 |
| }, |
| { |
| "epoch": 4.342447039108181, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.0001205164044729332, |
| "loss": 4.6643, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59600 |
| }, |
| { |
| "epoch": 4.3460900927157144, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00012040436149209974, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59650 |
| }, |
| { |
| "epoch": 4.349733146323248, |
| "grad_norm": 1.7734375, |
| "learning_rate": 0.00012029229177722663, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59700 |
| }, |
| { |
| "epoch": 4.353376199930782, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00012018019547514882, |
| "loss": 4.6816, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59750 |
| }, |
| { |
| "epoch": 4.357019253538316, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.0001200680727327363, |
| "loss": 4.6742, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59800 |
| }, |
| { |
| "epoch": 4.36066230714585, |
| "grad_norm": 2.140625, |
| "learning_rate": 0.00011995592369689366, |
| "loss": 4.6591, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59850 |
| }, |
| { |
| "epoch": 4.3643053607533835, |
| "grad_norm": 1.6953125, |
| "learning_rate": 0.00011984374851455989, |
| "loss": 4.6604, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59900 |
| }, |
| { |
| "epoch": 4.367948414360917, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00011973154733270823, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 59950 |
| }, |
| { |
| "epoch": 4.371591467968451, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00011961932029834605, |
| "loss": 4.6594, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60000 |
| }, |
| { |
| "epoch": 4.375234521575985, |
| "grad_norm": 2.03125, |
| "learning_rate": 0.00011950706755851458, |
| "loss": 4.6734, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60050 |
| }, |
| { |
| "epoch": 4.378877575183519, |
| "grad_norm": 3.015625, |
| "learning_rate": 0.00011939478926028869, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60100 |
| }, |
| { |
| "epoch": 4.3825206287910525, |
| "grad_norm": 3.078125, |
| "learning_rate": 0.00011928248555077672, |
| "loss": 4.6486, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60150 |
| }, |
| { |
| "epoch": 4.386163682398586, |
| "grad_norm": 1.1953125, |
| "learning_rate": 0.00011917015657712038, |
| "loss": 4.6749, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60200 |
| }, |
| { |
| "epoch": 4.38980673600612, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00011905780248649443, |
| "loss": 4.6586, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60250 |
| }, |
| { |
| "epoch": 4.393449789613654, |
| "grad_norm": 1.9296875, |
| "learning_rate": 0.00011894542342610655, |
| "loss": 4.649, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60300 |
| }, |
| { |
| "epoch": 4.397092843221188, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00011883301954319717, |
| "loss": 4.6684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60350 |
| }, |
| { |
| "epoch": 4.400735896828722, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.00011872059098503916, |
| "loss": 4.6684, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60400 |
| }, |
| { |
| "epoch": 4.404378950436255, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00011860813789893777, |
| "loss": 4.6733, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60450 |
| }, |
| { |
| "epoch": 4.408022004043789, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00011849566043223041, |
| "loss": 4.6663, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60500 |
| }, |
| { |
| "epoch": 4.411665057651323, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00011838315873228642, |
| "loss": 4.6666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60550 |
| }, |
| { |
| "epoch": 4.415308111258857, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00011827063294650684, |
| "loss": 4.6562, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60600 |
| }, |
| { |
| "epoch": 4.418951164866391, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00011815808322232434, |
| "loss": 4.6649, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60650 |
| }, |
| { |
| "epoch": 4.422594218473925, |
| "grad_norm": 2.671875, |
| "learning_rate": 0.00011804550970720289, |
| "loss": 4.6632, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60700 |
| }, |
| { |
| "epoch": 4.426237272081458, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00011793291254863772, |
| "loss": 4.6476, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60750 |
| }, |
| { |
| "epoch": 4.429880325688993, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.0001178202918941549, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60800 |
| }, |
| { |
| "epoch": 4.433523379296527, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00011770764789131143, |
| "loss": 4.6654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60850 |
| }, |
| { |
| "epoch": 4.4371664329040605, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00011759498068769481, |
| "loss": 4.6775, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60900 |
| }, |
| { |
| "epoch": 4.440809486511594, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00011748229043092296, |
| "loss": 4.6591, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 60950 |
| }, |
| { |
| "epoch": 4.444452540119128, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00011736957726864405, |
| "loss": 4.669, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61000 |
| }, |
| { |
| "epoch": 4.448095593726662, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00011725684134853619, |
| "loss": 4.6586, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61050 |
| }, |
| { |
| "epoch": 4.451738647334196, |
| "grad_norm": 1.828125, |
| "learning_rate": 0.00011714408281830734, |
| "loss": 4.6681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61100 |
| }, |
| { |
| "epoch": 4.45538170094173, |
| "grad_norm": 1.0390625, |
| "learning_rate": 0.00011703130182569502, |
| "loss": 4.6776, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61150 |
| }, |
| { |
| "epoch": 4.459024754549263, |
| "grad_norm": 2.3125, |
| "learning_rate": 0.00011691849851846634, |
| "loss": 4.6756, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61200 |
| }, |
| { |
| "epoch": 4.462667808156797, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00011680567304441753, |
| "loss": 4.6723, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61250 |
| }, |
| { |
| "epoch": 4.466310861764331, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00011669282555137382, |
| "loss": 4.6634, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61300 |
| }, |
| { |
| "epoch": 4.469953915371865, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.00011657995618718944, |
| "loss": 4.661, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61350 |
| }, |
| { |
| "epoch": 4.473596968979399, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.0001164670650997471, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61400 |
| }, |
| { |
| "epoch": 4.477240022586932, |
| "grad_norm": 2.890625, |
| "learning_rate": 0.00011635415243695813, |
| "loss": 4.6606, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61450 |
| }, |
| { |
| "epoch": 4.480883076194466, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00011624121834676203, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61500 |
| }, |
| { |
| "epoch": 4.484526129802, |
| "grad_norm": 1.234375, |
| "learning_rate": 0.0001161282629771264, |
| "loss": 4.6808, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61550 |
| }, |
| { |
| "epoch": 4.488169183409534, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00011601528647604671, |
| "loss": 4.6619, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61600 |
| }, |
| { |
| "epoch": 4.491812237017068, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00011590228899154618, |
| "loss": 4.6604, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61650 |
| }, |
| { |
| "epoch": 4.4954552906246015, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00011578927067167539, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61700 |
| }, |
| { |
| "epoch": 4.499098344232135, |
| "grad_norm": 1.4921875, |
| "learning_rate": 0.00011567623166451242, |
| "loss": 4.6652, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61750 |
| }, |
| { |
| "epoch": 4.502741397839669, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00011556317211816223, |
| "loss": 4.6743, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61800 |
| }, |
| { |
| "epoch": 4.506384451447203, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00011545009218075682, |
| "loss": 4.6667, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61850 |
| }, |
| { |
| "epoch": 4.510027505054737, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00011533699200045492, |
| "loss": 4.6681, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61900 |
| }, |
| { |
| "epoch": 4.5136705586622705, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00011522387172544169, |
| "loss": 4.6568, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 61950 |
| }, |
| { |
| "epoch": 4.517313612269804, |
| "grad_norm": 1.5234375, |
| "learning_rate": 0.00011511073150392875, |
| "loss": 4.6617, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62000 |
| }, |
| { |
| "epoch": 4.520956665877338, |
| "grad_norm": 1.34375, |
| "learning_rate": 0.00011499757148415368, |
| "loss": 4.6625, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62050 |
| }, |
| { |
| "epoch": 4.524599719484872, |
| "grad_norm": 1.09375, |
| "learning_rate": 0.00011488439181438022, |
| "loss": 4.6566, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62100 |
| }, |
| { |
| "epoch": 4.528242773092406, |
| "grad_norm": 1.21875, |
| "learning_rate": 0.00011477119264289762, |
| "loss": 4.6556, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62150 |
| }, |
| { |
| "epoch": 4.53188582669994, |
| "grad_norm": 1.6640625, |
| "learning_rate": 0.00011465797411802089, |
| "loss": 4.675, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62200 |
| }, |
| { |
| "epoch": 4.535528880307473, |
| "grad_norm": 1.65625, |
| "learning_rate": 0.00011454473638809023, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62250 |
| }, |
| { |
| "epoch": 4.539171933915007, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00011443147960147108, |
| "loss": 4.6531, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62300 |
| }, |
| { |
| "epoch": 4.542814987522542, |
| "grad_norm": 1.9453125, |
| "learning_rate": 0.00011431820390655387, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62350 |
| }, |
| { |
| "epoch": 4.546458041130075, |
| "grad_norm": 1.5546875, |
| "learning_rate": 0.00011420490945175377, |
| "loss": 4.6699, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62400 |
| }, |
| { |
| "epoch": 4.5501010947376095, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00011409159638551057, |
| "loss": 4.6651, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62450 |
| }, |
| { |
| "epoch": 4.553744148345142, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00011397826485628835, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62500 |
| }, |
| { |
| "epoch": 4.557387201952677, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00011386491501257548, |
| "loss": 4.6717, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62550 |
| }, |
| { |
| "epoch": 4.561030255560211, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00011375154700288429, |
| "loss": 4.6708, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62600 |
| }, |
| { |
| "epoch": 4.564673309167745, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.0001136381609757509, |
| "loss": 4.6694, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62650 |
| }, |
| { |
| "epoch": 4.5683163627752785, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00011352475707973502, |
| "loss": 4.6766, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62700 |
| }, |
| { |
| "epoch": 4.571959416382812, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00011341133546341985, |
| "loss": 4.6582, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62750 |
| }, |
| { |
| "epoch": 4.575602469990346, |
| "grad_norm": 2.1875, |
| "learning_rate": 0.0001132978962754117, |
| "loss": 4.6576, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62800 |
| }, |
| { |
| "epoch": 4.57924552359788, |
| "grad_norm": 1.9140625, |
| "learning_rate": 0.00011318443966434, |
| "loss": 4.6498, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62850 |
| }, |
| { |
| "epoch": 4.582888577205414, |
| "grad_norm": 1.25, |
| "learning_rate": 0.00011307096577885697, |
| "loss": 4.6803, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62900 |
| }, |
| { |
| "epoch": 4.586531630812948, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.0001129574747676374, |
| "loss": 4.6524, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 62950 |
| }, |
| { |
| "epoch": 4.590174684420481, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.00011284396677937863, |
| "loss": 4.6725, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63000 |
| }, |
| { |
| "epoch": 4.593817738028015, |
| "grad_norm": 1.1640625, |
| "learning_rate": 0.00011273044196280018, |
| "loss": 4.6597, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63050 |
| }, |
| { |
| "epoch": 4.597460791635549, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00011261690046664365, |
| "loss": 4.6564, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63100 |
| }, |
| { |
| "epoch": 4.601103845243083, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00011250334243967245, |
| "loss": 4.6796, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63150 |
| }, |
| { |
| "epoch": 4.604746898850617, |
| "grad_norm": 1.078125, |
| "learning_rate": 0.00011238976803067165, |
| "loss": 4.659, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63200 |
| }, |
| { |
| "epoch": 4.60838995245815, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00011227617738844782, |
| "loss": 4.6644, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63250 |
| }, |
| { |
| "epoch": 4.612033006065684, |
| "grad_norm": 1.796875, |
| "learning_rate": 0.00011216257066182883, |
| "loss": 4.6703, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63300 |
| }, |
| { |
| "epoch": 4.615676059673218, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00011204894799966352, |
| "loss": 4.6614, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63350 |
| }, |
| { |
| "epoch": 4.619319113280752, |
| "grad_norm": 1.8671875, |
| "learning_rate": 0.0001119353095508217, |
| "loss": 4.6609, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63400 |
| }, |
| { |
| "epoch": 4.622962166888286, |
| "grad_norm": 1.6015625, |
| "learning_rate": 0.00011182165546419381, |
| "loss": 4.676, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63450 |
| }, |
| { |
| "epoch": 4.6266052204958195, |
| "grad_norm": 1.5, |
| "learning_rate": 0.00011170798588869083, |
| "loss": 4.6682, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63500 |
| }, |
| { |
| "epoch": 4.630248274103353, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00011159430097324397, |
| "loss": 4.6587, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63550 |
| }, |
| { |
| "epoch": 4.633891327710887, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.0001114806008668046, |
| "loss": 4.6622, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63600 |
| }, |
| { |
| "epoch": 4.637534381318421, |
| "grad_norm": 2.25, |
| "learning_rate": 0.00011136688571834395, |
| "loss": 4.6668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63650 |
| }, |
| { |
| "epoch": 4.641177434925955, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00011125315567685299, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63700 |
| }, |
| { |
| "epoch": 4.6448204885334885, |
| "grad_norm": 1.3984375, |
| "learning_rate": 0.00011113941089134219, |
| "loss": 4.6627, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63750 |
| }, |
| { |
| "epoch": 4.648463542141022, |
| "grad_norm": 1.859375, |
| "learning_rate": 0.0001110256515108413, |
| "loss": 4.6619, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63800 |
| }, |
| { |
| "epoch": 4.652106595748556, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.0001109118776843993, |
| "loss": 4.6596, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63850 |
| }, |
| { |
| "epoch": 4.65574964935609, |
| "grad_norm": 1.171875, |
| "learning_rate": 0.00011079808956108391, |
| "loss": 4.6623, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63900 |
| }, |
| { |
| "epoch": 4.659392702963624, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00011068428728998182, |
| "loss": 4.6569, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 63950 |
| }, |
| { |
| "epoch": 4.663035756571158, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00011057047102019804, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64000 |
| }, |
| { |
| "epoch": 4.666678810178691, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00011045664090085605, |
| "loss": 4.6685, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64050 |
| }, |
| { |
| "epoch": 4.670321863786226, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00011034279708109749, |
| "loss": 4.6551, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64100 |
| }, |
| { |
| "epoch": 4.673964917393759, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00011022893971008182, |
| "loss": 4.6731, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64150 |
| }, |
| { |
| "epoch": 4.677607971001294, |
| "grad_norm": 1.3359375, |
| "learning_rate": 0.00011011506893698638, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64200 |
| }, |
| { |
| "epoch": 4.6812510246088275, |
| "grad_norm": 1.78125, |
| "learning_rate": 0.00011000118491100601, |
| "loss": 4.6615, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64250 |
| }, |
| { |
| "epoch": 4.684894078216361, |
| "grad_norm": 2.140625, |
| "learning_rate": 0.00010988728778135297, |
| "loss": 4.6585, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64300 |
| }, |
| { |
| "epoch": 4.688537131823895, |
| "grad_norm": 1.3046875, |
| "learning_rate": 0.00010977337769725658, |
| "loss": 4.6636, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64350 |
| }, |
| { |
| "epoch": 4.692180185431429, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00010965945480796325, |
| "loss": 4.6818, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64400 |
| }, |
| { |
| "epoch": 4.695823239038963, |
| "grad_norm": 1.8984375, |
| "learning_rate": 0.0001095455192627361, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64450 |
| }, |
| { |
| "epoch": 4.6994662926464965, |
| "grad_norm": 1.5703125, |
| "learning_rate": 0.00010943157121085488, |
| "loss": 4.6655, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64500 |
| }, |
| { |
| "epoch": 4.70310934625403, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00010931761080161566, |
| "loss": 4.6591, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64550 |
| }, |
| { |
| "epoch": 4.706752399861564, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00010920363818433074, |
| "loss": 4.6745, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64600 |
| }, |
| { |
| "epoch": 4.710395453469098, |
| "grad_norm": 1.8125, |
| "learning_rate": 0.00010908965350832845, |
| "loss": 4.6792, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64650 |
| }, |
| { |
| "epoch": 4.714038507076632, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00010897565692295284, |
| "loss": 4.6577, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64700 |
| }, |
| { |
| "epoch": 4.7176815606841656, |
| "grad_norm": 1.671875, |
| "learning_rate": 0.00010886164857756364, |
| "loss": 4.6769, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64750 |
| }, |
| { |
| "epoch": 4.721324614291699, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00010874762862153591, |
| "loss": 4.6679, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64800 |
| }, |
| { |
| "epoch": 4.724967667899233, |
| "grad_norm": 1.28125, |
| "learning_rate": 0.00010863359720426005, |
| "loss": 4.6622, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64850 |
| }, |
| { |
| "epoch": 4.728610721506767, |
| "grad_norm": 1.4453125, |
| "learning_rate": 0.0001085195544751413, |
| "loss": 4.6696, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64900 |
| }, |
| { |
| "epoch": 4.732253775114301, |
| "grad_norm": 1.71875, |
| "learning_rate": 0.00010840550058359988, |
| "loss": 4.6804, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 64950 |
| }, |
| { |
| "epoch": 4.735896828721835, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00010829143567907054, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65000 |
| }, |
| { |
| "epoch": 4.739539882329368, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00010817735991100248, |
| "loss": 4.6734, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65050 |
| }, |
| { |
| "epoch": 4.743182935936902, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00010806327342885918, |
| "loss": 4.6693, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65100 |
| }, |
| { |
| "epoch": 4.746825989544436, |
| "grad_norm": 1.2890625, |
| "learning_rate": 0.00010794917638211808, |
| "loss": 4.6875, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65150 |
| }, |
| { |
| "epoch": 4.75046904315197, |
| "grad_norm": 1.7578125, |
| "learning_rate": 0.00010783506892027056, |
| "loss": 4.6801, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65200 |
| }, |
| { |
| "epoch": 4.754112096759504, |
| "grad_norm": 1.4375, |
| "learning_rate": 0.00010772095119282151, |
| "loss": 4.6639, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65250 |
| }, |
| { |
| "epoch": 4.7577551503670374, |
| "grad_norm": 2.0, |
| "learning_rate": 0.0001076068233492894, |
| "loss": 4.6687, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65300 |
| }, |
| { |
| "epoch": 4.761398203974571, |
| "grad_norm": 1.328125, |
| "learning_rate": 0.00010749268553920587, |
| "loss": 4.6645, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65350 |
| }, |
| { |
| "epoch": 4.765041257582105, |
| "grad_norm": 1.484375, |
| "learning_rate": 0.00010737853791211572, |
| "loss": 4.6596, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65400 |
| }, |
| { |
| "epoch": 4.768684311189639, |
| "grad_norm": 2.0, |
| "learning_rate": 0.00010726438061757645, |
| "loss": 4.6566, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65450 |
| }, |
| { |
| "epoch": 4.772327364797173, |
| "grad_norm": 1.6796875, |
| "learning_rate": 0.00010715021380515837, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65500 |
| }, |
| { |
| "epoch": 4.7759704184047065, |
| "grad_norm": 1.203125, |
| "learning_rate": 0.0001070360376244442, |
| "loss": 4.6715, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65550 |
| }, |
| { |
| "epoch": 4.77961347201224, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00010692185222502898, |
| "loss": 4.6691, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65600 |
| }, |
| { |
| "epoch": 4.783256525619775, |
| "grad_norm": 1.4140625, |
| "learning_rate": 0.00010680765775651972, |
| "loss": 4.6848, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65650 |
| }, |
| { |
| "epoch": 4.786899579227308, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.00010669345436853546, |
| "loss": 4.6499, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65700 |
| }, |
| { |
| "epoch": 4.790542632834843, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00010657924221070681, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65750 |
| }, |
| { |
| "epoch": 4.7941856864423755, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00010646502143267591, |
| "loss": 4.6505, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65800 |
| }, |
| { |
| "epoch": 4.79782874004991, |
| "grad_norm": 1.75, |
| "learning_rate": 0.00010635079218409623, |
| "loss": 4.6556, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65850 |
| }, |
| { |
| "epoch": 4.801471793657444, |
| "grad_norm": 1.265625, |
| "learning_rate": 0.00010623655461463227, |
| "loss": 4.6704, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65900 |
| }, |
| { |
| "epoch": 4.805114847264978, |
| "grad_norm": 2.328125, |
| "learning_rate": 0.00010612230887395946, |
| "loss": 4.6611, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 65950 |
| }, |
| { |
| "epoch": 4.808757900872512, |
| "grad_norm": 1.625, |
| "learning_rate": 0.000106008055111764, |
| "loss": 4.6705, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66000 |
| }, |
| { |
| "epoch": 4.8124009544800455, |
| "grad_norm": 1.7109375, |
| "learning_rate": 0.00010589379347774249, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66050 |
| }, |
| { |
| "epoch": 4.816044008087579, |
| "grad_norm": 3.140625, |
| "learning_rate": 0.0001057795241216019, |
| "loss": 4.6599, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66100 |
| }, |
| { |
| "epoch": 4.819687061695113, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00010566524719305934, |
| "loss": 4.6602, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66150 |
| }, |
| { |
| "epoch": 4.823330115302647, |
| "grad_norm": 1.5625, |
| "learning_rate": 0.00010555096284184178, |
| "loss": 4.6479, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66200 |
| }, |
| { |
| "epoch": 4.826973168910181, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00010543667121768602, |
| "loss": 4.6504, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66250 |
| }, |
| { |
| "epoch": 4.8306162225177145, |
| "grad_norm": 2.0625, |
| "learning_rate": 0.00010532237247033823, |
| "loss": 4.654, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66300 |
| }, |
| { |
| "epoch": 4.834259276125248, |
| "grad_norm": 1.515625, |
| "learning_rate": 0.00010520806674955403, |
| "loss": 4.6747, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66350 |
| }, |
| { |
| "epoch": 4.837902329732782, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.0001050937542050982, |
| "loss": 4.6595, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66400 |
| }, |
| { |
| "epoch": 4.841545383340316, |
| "grad_norm": 2.015625, |
| "learning_rate": 0.00010497943498674436, |
| "loss": 4.6597, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66450 |
| }, |
| { |
| "epoch": 4.84518843694785, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00010486510924427496, |
| "loss": 4.665, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66500 |
| }, |
| { |
| "epoch": 4.8488314905553835, |
| "grad_norm": 1.7421875, |
| "learning_rate": 0.00010475077712748091, |
| "loss": 4.6618, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66550 |
| }, |
| { |
| "epoch": 4.852474544162917, |
| "grad_norm": 1.546875, |
| "learning_rate": 0.00010463643878616159, |
| "loss": 4.6647, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66600 |
| }, |
| { |
| "epoch": 4.856117597770451, |
| "grad_norm": 1.2265625, |
| "learning_rate": 0.00010452209437012439, |
| "loss": 4.6807, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66650 |
| }, |
| { |
| "epoch": 4.859760651377985, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00010440774402918481, |
| "loss": 4.6923, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66700 |
| }, |
| { |
| "epoch": 4.863403704985519, |
| "grad_norm": 2.5625, |
| "learning_rate": 0.00010429338791316601, |
| "loss": 4.6683, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66750 |
| }, |
| { |
| "epoch": 4.867046758593053, |
| "grad_norm": 1.421875, |
| "learning_rate": 0.00010417902617189875, |
| "loss": 4.6638, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66800 |
| }, |
| { |
| "epoch": 4.870689812200586, |
| "grad_norm": 2.078125, |
| "learning_rate": 0.00010406465895522117, |
| "loss": 4.6586, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66850 |
| }, |
| { |
| "epoch": 4.87433286580812, |
| "grad_norm": 2.625, |
| "learning_rate": 0.00010395028641297853, |
| "loss": 4.6786, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66900 |
| }, |
| { |
| "epoch": 4.877975919415654, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00010383590869502318, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 66950 |
| }, |
| { |
| "epoch": 4.881618973023188, |
| "grad_norm": 1.6171875, |
| "learning_rate": 0.00010372152595121412, |
| "loss": 4.6776, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67000 |
| }, |
| { |
| "epoch": 4.885262026630722, |
| "grad_norm": 1.4765625, |
| "learning_rate": 0.00010360713833141704, |
| "loss": 4.6579, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67050 |
| }, |
| { |
| "epoch": 4.888905080238255, |
| "grad_norm": 1.53125, |
| "learning_rate": 0.00010349274598550391, |
| "loss": 4.6589, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67100 |
| }, |
| { |
| "epoch": 4.892548133845789, |
| "grad_norm": 1.6875, |
| "learning_rate": 0.00010337834906335306, |
| "loss": 4.6657, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67150 |
| }, |
| { |
| "epoch": 4.896191187453323, |
| "grad_norm": 1.7265625, |
| "learning_rate": 0.00010326394771484862, |
| "loss": 4.6714, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67200 |
| }, |
| { |
| "epoch": 4.899834241060857, |
| "grad_norm": 1.296875, |
| "learning_rate": 0.00010314954208988066, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67250 |
| }, |
| { |
| "epoch": 4.903477294668391, |
| "grad_norm": 1.609375, |
| "learning_rate": 0.00010303513233834478, |
| "loss": 4.6727, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67300 |
| }, |
| { |
| "epoch": 4.9071203482759245, |
| "grad_norm": 1.375, |
| "learning_rate": 0.00010292071861014202, |
| "loss": 4.6744, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67350 |
| }, |
| { |
| "epoch": 4.910763401883459, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00010280630105517863, |
| "loss": 4.6755, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67400 |
| }, |
| { |
| "epoch": 4.914406455490992, |
| "grad_norm": 1.3125, |
| "learning_rate": 0.00010269187982336585, |
| "loss": 4.6669, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67450 |
| }, |
| { |
| "epoch": 4.918049509098527, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00010257745506461979, |
| "loss": 4.6787, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67500 |
| }, |
| { |
| "epoch": 4.921692562706061, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00010246302692886108, |
| "loss": 4.6556, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67550 |
| }, |
| { |
| "epoch": 4.925335616313594, |
| "grad_norm": 1.9609375, |
| "learning_rate": 0.00010234859556601491, |
| "loss": 4.6794, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67600 |
| }, |
| { |
| "epoch": 4.928978669921128, |
| "grad_norm": 1.59375, |
| "learning_rate": 0.00010223416112601059, |
| "loss": 4.6625, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67650 |
| }, |
| { |
| "epoch": 4.932621723528662, |
| "grad_norm": 1.40625, |
| "learning_rate": 0.00010211972375878152, |
| "loss": 4.679, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67700 |
| }, |
| { |
| "epoch": 4.936264777136196, |
| "grad_norm": 1.1171875, |
| "learning_rate": 0.00010200528361426487, |
| "loss": 4.6547, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67750 |
| }, |
| { |
| "epoch": 4.93990783074373, |
| "grad_norm": 1.390625, |
| "learning_rate": 0.00010189084084240153, |
| "loss": 4.668, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67800 |
| }, |
| { |
| "epoch": 4.943550884351263, |
| "grad_norm": 1.9921875, |
| "learning_rate": 0.00010177639559313576, |
| "loss": 4.6662, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67850 |
| }, |
| { |
| "epoch": 4.947193937958797, |
| "grad_norm": 1.453125, |
| "learning_rate": 0.00010166194801641515, |
| "loss": 4.6642, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67900 |
| }, |
| { |
| "epoch": 4.950836991566331, |
| "grad_norm": 2.390625, |
| "learning_rate": 0.0001015474982621903, |
| "loss": 4.6687, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 67950 |
| }, |
| { |
| "epoch": 4.954480045173865, |
| "grad_norm": 1.4609375, |
| "learning_rate": 0.00010143304648041459, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68000 |
| }, |
| { |
| "epoch": 4.958123098781399, |
| "grad_norm": 2.21875, |
| "learning_rate": 0.00010131859282104416, |
| "loss": 4.6637, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68050 |
| }, |
| { |
| "epoch": 4.9617661523889325, |
| "grad_norm": 1.5078125, |
| "learning_rate": 0.00010120413743403755, |
| "loss": 4.6576, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68100 |
| }, |
| { |
| "epoch": 4.965409205996466, |
| "grad_norm": 1.3515625, |
| "learning_rate": 0.00010108968046935565, |
| "loss": 4.6698, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68150 |
| }, |
| { |
| "epoch": 4.969052259604, |
| "grad_norm": 1.3671875, |
| "learning_rate": 0.00010097522207696126, |
| "loss": 4.6746, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68200 |
| }, |
| { |
| "epoch": 4.972695313211534, |
| "grad_norm": 1.578125, |
| "learning_rate": 0.00010086076240681916, |
| "loss": 4.6666, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68250 |
| }, |
| { |
| "epoch": 4.976338366819068, |
| "grad_norm": 1.625, |
| "learning_rate": 0.00010074630160889585, |
| "loss": 4.6544, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68300 |
| }, |
| { |
| "epoch": 4.9799814204266015, |
| "grad_norm": 1.6484375, |
| "learning_rate": 0.00010063183983315919, |
| "loss": 4.6565, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68350 |
| }, |
| { |
| "epoch": 4.983624474034135, |
| "grad_norm": 2.171875, |
| "learning_rate": 0.00010051737722957837, |
| "loss": 4.6728, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68400 |
| }, |
| { |
| "epoch": 4.987267527641669, |
| "grad_norm": 3.171875, |
| "learning_rate": 0.00010040291394812366, |
| "loss": 4.6552, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68450 |
| }, |
| { |
| "epoch": 4.990910581249203, |
| "grad_norm": 1.1796875, |
| "learning_rate": 0.0001002884501387663, |
| "loss": 4.6773, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68500 |
| }, |
| { |
| "epoch": 4.994553634856737, |
| "grad_norm": 3.09375, |
| "learning_rate": 0.00010017398595147807, |
| "loss": 4.6575, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68550 |
| }, |
| { |
| "epoch": 4.998196688464271, |
| "grad_norm": 2.125, |
| "learning_rate": 0.00010005952153623137, |
| "loss": 4.6656, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68600 |
| }, |
| { |
| "epoch": 4.999653909907284, |
| "eval_loss": 4.676141738891602, |
| "eval_runtime": 585.0208, |
| "eval_samples_per_second": 518.491, |
| "eval_steps_per_second": 43.209, |
| "memory/device_mem_reserved(gib)": 63.58, |
| "memory/max_mem_active(gib)": 57.51, |
| "memory/max_mem_allocated(gib)": 57.51, |
| "step": 68620 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 137240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.896851859210974e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|