{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999274679045478, "eval_steps": 500, "global_step": 13786, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.253209545223762e-05, "grad_norm": 4.8125, "learning_rate": 0.0, "loss": 10.8997, "memory/device_memory_reserved": 2.712890625, "memory/max_memory_active": 2.6842808723449707, "memory/max_memory_allocated": 2.6842808723449707, "step": 1 }, { "epoch": 0.00014506419090447523, "grad_norm": 4.46875, "learning_rate": 0.00015, "loss": 11.0607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2 }, { "epoch": 0.00021759628635671283, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 10.7649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3 }, { "epoch": 0.00029012838180895046, "grad_norm": 30.5, "learning_rate": 0.0003, "loss": 11.0867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4 }, { "epoch": 0.0003626604772611881, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 10.743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5 }, { "epoch": 0.00043519257271342566, "grad_norm": 24.875, "learning_rate": 0.0003, "loss": 11.0994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6 }, { "epoch": 0.0005077246681656633, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 10.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7 }, { "epoch": 0.0005802567636179009, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 11.1745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8 }, { "epoch": 0.0006527888590701386, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 10.8205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9 }, { "epoch": 0.0007253209545223762, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 10.7688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10 }, { "epoch": 0.0007978530499746138, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 10.8028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11 }, { "epoch": 0.0008703851454268513, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 10.879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12 }, { "epoch": 0.000942917240879089, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 10.7508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13 }, { "epoch": 0.0010154493363313266, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 10.8927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 14 }, { "epoch": 0.0010879814317835642, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 10.7837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 15 }, { "epoch": 0.0011605135272358018, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 11.0005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 16 }, { "epoch": 0.0012330456226880395, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 10.9112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 17 }, { "epoch": 0.001305577718140277, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 11.1267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 18 }, { "epoch": 0.0013781098135925147, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 10.8725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 19 }, { "epoch": 0.0014506419090447524, "grad_norm": 160.0, "learning_rate": 0.0003, "loss": 10.7447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 20 }, { "epoch": 0.00152317400449699, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 10.9239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 21 }, { "epoch": 0.0015957060999492276, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 10.959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 22 }, { "epoch": 0.0016682381954014652, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 10.8009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 23 }, { "epoch": 0.0017407702908537027, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 11.0102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 24 }, { "epoch": 0.0018133023863059403, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 10.7915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 25 }, { "epoch": 0.001885834481758178, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 10.6733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 26 }, { "epoch": 0.0019583665772104158, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 10.9028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 27 }, { "epoch": 0.002030898672662653, "grad_norm": 14.4375, "learning_rate": 0.0003, "loss": 11.0611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 28 }, { "epoch": 0.002103430768114891, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 10.9051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 29 }, { "epoch": 0.0021759628635671284, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 10.6701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 30 }, { "epoch": 0.0022484949590193663, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 10.7484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 31 }, { "epoch": 0.0023210270544716037, "grad_norm": 18.75, "learning_rate": 0.0003, "loss": 10.8318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 32 }, { "epoch": 0.002393559149923841, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 10.5648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 33 }, { "epoch": 0.002466091245376079, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 10.5946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 34 }, { "epoch": 0.0025386233408283164, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 10.8058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 35 }, { "epoch": 0.002611155436280554, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 10.8157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 36 }, { "epoch": 0.0026836875317327916, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 10.8388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 37 }, { "epoch": 0.0027562196271850295, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 10.6179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 38 }, { "epoch": 0.002828751722637267, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 10.869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 39 }, { "epoch": 0.0029012838180895047, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 10.8854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 40 }, { "epoch": 0.002973815913541742, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 10.6549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 41 }, { "epoch": 0.00304634800899398, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 10.6103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 42 }, { "epoch": 0.0031188801044462174, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 10.7379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 43 }, { "epoch": 0.0031914121998984552, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 10.642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 44 }, { "epoch": 0.0032639442953506926, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 10.7892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 45 }, { "epoch": 0.0033364763908029305, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 10.6907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 46 }, { "epoch": 0.003409008486255168, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 10.6987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 47 }, { "epoch": 0.0034815405817074053, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 10.8396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 48 }, { "epoch": 0.003554072677159643, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 10.7985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 49 }, { "epoch": 0.0036266047726118806, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 10.5805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 50 }, { "epoch": 0.0036991368680641184, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 10.4243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 51 }, { "epoch": 0.003771668963516356, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 10.9191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 52 }, { "epoch": 0.0038442010589685937, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 10.5788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 53 }, { "epoch": 0.0039167331544208315, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 10.6745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 54 }, { "epoch": 0.003989265249873069, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 10.6783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 55 }, { "epoch": 0.004061797345325306, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.5295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 56 }, { "epoch": 0.004134329440777544, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 10.8855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 57 }, { "epoch": 0.004206861536229782, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 10.8227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 58 }, { "epoch": 0.0042793936316820195, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 10.8612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 59 }, { "epoch": 0.004351925727134257, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 10.9136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 60 }, { "epoch": 0.004424457822586494, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 10.5436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 61 }, { "epoch": 0.0044969899180387326, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 10.7839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 62 }, { "epoch": 0.00456952201349097, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 10.5487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 63 }, { "epoch": 0.004642054108943207, "grad_norm": 20.0, "learning_rate": 0.0003, "loss": 10.5741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 64 }, { "epoch": 0.004714586204395445, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 10.6774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 65 }, { "epoch": 0.004787118299847682, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 10.6338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 66 }, { "epoch": 0.0048596503952999205, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 10.8044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 67 }, { "epoch": 0.004932182490752158, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 10.4985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 68 }, { "epoch": 0.005004714586204395, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 10.7786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 69 }, { "epoch": 0.005077246681656633, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 10.6468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 70 }, { "epoch": 0.005149778777108871, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 11.0068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 71 }, { "epoch": 0.005222310872561108, "grad_norm": 38.0, "learning_rate": 0.0003, "loss": 10.5223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 72 }, { "epoch": 0.005294842968013346, "grad_norm": 14.9375, "learning_rate": 0.0003, "loss": 10.5871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 73 }, { "epoch": 0.005367375063465583, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 10.775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 74 }, { "epoch": 0.0054399071589178215, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 10.51, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 75 }, { "epoch": 0.005512439254370059, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 10.812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 76 }, { "epoch": 0.005584971349822296, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 10.4526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 77 }, { "epoch": 0.005657503445274534, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 10.5574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 78 }, { "epoch": 0.005730035540726772, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 10.587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 79 }, { "epoch": 0.0058025676361790094, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 10.5311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 80 }, { "epoch": 0.005875099731631247, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 10.6108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 81 }, { "epoch": 0.005947631827083484, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 10.703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 82 }, { "epoch": 0.006020163922535722, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 10.4582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 83 }, { "epoch": 0.00609269601798796, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 10.682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 84 }, { "epoch": 0.006165228113440197, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 10.46, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 85 }, { "epoch": 0.006237760208892435, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 10.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 86 }, { "epoch": 0.006310292304344672, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 10.7082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 87 }, { "epoch": 0.0063828243997969105, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 10.3335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 88 }, { "epoch": 0.006455356495249148, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 10.4741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 89 }, { "epoch": 0.006527888590701385, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 10.5863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 90 }, { "epoch": 0.006600420686153623, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 10.7226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 91 }, { "epoch": 0.006672952781605861, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 10.4524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 92 }, { "epoch": 0.006745484877058098, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 10.7559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 93 }, { "epoch": 0.006818016972510336, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 10.6452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 94 }, { "epoch": 0.006890549067962573, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 10.42, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 95 }, { "epoch": 0.006963081163414811, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 10.5506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 96 }, { "epoch": 0.007035613258867049, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 10.602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 97 }, { "epoch": 0.007108145354319286, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 10.5707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 98 }, { "epoch": 0.007180677449771524, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 10.5281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 99 }, { "epoch": 0.007253209545223761, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 10.4318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 100 }, { "epoch": 0.007325741640675999, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 10.5005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 101 }, { "epoch": 0.007398273736128237, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 10.6385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 102 }, { "epoch": 0.007470805831580474, "grad_norm": 15.9375, "learning_rate": 0.0003, "loss": 10.4711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 103 }, { "epoch": 0.007543337927032712, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 10.5441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 104 }, { "epoch": 0.00761587002248495, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 10.5715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 105 }, { "epoch": 0.007688402117937187, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 10.5313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 106 }, { "epoch": 0.007760934213389425, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 10.5563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 107 }, { "epoch": 0.007833466308841663, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 10.6182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 108 }, { "epoch": 0.0079059984042939, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 10.4178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 109 }, { "epoch": 0.007978530499746138, "grad_norm": 16.25, "learning_rate": 0.0003, "loss": 10.5581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 110 }, { "epoch": 0.008051062595198374, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 10.497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 111 }, { "epoch": 0.008123594690650613, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 10.5352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 112 }, { "epoch": 0.008196126786102851, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 10.414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 113 }, { "epoch": 0.008268658881555088, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 10.7316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 114 }, { "epoch": 0.008341190977007326, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 10.4075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 115 }, { "epoch": 0.008413723072459564, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 10.6437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 116 }, { "epoch": 0.0084862551679118, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 10.4287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 117 }, { "epoch": 0.008558787263364039, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 10.5886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 118 }, { "epoch": 0.008631319358816275, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 10.6712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 119 }, { "epoch": 0.008703851454268514, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 10.6853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 120 }, { "epoch": 0.008776383549720752, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 10.3046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 121 }, { "epoch": 0.008848915645172989, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 10.5486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 122 }, { "epoch": 0.008921447740625227, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 10.4199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 123 }, { "epoch": 0.008993979836077465, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 10.3421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 124 }, { "epoch": 0.009066511931529702, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.5816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 125 }, { "epoch": 0.00913904402698194, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 10.4212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 126 }, { "epoch": 0.009211576122434176, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 10.5346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 127 }, { "epoch": 0.009284108217886415, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 10.4746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 128 }, { "epoch": 0.009356640313338653, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 10.5545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 129 }, { "epoch": 0.00942917240879089, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 10.4066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 130 }, { "epoch": 0.009501704504243128, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 10.4252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 131 }, { "epoch": 0.009574236599695364, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 10.4982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 132 }, { "epoch": 0.009646768695147603, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 10.2847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 133 }, { "epoch": 0.009719300790599841, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 10.3046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 134 }, { "epoch": 0.009791832886052078, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 10.3668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 135 }, { "epoch": 0.009864364981504316, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 10.4291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 136 }, { "epoch": 0.009936897076956554, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 10.4586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 137 }, { "epoch": 0.01000942917240879, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 10.3446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 138 }, { "epoch": 0.010081961267861029, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 10.3947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 139 }, { "epoch": 0.010154493363313265, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 140 }, { "epoch": 0.010227025458765504, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 10.4443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 141 }, { "epoch": 0.010299557554217742, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 10.708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 142 }, { "epoch": 0.010372089649669979, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 10.5302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 143 }, { "epoch": 0.010444621745122217, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 10.4722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 144 }, { "epoch": 0.010517153840574453, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 10.5844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 145 }, { "epoch": 0.010589685936026692, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 10.3026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 146 }, { "epoch": 0.01066221803147893, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 10.3518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 147 }, { "epoch": 0.010734750126931166, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 10.5429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 148 }, { "epoch": 0.010807282222383405, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 10.4451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 149 }, { "epoch": 0.010879814317835643, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 10.6124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 150 }, { "epoch": 0.01095234641328788, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 10.24, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 151 }, { "epoch": 0.011024878508740118, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 10.2801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 152 }, { "epoch": 0.011097410604192354, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 10.5938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 153 }, { "epoch": 0.011169942699644593, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 10.3429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 154 }, { "epoch": 0.011242474795096831, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 10.6533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 155 }, { "epoch": 0.011315006890549067, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.3064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 156 }, { "epoch": 0.011387538986001306, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 10.1714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 157 }, { "epoch": 0.011460071081453544, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 10.2961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 158 }, { "epoch": 0.01153260317690578, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 10.4073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 159 }, { "epoch": 0.011605135272358019, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 10.3057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 160 }, { "epoch": 0.011677667367810255, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 10.5174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 161 }, { "epoch": 0.011750199463262494, "grad_norm": 28.625, "learning_rate": 0.0003, "loss": 10.2435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 162 }, { "epoch": 0.011822731558714732, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 10.36, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 163 }, { "epoch": 0.011895263654166969, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 10.3709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 164 }, { "epoch": 0.011967795749619207, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 10.4393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 165 }, { "epoch": 0.012040327845071443, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 10.4097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 166 }, { "epoch": 0.012112859940523682, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 10.291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 167 }, { "epoch": 0.01218539203597592, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 10.3338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 168 }, { "epoch": 0.012257924131428156, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 10.423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 169 }, { "epoch": 0.012330456226880395, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 10.3686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 170 }, { "epoch": 0.012402988322332633, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 10.3954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 171 }, { "epoch": 0.01247552041778487, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 10.0959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 172 }, { "epoch": 0.012548052513237108, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 10.5384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 173 }, { "epoch": 0.012620584608689344, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 10.4895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 174 }, { "epoch": 0.012693116704141583, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 10.3136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 175 }, { "epoch": 0.012765648799593821, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 10.4502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 176 }, { "epoch": 0.012838180895046057, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 10.3137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 177 }, { "epoch": 0.012910712990498296, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 10.205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 178 }, { "epoch": 0.012983245085950532, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 10.2628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 179 }, { "epoch": 0.01305577718140277, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 10.167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 180 }, { "epoch": 0.013128309276855009, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 10.6587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 181 }, { "epoch": 0.013200841372307245, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 10.4571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 182 }, { "epoch": 0.013273373467759484, "grad_norm": 15.3125, "learning_rate": 0.0003, "loss": 10.3303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 183 }, { "epoch": 0.013345905563211722, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 10.2597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 184 }, { "epoch": 0.013418437658663959, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 10.4776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 185 }, { "epoch": 0.013490969754116197, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 10.0875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 186 }, { "epoch": 0.013563501849568433, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 10.2467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 187 }, { "epoch": 0.013636033945020672, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 10.3896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 188 }, { "epoch": 0.01370856604047291, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 10.2396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 189 }, { "epoch": 0.013781098135925146, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 10.2765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 190 }, { "epoch": 0.013853630231377385, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 10.106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 191 }, { "epoch": 0.013926162326829621, "grad_norm": 13.5, "learning_rate": 0.0003, "loss": 10.1597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 192 }, { "epoch": 0.01399869442228186, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 10.5672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 193 }, { "epoch": 0.014071226517734098, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 10.2981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 194 }, { "epoch": 0.014143758613186334, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 10.1556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 195 }, { "epoch": 0.014216290708638573, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 10.1803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 196 }, { "epoch": 0.014288822804090811, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 10.3483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 197 }, { "epoch": 0.014361354899543047, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 10.2564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 198 }, { "epoch": 0.014433886994995286, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 10.2356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 199 }, { "epoch": 0.014506419090447522, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 10.3421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 200 }, { "epoch": 0.01457895118589976, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 10.1838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 201 }, { "epoch": 0.014651483281351999, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 10.3686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 202 }, { "epoch": 0.014724015376804235, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 10.3971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 203 }, { "epoch": 0.014796547472256474, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 10.3254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 204 }, { "epoch": 0.014869079567708712, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 10.0006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 205 }, { "epoch": 0.014941611663160949, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 10.2859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 206 }, { "epoch": 0.015014143758613187, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 10.3637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 207 }, { "epoch": 0.015086675854065423, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.1014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 208 }, { "epoch": 0.015159207949517662, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 10.2504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 209 }, { "epoch": 0.0152317400449699, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 10.2055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 210 }, { "epoch": 0.015304272140422136, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 10.2915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 211 }, { "epoch": 0.015376804235874375, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 10.1891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 212 }, { "epoch": 0.015449336331326611, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 10.5288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 213 }, { "epoch": 0.01552186842677885, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 10.367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 214 }, { "epoch": 0.015594400522231088, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 10.2141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 215 }, { "epoch": 0.015666932617683326, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 10.5682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 216 }, { "epoch": 0.015739464713135563, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 10.3658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 217 }, { "epoch": 0.0158119968085878, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 10.1266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 218 }, { "epoch": 0.01588452890404004, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 10.246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 219 }, { "epoch": 0.015957060999492276, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 10.3247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 220 }, { "epoch": 0.016029593094944512, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 10.2068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 221 }, { "epoch": 0.01610212519039675, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 10.2842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 222 }, { "epoch": 0.01617465728584899, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 10.2131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 223 }, { "epoch": 0.016247189381301225, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 10.2543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 224 }, { "epoch": 0.016319721476753462, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 10.5977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 225 }, { "epoch": 0.016392253572205702, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 10.0732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 226 }, { "epoch": 0.01646478566765794, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 10.2104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 227 }, { "epoch": 0.016537317763110175, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 10.2551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 228 }, { "epoch": 0.016609849858562415, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 10.0166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 229 }, { "epoch": 0.01668238195401465, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 10.4113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 230 }, { "epoch": 0.016754914049466888, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 10.2566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 231 }, { "epoch": 0.016827446144919128, "grad_norm": 29.5, "learning_rate": 0.0003, "loss": 10.2512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 232 }, { "epoch": 0.016899978240371365, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 10.0074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 233 }, { "epoch": 0.0169725103358236, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.9786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 234 }, { "epoch": 0.01704504243127584, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 10.2572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 235 }, { "epoch": 0.017117574526728078, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 10.4133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 236 }, { "epoch": 0.017190106622180314, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 10.4121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 237 }, { "epoch": 0.01726263871763255, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 10.2614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 238 }, { "epoch": 0.01733517081308479, "grad_norm": 19.25, "learning_rate": 0.0003, "loss": 10.2378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 239 }, { "epoch": 0.017407702908537027, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 10.1471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 240 }, { "epoch": 0.017480235003989264, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 10.1188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 241 }, { "epoch": 0.017552767099441504, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 10.398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 242 }, { "epoch": 0.01762529919489374, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 10.0136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 243 }, { "epoch": 0.017697831290345977, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 10.099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 244 }, { "epoch": 0.017770363385798217, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 10.4053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 245 }, { "epoch": 0.017842895481250454, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 10.2494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 246 }, { "epoch": 0.01791542757670269, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 10.2404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 247 }, { "epoch": 0.01798795967215493, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 10.2015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 248 }, { "epoch": 0.018060491767607167, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 10.2896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 249 }, { "epoch": 0.018133023863059403, "grad_norm": 19.375, "learning_rate": 0.0003, "loss": 9.9672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 250 }, { "epoch": 0.01820555595851164, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 10.2009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 251 }, { "epoch": 0.01827808805396388, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 10.3666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 252 }, { "epoch": 0.018350620149416116, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 10.3757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 253 }, { "epoch": 0.018423152244868353, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 10.3047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 254 }, { "epoch": 0.018495684340320593, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 10.2306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 255 }, { "epoch": 0.01856821643577283, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 10.2347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 256 }, { "epoch": 0.018640748531225066, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 10.23, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 257 }, { "epoch": 0.018713280626677306, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 10.1167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 258 }, { "epoch": 0.018785812722129543, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 10.1678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 259 }, { "epoch": 0.01885834481758178, "grad_norm": 19.375, "learning_rate": 0.0003, "loss": 10.1924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 260 }, { "epoch": 0.01893087691303402, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 10.19, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 261 }, { "epoch": 0.019003409008486256, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 10.054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 262 }, { "epoch": 0.019075941103938492, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.9918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 263 }, { "epoch": 0.01914847319939073, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 10.1795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 264 }, { "epoch": 0.01922100529484297, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 10.1791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 265 }, { "epoch": 0.019293537390295205, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 10.2484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 266 }, { "epoch": 0.019366069485747442, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 10.2372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 267 }, { "epoch": 0.019438601581199682, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 10.103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 268 }, { "epoch": 0.01951113367665192, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 10.1929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 269 }, { "epoch": 0.019583665772104155, "grad_norm": 21.75, "learning_rate": 0.0003, "loss": 10.029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 270 }, { "epoch": 0.019656197867556395, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 10.5439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 271 }, { "epoch": 0.01972872996300863, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 10.2394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 272 }, { "epoch": 0.019801262058460868, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.9863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 273 }, { "epoch": 0.019873794153913108, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 10.2613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 274 }, { "epoch": 0.019946326249365345, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 10.2055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 275 }, { "epoch": 0.02001885834481758, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 10.2036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 276 }, { "epoch": 0.020091390440269818, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.9585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 277 }, { "epoch": 0.020163922535722058, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 10.2534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 278 }, { "epoch": 0.020236454631174294, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 10.2084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 279 }, { "epoch": 0.02030898672662653, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.9439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 280 }, { "epoch": 0.02038151882207877, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 10.1695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 281 }, { "epoch": 0.020454050917531007, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 10.2445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 282 }, { "epoch": 0.020526583012983244, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.9417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 283 }, { "epoch": 0.020599115108435484, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 10.062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 284 }, { "epoch": 0.02067164720388772, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 10.0208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 285 }, { "epoch": 0.020744179299339957, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 10.0879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 286 }, { "epoch": 0.020816711394792197, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 10.0285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 287 }, { "epoch": 0.020889243490244434, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.9608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 288 }, { "epoch": 0.02096177558569667, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 10.0837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 289 }, { "epoch": 0.021034307681148907, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.9087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 290 }, { "epoch": 0.021106839776601147, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 10.0484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 291 }, { "epoch": 0.021179371872053383, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 10.1197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 292 }, { "epoch": 0.02125190396750562, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.9848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 293 }, { "epoch": 0.02132443606295786, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.8819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 294 }, { "epoch": 0.021396968158410096, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 10.1691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 295 }, { "epoch": 0.021469500253862333, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 10.1135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 296 }, { "epoch": 0.021542032349314573, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 9.898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 297 }, { "epoch": 0.02161456444476681, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 10.1599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 298 }, { "epoch": 0.021687096540219046, "grad_norm": 38.25, "learning_rate": 0.0003, "loss": 10.1387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 299 }, { "epoch": 0.021759628635671286, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 10.1013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 300 }, { "epoch": 0.021832160731123523, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 10.0709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 301 }, { "epoch": 0.02190469282657576, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 10.112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 302 }, { "epoch": 0.021977224922027996, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 10.0609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 303 }, { "epoch": 0.022049757017480236, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.9883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 304 }, { "epoch": 0.022122289112932472, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 10.1279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 305 }, { "epoch": 0.02219482120838471, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.9984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 306 }, { "epoch": 0.02226735330383695, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 10.2216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 307 }, { "epoch": 0.022339885399289185, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 10.1686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 308 }, { "epoch": 0.022412417494741422, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 10.0539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 309 }, { "epoch": 0.022484949590193662, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 10.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 310 }, { "epoch": 0.0225574816856459, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 10.5105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 311 }, { "epoch": 0.022630013781098135, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.7549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 312 }, { "epoch": 0.022702545876550375, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 10.0351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 313 }, { "epoch": 0.02277507797200261, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 10.1023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 314 }, { "epoch": 0.022847610067454848, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.8897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 315 }, { "epoch": 0.022920142162907088, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.9204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 316 }, { "epoch": 0.022992674258359325, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 10.1086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 317 }, { "epoch": 0.02306520635381156, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 10.0563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 318 }, { "epoch": 0.023137738449263798, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 10.2218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 319 }, { "epoch": 0.023210270544716038, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 320 }, { "epoch": 0.023282802640168274, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 10.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 321 }, { "epoch": 0.02335533473562051, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 9.8607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 322 }, { "epoch": 0.02342786683107275, "grad_norm": 26.375, "learning_rate": 0.0003, "loss": 10.026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 323 }, { "epoch": 0.023500398926524987, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 324 }, { "epoch": 0.023572931021977224, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.9444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 325 }, { "epoch": 0.023645463117429464, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.9679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 326 }, { "epoch": 0.0237179952128817, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 10.0031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 327 }, { "epoch": 0.023790527308333937, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 10.0174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 328 }, { "epoch": 0.023863059403786177, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.9199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 329 }, { "epoch": 0.023935591499238414, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.8498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 330 }, { "epoch": 0.02400812359469065, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.9869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 331 }, { "epoch": 0.024080655690142887, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.9407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 332 }, { "epoch": 0.024153187785595127, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.9246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 333 }, { "epoch": 0.024225719881047363, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 10.3469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 334 }, { "epoch": 0.0242982519764996, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 10.0355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 335 }, { "epoch": 0.02437078407195184, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 10.154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 336 }, { "epoch": 0.024443316167404076, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 10.0908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 337 }, { "epoch": 0.024515848262856313, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 338 }, { "epoch": 0.024588380358308553, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.9515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 339 }, { "epoch": 0.02466091245376079, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.5944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 340 }, { "epoch": 0.024733444549213026, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.8674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 341 }, { "epoch": 0.024805976644665266, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.8262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 342 }, { "epoch": 0.024878508740117503, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.7439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 343 }, { "epoch": 0.02495104083556974, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 344 }, { "epoch": 0.025023572931021976, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 10.0089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 345 }, { "epoch": 0.025096105026474216, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.8925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 346 }, { "epoch": 0.025168637121926452, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 10.0046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 347 }, { "epoch": 0.02524116921737869, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.8875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 348 }, { "epoch": 0.02531370131283093, "grad_norm": 71.0, "learning_rate": 0.0003, "loss": 9.8334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 349 }, { "epoch": 0.025386233408283165, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 10.0065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 350 }, { "epoch": 0.025458765503735402, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 10.0521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 351 }, { "epoch": 0.025531297599187642, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 10.0712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 352 }, { "epoch": 0.02560382969463988, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.6556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 353 }, { "epoch": 0.025676361790092115, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 10.2462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 354 }, { "epoch": 0.025748893885544355, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.9769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 355 }, { "epoch": 0.02582142598099659, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.9435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 356 }, { "epoch": 0.025893958076448828, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.8413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 357 }, { "epoch": 0.025966490171901065, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.9094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 358 }, { "epoch": 0.026039022267353305, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.6741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 359 }, { "epoch": 0.02611155436280554, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 10.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 360 }, { "epoch": 0.026184086458257778, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 10.1702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 361 }, { "epoch": 0.026256618553710018, "grad_norm": 19.0, "learning_rate": 0.0003, "loss": 10.012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 362 }, { "epoch": 0.026329150649162254, "grad_norm": 41.5, "learning_rate": 0.0003, "loss": 9.9752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 363 }, { "epoch": 0.02640168274461449, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.9919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 364 }, { "epoch": 0.02647421484006673, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.9451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 365 }, { "epoch": 0.026546746935518967, "grad_norm": 32.25, "learning_rate": 0.0003, "loss": 10.0645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 366 }, { "epoch": 0.026619279030971204, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.7518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 367 }, { "epoch": 0.026691811126423444, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 10.0287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 368 }, { "epoch": 0.02676434322187568, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.8223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 369 }, { "epoch": 0.026836875317327917, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.8902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 370 }, { "epoch": 0.026909407412780154, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.9424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 371 }, { "epoch": 0.026981939508232394, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.6801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 372 }, { "epoch": 0.02705447160368463, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 10.1038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 373 }, { "epoch": 0.027127003699136867, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 9.9518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 374 }, { "epoch": 0.027199535794589107, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 10.0155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 375 }, { "epoch": 0.027272067890041343, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 9.8932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 376 }, { "epoch": 0.02734459998549358, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.9527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 377 }, { "epoch": 0.02741713208094582, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 378 }, { "epoch": 0.027489664176398056, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 379 }, { "epoch": 0.027562196271850293, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.9719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 380 }, { "epoch": 0.027634728367302533, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 9.7762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 381 }, { "epoch": 0.02770726046275477, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.9875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 382 }, { "epoch": 0.027779792558207006, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.9121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 383 }, { "epoch": 0.027852324653659243, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.8732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 384 }, { "epoch": 0.027924856749111483, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 10.0314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 385 }, { "epoch": 0.02799738884456372, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.9612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 386 }, { "epoch": 0.028069920940015956, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.9716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 387 }, { "epoch": 0.028142453035468196, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.7962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 388 }, { "epoch": 0.028214985130920432, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.9048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 389 }, { "epoch": 0.02828751722637267, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.9243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 390 }, { "epoch": 0.02836004932182491, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.8904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 391 }, { "epoch": 0.028432581417277145, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.8785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 392 }, { "epoch": 0.028505113512729382, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.7272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 393 }, { "epoch": 0.028577645608181622, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.8295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 394 }, { "epoch": 0.02865017770363386, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 10.1623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 395 }, { "epoch": 0.028722709799086095, "grad_norm": 18.125, "learning_rate": 0.0003, "loss": 9.8764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 396 }, { "epoch": 0.02879524189453833, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 397 }, { "epoch": 0.02886777398999057, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.8287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 398 }, { "epoch": 0.028940306085442808, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 10.0409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 399 }, { "epoch": 0.029012838180895045, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.6976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 400 }, { "epoch": 0.029085370276347285, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 10.0438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 401 }, { "epoch": 0.02915790237179952, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 402 }, { "epoch": 0.029230434467251758, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.9673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 403 }, { "epoch": 0.029302966562703998, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.8532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 404 }, { "epoch": 0.029375498658156234, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.9235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 405 }, { "epoch": 0.02944803075360847, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.7456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 406 }, { "epoch": 0.02952056284906071, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.6341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 407 }, { "epoch": 0.029593094944512947, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.8282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 408 }, { "epoch": 0.029665627039965184, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.7736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 409 }, { "epoch": 0.029738159135417424, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.7695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 410 }, { "epoch": 0.02981069123086966, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 9.8158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 411 }, { "epoch": 0.029883223326321897, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.9478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 412 }, { "epoch": 0.029955755421774134, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.9754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 413 }, { "epoch": 0.030028287517226374, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 414 }, { "epoch": 0.03010081961267861, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.6692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 415 }, { "epoch": 0.030173351708130847, "grad_norm": 15.875, "learning_rate": 0.0003, "loss": 9.7105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 416 }, { "epoch": 0.030245883803583087, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 417 }, { "epoch": 0.030318415899035323, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.8769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 418 }, { "epoch": 0.03039094799448756, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.7801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 419 }, { "epoch": 0.0304634800899398, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 420 }, { "epoch": 0.030536012185392036, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.8948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 421 }, { "epoch": 0.030608544280844273, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.5638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 422 }, { "epoch": 0.030681076376296513, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.6835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 423 }, { "epoch": 0.03075360847174875, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.9319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 424 }, { "epoch": 0.030826140567200986, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 425 }, { "epoch": 0.030898672662653223, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.7063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 426 }, { "epoch": 0.030971204758105463, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.8606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 427 }, { "epoch": 0.0310437368535577, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.9384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 428 }, { "epoch": 0.031116268949009936, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.8539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 429 }, { "epoch": 0.031188801044462176, "grad_norm": 11.4375, "learning_rate": 0.0003, "loss": 10.0208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 430 }, { "epoch": 0.03126133313991441, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.8515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 431 }, { "epoch": 0.03133386523536665, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.6541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 432 }, { "epoch": 0.03140639733081889, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 433 }, { "epoch": 0.031478929426271125, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.9263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 434 }, { "epoch": 0.03155146152172336, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 435 }, { "epoch": 0.0316239936171756, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 10.1273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 436 }, { "epoch": 0.031696525712627835, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.7597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 437 }, { "epoch": 0.03176905780808008, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.7682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 438 }, { "epoch": 0.031841589903532315, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.5718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 439 }, { "epoch": 0.03191412199898455, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.7697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 440 }, { "epoch": 0.03198665409443679, "grad_norm": 51.75, "learning_rate": 0.0003, "loss": 9.7492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 441 }, { "epoch": 0.032059186189889025, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.6886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 442 }, { "epoch": 0.03213171828534126, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 9.8529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 443 }, { "epoch": 0.0322042503807935, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.5135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 444 }, { "epoch": 0.03227678247624574, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.6822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 445 }, { "epoch": 0.03234931457169798, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 446 }, { "epoch": 0.032421846667150214, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 9.5903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 447 }, { "epoch": 0.03249437876260245, "grad_norm": 49.75, "learning_rate": 0.0003, "loss": 9.7188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 448 }, { "epoch": 0.03256691085805469, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 9.7563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 449 }, { "epoch": 0.032639442953506924, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.7763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 450 }, { "epoch": 0.03271197504895917, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.7945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 451 }, { "epoch": 0.032784507144411404, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.6633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 452 }, { "epoch": 0.03285703923986364, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.5788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 453 }, { "epoch": 0.03292957133531588, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.5141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 454 }, { "epoch": 0.033002103430768114, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.9052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 455 }, { "epoch": 0.03307463552622035, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.8098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 456 }, { "epoch": 0.033147167621672594, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 457 }, { "epoch": 0.03321969971712483, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.6335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 458 }, { "epoch": 0.03329223181257707, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.5501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 459 }, { "epoch": 0.0333647639080293, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.6913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 460 }, { "epoch": 0.03343729600348154, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.6483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 461 }, { "epoch": 0.033509828098933776, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.6391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 462 }, { "epoch": 0.03358236019438601, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.8626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 463 }, { "epoch": 0.033654892289838256, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.7595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 464 }, { "epoch": 0.03372742438529049, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.3604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 465 }, { "epoch": 0.03379995648074273, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.6434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 466 }, { "epoch": 0.033872488576194966, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.7835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 467 }, { "epoch": 0.0339450206716472, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.6031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 468 }, { "epoch": 0.03401755276709944, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 9.5408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 469 }, { "epoch": 0.03409008486255168, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.5313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 470 }, { "epoch": 0.03416261695800392, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.6398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 471 }, { "epoch": 0.034235149053456156, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.8971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 472 }, { "epoch": 0.03430768114890839, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.6906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 473 }, { "epoch": 0.03438021324436063, "grad_norm": 67.0, "learning_rate": 0.0003, "loss": 9.8466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 474 }, { "epoch": 0.034452745339812865, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.8615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 475 }, { "epoch": 0.0345252774352651, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.3904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 476 }, { "epoch": 0.034597809530717345, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.3959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 477 }, { "epoch": 0.03467034162616958, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 9.481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 478 }, { "epoch": 0.03474287372162182, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 479 }, { "epoch": 0.034815405817074055, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 480 }, { "epoch": 0.03488793791252629, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.8573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 481 }, { "epoch": 0.03496047000797853, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 482 }, { "epoch": 0.03503300210343077, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.7608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 483 }, { "epoch": 0.03510553419888301, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.6951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 484 }, { "epoch": 0.035178066294335245, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 485 }, { "epoch": 0.03525059838978748, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.6024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 486 }, { "epoch": 0.03532313048523972, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.6268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 487 }, { "epoch": 0.035395662580691954, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.4448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 488 }, { "epoch": 0.03546819467614419, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.7091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 489 }, { "epoch": 0.035540726771596434, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.4447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 490 }, { "epoch": 0.03561325886704867, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.7548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 491 }, { "epoch": 0.03568579096250091, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 9.4639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 492 }, { "epoch": 0.035758323057953144, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 9.8694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 493 }, { "epoch": 0.03583085515340538, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.5162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 494 }, { "epoch": 0.03590338724885762, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.6182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 495 }, { "epoch": 0.03597591934430986, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.5833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 496 }, { "epoch": 0.0360484514397621, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.5479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 497 }, { "epoch": 0.036120983535214334, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.7595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 498 }, { "epoch": 0.03619351563066657, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 10.072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 499 }, { "epoch": 0.03626604772611881, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.5261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 500 }, { "epoch": 0.03633857982157104, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.5806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 501 }, { "epoch": 0.03641111191702328, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 9.8195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 502 }, { "epoch": 0.03648364401247552, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.9464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 503 }, { "epoch": 0.03655617610792776, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.5721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 504 }, { "epoch": 0.036628708203379996, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 9.5325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 505 }, { "epoch": 0.03670124029883223, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.9598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 506 }, { "epoch": 0.03677377239428447, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.4703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 507 }, { "epoch": 0.036846304489736706, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.6183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 508 }, { "epoch": 0.03691883658518895, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.5361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 509 }, { "epoch": 0.036991368680641186, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.5712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 510 }, { "epoch": 0.03706390077609342, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.6304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 511 }, { "epoch": 0.03713643287154566, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.4297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 512 }, { "epoch": 0.037208964966997896, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 513 }, { "epoch": 0.03728149706245013, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.8366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 514 }, { "epoch": 0.03735402915790237, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.4004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 515 }, { "epoch": 0.03742656125335461, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.5757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 516 }, { "epoch": 0.03749909334880685, "grad_norm": 35.75, "learning_rate": 0.0003, "loss": 9.6582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 517 }, { "epoch": 0.037571625444259085, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.2419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 518 }, { "epoch": 0.03764415753971132, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 519 }, { "epoch": 0.03771668963516356, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.4311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 520 }, { "epoch": 0.037789221730615795, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 9.4638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 521 }, { "epoch": 0.03786175382606804, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.6146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 522 }, { "epoch": 0.037934285921520275, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.6417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 523 }, { "epoch": 0.03800681801697251, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.3089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 524 }, { "epoch": 0.03807935011242475, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.7131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 525 }, { "epoch": 0.038151882207876985, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.5849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 526 }, { "epoch": 0.03822441430332922, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 9.5173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 527 }, { "epoch": 0.03829694639878146, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.6687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 528 }, { "epoch": 0.0383694784942337, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.6791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 529 }, { "epoch": 0.03844201058968594, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.4719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 530 }, { "epoch": 0.038514542685138174, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.7778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 531 }, { "epoch": 0.03858707478059041, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.3058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 532 }, { "epoch": 0.03865960687604265, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.6298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 533 }, { "epoch": 0.038732138971494884, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.5871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 534 }, { "epoch": 0.03880467106694713, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.5189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 535 }, { "epoch": 0.038877203162399364, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.4213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 536 }, { "epoch": 0.0389497352578516, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.2276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 537 }, { "epoch": 0.03902226735330384, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 9.4918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 538 }, { "epoch": 0.039094799448756073, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 539 }, { "epoch": 0.03916733154420831, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.6381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 540 }, { "epoch": 0.03923986363966055, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.6879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 541 }, { "epoch": 0.03931239573511279, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.4346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 542 }, { "epoch": 0.03938492783056503, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.5866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 543 }, { "epoch": 0.03945745992601726, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.7493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 544 }, { "epoch": 0.0395299920214695, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.7286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 545 }, { "epoch": 0.039602524116921736, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 546 }, { "epoch": 0.03967505621237397, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 547 }, { "epoch": 0.039747588307826216, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.2336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 548 }, { "epoch": 0.03982012040327845, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.6372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 549 }, { "epoch": 0.03989265249873069, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.5931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 550 }, { "epoch": 0.039965184594182926, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 551 }, { "epoch": 0.04003771668963516, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.6644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 552 }, { "epoch": 0.0401102487850874, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.5801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 553 }, { "epoch": 0.040182780880539636, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.3134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 554 }, { "epoch": 0.04025531297599188, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.2724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 555 }, { "epoch": 0.040327845071444116, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.5, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 556 }, { "epoch": 0.04040037716689635, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.2601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 557 }, { "epoch": 0.04047290926234859, "grad_norm": 25.875, "learning_rate": 0.0003, "loss": 9.5201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 558 }, { "epoch": 0.040545441357800825, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.8032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 559 }, { "epoch": 0.04061797345325306, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.5343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 560 }, { "epoch": 0.040690505548705305, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.4805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 561 }, { "epoch": 0.04076303764415754, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.5262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 562 }, { "epoch": 0.04083556973960978, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.5778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 563 }, { "epoch": 0.040908101835062015, "grad_norm": 64.5, "learning_rate": 0.0003, "loss": 9.7825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 564 }, { "epoch": 0.04098063393051425, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.6299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 565 }, { "epoch": 0.04105316602596649, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.7506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 566 }, { "epoch": 0.041125698121418724, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 567 }, { "epoch": 0.04119823021687097, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.3195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 568 }, { "epoch": 0.041270762312323205, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.4655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 569 }, { "epoch": 0.04134329440777544, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 570 }, { "epoch": 0.04141582650322768, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 571 }, { "epoch": 0.041488358598679914, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.2272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 572 }, { "epoch": 0.04156089069413215, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 9.6366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 573 }, { "epoch": 0.041633422789584394, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.3403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 574 }, { "epoch": 0.04170595488503663, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 575 }, { "epoch": 0.04177848698048887, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.4128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 576 }, { "epoch": 0.041851019075941104, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.3613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 577 }, { "epoch": 0.04192355117139334, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.5526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 578 }, { "epoch": 0.04199608326684558, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.2192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 579 }, { "epoch": 0.04206861536229781, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.3726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 580 }, { "epoch": 0.04214114745775006, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.3027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 581 }, { "epoch": 0.042213679553202293, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 582 }, { "epoch": 0.04228621164865453, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.9352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 583 }, { "epoch": 0.04235874374410677, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 584 }, { "epoch": 0.042431275839559, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.7155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 585 }, { "epoch": 0.04250380793501124, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 9.307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 586 }, { "epoch": 0.04257634003046348, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.6365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 587 }, { "epoch": 0.04264887212591572, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.2123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 588 }, { "epoch": 0.042721404221367956, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.9017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 589 }, { "epoch": 0.04279393631682019, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.8273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 590 }, { "epoch": 0.04286646841227243, "grad_norm": 47.5, "learning_rate": 0.0003, "loss": 9.3487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 591 }, { "epoch": 0.042939000507724666, "grad_norm": 7.90625, "learning_rate": 0.0003, "loss": 9.4823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 592 }, { "epoch": 0.0430115326031769, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.6488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 593 }, { "epoch": 0.043084064698629146, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.5393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 594 }, { "epoch": 0.04315659679408138, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 595 }, { "epoch": 0.04322912888953362, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.3507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 596 }, { "epoch": 0.043301660984985856, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.2157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 597 }, { "epoch": 0.04337419308043809, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.5364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 598 }, { "epoch": 0.04344672517589033, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.4158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 599 }, { "epoch": 0.04351925727134257, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 9.3999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 600 }, { "epoch": 0.04359178936679481, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.2929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 601 }, { "epoch": 0.043664321462247045, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 602 }, { "epoch": 0.04373685355769928, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.3927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 603 }, { "epoch": 0.04380938565315152, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.5139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 604 }, { "epoch": 0.043881917748603755, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.3948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 605 }, { "epoch": 0.04395444984405599, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.1441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 606 }, { "epoch": 0.044026981939508235, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 9.13, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 607 }, { "epoch": 0.04409951403496047, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.4826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 608 }, { "epoch": 0.04417204613041271, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.4286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 609 }, { "epoch": 0.044244578225864944, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.5566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 610 }, { "epoch": 0.04431711032131718, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.4433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 611 }, { "epoch": 0.04438964241676942, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 612 }, { "epoch": 0.04446217451222166, "grad_norm": 22.75, "learning_rate": 0.0003, "loss": 9.4688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 613 }, { "epoch": 0.0445347066076739, "grad_norm": 20.25, "learning_rate": 0.0003, "loss": 9.2379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 614 }, { "epoch": 0.044607238703126134, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.5805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 615 }, { "epoch": 0.04467977079857837, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 616 }, { "epoch": 0.04475230289403061, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.2319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 617 }, { "epoch": 0.044824834989482844, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 618 }, { "epoch": 0.04489736708493508, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.3276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 619 }, { "epoch": 0.044969899180387324, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.3894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 620 }, { "epoch": 0.04504243127583956, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 9.6212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 621 }, { "epoch": 0.0451149633712918, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 9.3248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 622 }, { "epoch": 0.04518749546674403, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.5602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 623 }, { "epoch": 0.04526002756219627, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.4263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 624 }, { "epoch": 0.04533255965764851, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 625 }, { "epoch": 0.04540509175310075, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 626 }, { "epoch": 0.04547762384855299, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.4376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 627 }, { "epoch": 0.04555015594400522, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 628 }, { "epoch": 0.04562268803945746, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.6149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 629 }, { "epoch": 0.045695220134909696, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.2602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 630 }, { "epoch": 0.04576775223036193, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.4158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 631 }, { "epoch": 0.045840284325814176, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 632 }, { "epoch": 0.04591281642126641, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 633 }, { "epoch": 0.04598534851671865, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.1248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 634 }, { "epoch": 0.046057880612170886, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.2746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 635 }, { "epoch": 0.04613041270762312, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.1496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 636 }, { "epoch": 0.04620294480307536, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.2097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 637 }, { "epoch": 0.046275476898527595, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.4591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 638 }, { "epoch": 0.04634800899397984, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.3186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 639 }, { "epoch": 0.046420541089432076, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.4319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 640 }, { "epoch": 0.04649307318488431, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.4429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 641 }, { "epoch": 0.04656560528033655, "grad_norm": 15.0625, "learning_rate": 0.0003, "loss": 9.3924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 642 }, { "epoch": 0.046638137375788785, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.3658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 643 }, { "epoch": 0.04671066947124102, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.7039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 644 }, { "epoch": 0.046783201566693265, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.5311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 645 }, { "epoch": 0.0468557336621455, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 646 }, { "epoch": 0.04692826575759774, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.6813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 647 }, { "epoch": 0.047000797853049975, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.3629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 648 }, { "epoch": 0.04707332994850221, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 9.3068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 649 }, { "epoch": 0.04714586204395445, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.5635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 650 }, { "epoch": 0.047218394139406684, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.4129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 651 }, { "epoch": 0.04729092623485893, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 9.2692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 652 }, { "epoch": 0.047363458330311164, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.4118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 653 }, { "epoch": 0.0474359904257634, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.4461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 654 }, { "epoch": 0.04750852252121564, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.6549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 655 }, { "epoch": 0.047581054616667874, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.4392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 656 }, { "epoch": 0.04765358671212011, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 9.3867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 657 }, { "epoch": 0.047726118807572354, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 658 }, { "epoch": 0.04779865090302459, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.3317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 659 }, { "epoch": 0.04787118299847683, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.6041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 660 }, { "epoch": 0.047943715093929064, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.5392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 661 }, { "epoch": 0.0480162471893813, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 662 }, { "epoch": 0.04808877928483354, "grad_norm": 25.0, "learning_rate": 0.0003, "loss": 9.516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 663 }, { "epoch": 0.04816131138028577, "grad_norm": 19.125, "learning_rate": 0.0003, "loss": 9.4033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 664 }, { "epoch": 0.04823384347573802, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.2389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 665 }, { "epoch": 0.04830637557119025, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 666 }, { "epoch": 0.04837890766664249, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.7245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 667 }, { "epoch": 0.048451439762094727, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 9.4754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 668 }, { "epoch": 0.04852397185754696, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.1938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 669 }, { "epoch": 0.0485965039529992, "grad_norm": 13.125, "learning_rate": 0.0003, "loss": 8.9568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 670 }, { "epoch": 0.04866903604845144, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.4751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 671 }, { "epoch": 0.04874156814390368, "grad_norm": 18.5, "learning_rate": 0.0003, "loss": 9.5158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 672 }, { "epoch": 0.048814100239355916, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.2202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 673 }, { "epoch": 0.04888663233480815, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.9978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 674 }, { "epoch": 0.04895916443026039, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.6258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 675 }, { "epoch": 0.049031696525712626, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.5684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 676 }, { "epoch": 0.04910422862116486, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 677 }, { "epoch": 0.049176760716617106, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.2114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 678 }, { "epoch": 0.04924929281206934, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 679 }, { "epoch": 0.04932182490752158, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.7135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 680 }, { "epoch": 0.049394357002973815, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 681 }, { "epoch": 0.04946688909842605, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.1296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 682 }, { "epoch": 0.04953942119387829, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.3677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 683 }, { "epoch": 0.04961195328933053, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.6142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 684 }, { "epoch": 0.04968448538478277, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.4205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 685 }, { "epoch": 0.049757017480235005, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.4129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 686 }, { "epoch": 0.04982954957568724, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.5647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 687 }, { "epoch": 0.04990208167113948, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.3251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 688 }, { "epoch": 0.049974613766591715, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.8497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 689 }, { "epoch": 0.05004714586204395, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.6116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 690 }, { "epoch": 0.050119677957496195, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.7516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 691 }, { "epoch": 0.05019221005294843, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 692 }, { "epoch": 0.05026474214840067, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.2848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 693 }, { "epoch": 0.050337274243852904, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 694 }, { "epoch": 0.05040980633930514, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.4518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 695 }, { "epoch": 0.05048233843475738, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.2876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 696 }, { "epoch": 0.05055487053020962, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.1091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 697 }, { "epoch": 0.05062740262566186, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.4094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 698 }, { "epoch": 0.050699934721114094, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.5287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 699 }, { "epoch": 0.05077246681656633, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.5359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 700 }, { "epoch": 0.05084499891201857, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.1621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 701 }, { "epoch": 0.050917531007470804, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 702 }, { "epoch": 0.05099006310292304, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.2187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 703 }, { "epoch": 0.051062595198375284, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.5326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 704 }, { "epoch": 0.05113512729382752, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.6292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 705 }, { "epoch": 0.05120765938927976, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 706 }, { "epoch": 0.05128019148473199, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.4913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 707 }, { "epoch": 0.05135272358018423, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 708 }, { "epoch": 0.051425255675636466, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.4596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 709 }, { "epoch": 0.05149778777108871, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.2557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 710 }, { "epoch": 0.051570319866540947, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.3823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 711 }, { "epoch": 0.05164285196199318, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.5766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 712 }, { "epoch": 0.05171538405744542, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.4287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 713 }, { "epoch": 0.051787916152897656, "grad_norm": 24.375, "learning_rate": 0.0003, "loss": 9.3276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 714 }, { "epoch": 0.05186044824834989, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 715 }, { "epoch": 0.05193298034380213, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 716 }, { "epoch": 0.05200551243925437, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 717 }, { "epoch": 0.05207804453470661, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.5296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 718 }, { "epoch": 0.052150576630158846, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.3839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 719 }, { "epoch": 0.05222310872561108, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.2768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 720 }, { "epoch": 0.05229564082106332, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.5066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 721 }, { "epoch": 0.052368172916515555, "grad_norm": 23.0, "learning_rate": 0.0003, "loss": 9.1297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 722 }, { "epoch": 0.0524407050119678, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.2169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 723 }, { "epoch": 0.052513237107420035, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 724 }, { "epoch": 0.05258576920287227, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.1142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 725 }, { "epoch": 0.05265830129832451, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 726 }, { "epoch": 0.052730833393776745, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.5331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 727 }, { "epoch": 0.05280336548922898, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.5168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 728 }, { "epoch": 0.05287589758468122, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 729 }, { "epoch": 0.05294842968013346, "grad_norm": 33.25, "learning_rate": 0.0003, "loss": 9.0054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 730 }, { "epoch": 0.0530209617755857, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.3131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 731 }, { "epoch": 0.053093493871037935, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 732 }, { "epoch": 0.05316602596649017, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.4637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 733 }, { "epoch": 0.05323855806194241, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.3952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 734 }, { "epoch": 0.053311090157394644, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.2367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 735 }, { "epoch": 0.05338362225284689, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 736 }, { "epoch": 0.053456154348299124, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.0951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 737 }, { "epoch": 0.05352868644375136, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.3774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 738 }, { "epoch": 0.0536012185392036, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 739 }, { "epoch": 0.053673750634655834, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.4488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 740 }, { "epoch": 0.05374628273010807, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 741 }, { "epoch": 0.05381881482556031, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.1991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 742 }, { "epoch": 0.05389134692101255, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.5041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 743 }, { "epoch": 0.05396387901646479, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.2093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 744 }, { "epoch": 0.054036411111917024, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 9.3874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 745 }, { "epoch": 0.05410894320736926, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 746 }, { "epoch": 0.0541814753028215, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.5178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 747 }, { "epoch": 0.05425400739827373, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.3525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 748 }, { "epoch": 0.05432653949372598, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 749 }, { "epoch": 0.05439907158917821, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.1879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 750 }, { "epoch": 0.05447160368463045, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.5115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 751 }, { "epoch": 0.054544135780082686, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.3794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 752 }, { "epoch": 0.05461666787553492, "grad_norm": 18.5, "learning_rate": 0.0003, "loss": 9.289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 753 }, { "epoch": 0.05468919997098716, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 754 }, { "epoch": 0.054761732066439396, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 755 }, { "epoch": 0.05483426416189164, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.51, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 756 }, { "epoch": 0.054906796257343876, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.7324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 757 }, { "epoch": 0.05497932835279611, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 758 }, { "epoch": 0.05505186044824835, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.5025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 759 }, { "epoch": 0.055124392543700586, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 9.1111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 760 }, { "epoch": 0.05519692463915282, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 9.5989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 761 }, { "epoch": 0.055269456734605066, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 762 }, { "epoch": 0.0553419888300573, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 763 }, { "epoch": 0.05541452092550954, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.5193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 764 }, { "epoch": 0.055487053020961775, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 765 }, { "epoch": 0.05555958511641401, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 766 }, { "epoch": 0.05563211721186625, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.4386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 767 }, { "epoch": 0.055704649307318485, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 768 }, { "epoch": 0.05577718140277073, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 9.2629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 769 }, { "epoch": 0.055849713498222965, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.6796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 770 }, { "epoch": 0.0559222455936752, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 9.6717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 771 }, { "epoch": 0.05599477768912744, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 772 }, { "epoch": 0.056067309784579675, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.5546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 773 }, { "epoch": 0.05613984188003191, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 8.8598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 774 }, { "epoch": 0.056212373975484155, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.3864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 775 }, { "epoch": 0.05628490607093639, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.3804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 776 }, { "epoch": 0.05635743816638863, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 777 }, { "epoch": 0.056429970261840864, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.5702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 778 }, { "epoch": 0.0565025023572931, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.2949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 779 }, { "epoch": 0.05657503445274534, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 780 }, { "epoch": 0.056647566548197574, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.3055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 781 }, { "epoch": 0.05672009864364982, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.5205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 782 }, { "epoch": 0.056792630739102054, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.2984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 783 }, { "epoch": 0.05686516283455429, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.7956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 784 }, { "epoch": 0.05693769493000653, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 785 }, { "epoch": 0.057010227025458764, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.6229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 786 }, { "epoch": 0.057082759120911, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.3397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 787 }, { "epoch": 0.057155291216363244, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.5011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 788 }, { "epoch": 0.05722782331181548, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.4951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 789 }, { "epoch": 0.05730035540726772, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.2315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 790 }, { "epoch": 0.05737288750271995, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.3382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 791 }, { "epoch": 0.05744541959817219, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 9.2927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 792 }, { "epoch": 0.057517951693624426, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.5921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 793 }, { "epoch": 0.05759048378907666, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 794 }, { "epoch": 0.057663015884528906, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.4155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 795 }, { "epoch": 0.05773554797998114, "grad_norm": 20.375, "learning_rate": 0.0003, "loss": 9.045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 796 }, { "epoch": 0.05780808007543338, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 797 }, { "epoch": 0.057880612170885616, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.2393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 798 }, { "epoch": 0.05795314426633785, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.1116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 799 }, { "epoch": 0.05802567636179009, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.4841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 800 }, { "epoch": 0.05809820845724233, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 801 }, { "epoch": 0.05817074055269457, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.9153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 802 }, { "epoch": 0.058243272648146806, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.9981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 803 }, { "epoch": 0.05831580474359904, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 804 }, { "epoch": 0.05838833683905128, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 805 }, { "epoch": 0.058460868934503515, "grad_norm": 27.875, "learning_rate": 0.0003, "loss": 9.334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 806 }, { "epoch": 0.05853340102995576, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.2261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 807 }, { "epoch": 0.058605933125407995, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.2871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 808 }, { "epoch": 0.05867846522086023, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 809 }, { "epoch": 0.05875099731631247, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.1776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 810 }, { "epoch": 0.058823529411764705, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.2637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 811 }, { "epoch": 0.05889606150721694, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.2785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 812 }, { "epoch": 0.05896859360266918, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 9.618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 813 }, { "epoch": 0.05904112569812142, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.3085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 814 }, { "epoch": 0.05911365779357366, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.6471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 815 }, { "epoch": 0.059186189889025895, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.4884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 816 }, { "epoch": 0.05925872198447813, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.4044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 817 }, { "epoch": 0.05933125407993037, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 818 }, { "epoch": 0.059403786175382604, "grad_norm": 11.6875, "learning_rate": 0.0003, "loss": 9.5999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 819 }, { "epoch": 0.05947631827083485, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 820 }, { "epoch": 0.059548850366287084, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.6447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 821 }, { "epoch": 0.05962138246173932, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.1196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 822 }, { "epoch": 0.05969391455719156, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 823 }, { "epoch": 0.059766446652643794, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 824 }, { "epoch": 0.05983897874809603, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 9.3824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 825 }, { "epoch": 0.05991151084354827, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.4632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 826 }, { "epoch": 0.05998404293900051, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.4017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 827 }, { "epoch": 0.06005657503445275, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.2506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 828 }, { "epoch": 0.060129107129904984, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 829 }, { "epoch": 0.06020163922535722, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.7862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 830 }, { "epoch": 0.06027417132080946, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.4031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 831 }, { "epoch": 0.06034670341626169, "grad_norm": 37.75, "learning_rate": 0.0003, "loss": 9.53, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 832 }, { "epoch": 0.06041923551171394, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.0351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 833 }, { "epoch": 0.06049176760716617, "grad_norm": 1.5703125, "learning_rate": 0.0003, "loss": 9.5036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 834 }, { "epoch": 0.06056429970261841, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 835 }, { "epoch": 0.060636831798070646, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 836 }, { "epoch": 0.06070936389352288, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.4773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 837 }, { "epoch": 0.06078189598897512, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 9.5188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 838 }, { "epoch": 0.060854428084427356, "grad_norm": 28.5, "learning_rate": 0.0003, "loss": 9.3673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 839 }, { "epoch": 0.0609269601798796, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 840 }, { "epoch": 0.060999492275331836, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 841 }, { "epoch": 0.06107202437078407, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.2763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 842 }, { "epoch": 0.06114455646623631, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.1198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 843 }, { "epoch": 0.061217088561688546, "grad_norm": 23.375, "learning_rate": 0.0003, "loss": 9.59, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 844 }, { "epoch": 0.06128962065714078, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.3348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 845 }, { "epoch": 0.061362152752593026, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.0703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 846 }, { "epoch": 0.06143468484804526, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.4141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 847 }, { "epoch": 0.0615072169434975, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 848 }, { "epoch": 0.061579749038949735, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.6379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 849 }, { "epoch": 0.06165228113440197, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.5122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 850 }, { "epoch": 0.06172481322985421, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 9.2606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 851 }, { "epoch": 0.061797345325306445, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.7729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 852 }, { "epoch": 0.06186987742075869, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.8137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 853 }, { "epoch": 0.061942409516210925, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 9.3743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 854 }, { "epoch": 0.06201494161166316, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.6451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 855 }, { "epoch": 0.0620874737071154, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.1445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 856 }, { "epoch": 0.062160005802567635, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 9.1288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 857 }, { "epoch": 0.06223253789801987, "grad_norm": 1.4609375, "learning_rate": 0.0003, "loss": 9.3064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 858 }, { "epoch": 0.062305069993472115, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.6149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 859 }, { "epoch": 0.06237760208892435, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 860 }, { "epoch": 0.06245013418437659, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 861 }, { "epoch": 0.06252266627982882, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.2172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 862 }, { "epoch": 0.06259519837528106, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.1278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 863 }, { "epoch": 0.0626677304707333, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 864 }, { "epoch": 0.06274026256618553, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 865 }, { "epoch": 0.06281279466163778, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 866 }, { "epoch": 0.06288532675709001, "grad_norm": 1.46875, "learning_rate": 0.0003, "loss": 9.7164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 867 }, { "epoch": 0.06295785885254225, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.4609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 868 }, { "epoch": 0.0630303909479945, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.5626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 869 }, { "epoch": 0.06310292304344672, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 9.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 870 }, { "epoch": 0.06317545513889897, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.5225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 871 }, { "epoch": 0.0632479872343512, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.6421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 872 }, { "epoch": 0.06332051932980344, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.2571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 873 }, { "epoch": 0.06339305142525567, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.5354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 874 }, { "epoch": 0.06346558352070791, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 875 }, { "epoch": 0.06353811561616016, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.5007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 876 }, { "epoch": 0.06361064771161239, "grad_norm": 67.0, "learning_rate": 0.0003, "loss": 9.2192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 877 }, { "epoch": 0.06368317980706463, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 878 }, { "epoch": 0.06375571190251686, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 9.4993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 879 }, { "epoch": 0.0638282439979691, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.3356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 880 }, { "epoch": 0.06390077609342133, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.51, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 881 }, { "epoch": 0.06397330818887358, "grad_norm": 1.5078125, "learning_rate": 0.0003, "loss": 9.2175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 882 }, { "epoch": 0.06404584028432582, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.6174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 883 }, { "epoch": 0.06411837237977805, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.3812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 884 }, { "epoch": 0.06419090447523029, "grad_norm": 59.0, "learning_rate": 0.0003, "loss": 9.5484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 885 }, { "epoch": 0.06426343657068252, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 886 }, { "epoch": 0.06433596866613477, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.2903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 887 }, { "epoch": 0.064408500761587, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 888 }, { "epoch": 0.06448103285703924, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 889 }, { "epoch": 0.06455356495249148, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.4554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 890 }, { "epoch": 0.06462609704794371, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.2679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 891 }, { "epoch": 0.06469862914339596, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 892 }, { "epoch": 0.06477116123884818, "grad_norm": 135.0, "learning_rate": 0.0003, "loss": 9.6303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 893 }, { "epoch": 0.06484369333430043, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 894 }, { "epoch": 0.06491622542975267, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.1782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 895 }, { "epoch": 0.0649887575252049, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 9.0103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 896 }, { "epoch": 0.06506128962065715, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.4526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 897 }, { "epoch": 0.06513382171610937, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 898 }, { "epoch": 0.06520635381156162, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.3651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 899 }, { "epoch": 0.06527888590701385, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 900 }, { "epoch": 0.06535141800246609, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.2177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 901 }, { "epoch": 0.06542395009791833, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 902 }, { "epoch": 0.06549648219337056, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 903 }, { "epoch": 0.06556901428882281, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.3998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 904 }, { "epoch": 0.06564154638427504, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 905 }, { "epoch": 0.06571407847972728, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 906 }, { "epoch": 0.06578661057517951, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 907 }, { "epoch": 0.06585914267063175, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 908 }, { "epoch": 0.065931674766084, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.3489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 909 }, { "epoch": 0.06600420686153623, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.4656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 910 }, { "epoch": 0.06607673895698847, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.3122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 911 }, { "epoch": 0.0661492710524407, "grad_norm": 30.0, "learning_rate": 0.0003, "loss": 9.4828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 912 }, { "epoch": 0.06622180314789294, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.2262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 913 }, { "epoch": 0.06629433524334519, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.3671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 914 }, { "epoch": 0.06636686733879742, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.4531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 915 }, { "epoch": 0.06643939943424966, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.5486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 916 }, { "epoch": 0.06651193152970189, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.9892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 917 }, { "epoch": 0.06658446362515413, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.0204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 918 }, { "epoch": 0.06665699572060636, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.5794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 919 }, { "epoch": 0.0667295278160586, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.0522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 920 }, { "epoch": 0.06680205991151085, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 921 }, { "epoch": 0.06687459200696308, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 922 }, { "epoch": 0.06694712410241532, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 923 }, { "epoch": 0.06701965619786755, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.2349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 924 }, { "epoch": 0.0670921882933198, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 925 }, { "epoch": 0.06716472038877203, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 926 }, { "epoch": 0.06723725248422427, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.5608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 927 }, { "epoch": 0.06730978457967651, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.4009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 928 }, { "epoch": 0.06738231667512874, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.0552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 929 }, { "epoch": 0.06745484877058099, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.9328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 930 }, { "epoch": 0.06752738086603322, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.4703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 931 }, { "epoch": 0.06759991296148546, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.1563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 932 }, { "epoch": 0.06767244505693769, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.6361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 933 }, { "epoch": 0.06774497715238993, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.1031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 934 }, { "epoch": 0.06781750924784218, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 935 }, { "epoch": 0.0678900413432944, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.3324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 936 }, { "epoch": 0.06796257343874665, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.4012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 937 }, { "epoch": 0.06803510553419888, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 938 }, { "epoch": 0.06810763762965112, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.5266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 939 }, { "epoch": 0.06818016972510337, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.4294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 940 }, { "epoch": 0.0682527018205556, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.2407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 941 }, { "epoch": 0.06832523391600784, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 8.9601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 942 }, { "epoch": 0.06839776601146007, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.4505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 943 }, { "epoch": 0.06847029810691231, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.3391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 944 }, { "epoch": 0.06854283020236454, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.4416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 945 }, { "epoch": 0.06861536229781678, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.3433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 946 }, { "epoch": 0.06868789439326903, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 947 }, { "epoch": 0.06876042648872126, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 948 }, { "epoch": 0.0688329585841735, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.0228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 949 }, { "epoch": 0.06890549067962573, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.2012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 950 }, { "epoch": 0.06897802277507797, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.1957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 951 }, { "epoch": 0.0690505548705302, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 952 }, { "epoch": 0.06912308696598245, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.4486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 953 }, { "epoch": 0.06919561906143469, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.4107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 954 }, { "epoch": 0.06926815115688692, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 955 }, { "epoch": 0.06934068325233916, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.4042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 956 }, { "epoch": 0.0694132153477914, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.1255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 957 }, { "epoch": 0.06948574744324364, "grad_norm": 19.625, "learning_rate": 0.0003, "loss": 9.413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 958 }, { "epoch": 0.06955827953869587, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.2971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 959 }, { "epoch": 0.06963081163414811, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.2047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 960 }, { "epoch": 0.06970334372960035, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.0272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 961 }, { "epoch": 0.06977587582505258, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.4125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 962 }, { "epoch": 0.06984840792050483, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 963 }, { "epoch": 0.06992094001595706, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.2716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 964 }, { "epoch": 0.0699934721114093, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 965 }, { "epoch": 0.07006600420686154, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 966 }, { "epoch": 0.07013853630231377, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 967 }, { "epoch": 0.07021106839776602, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 968 }, { "epoch": 0.07028360049321825, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 969 }, { "epoch": 0.07035613258867049, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.4258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 970 }, { "epoch": 0.07042866468412272, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.4414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 971 }, { "epoch": 0.07050119677957496, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.2486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 972 }, { "epoch": 0.0705737288750272, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.4223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 973 }, { "epoch": 0.07064626097047944, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 974 }, { "epoch": 0.07071879306593168, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 975 }, { "epoch": 0.07079132516138391, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.6113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 976 }, { "epoch": 0.07086385725683615, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.1513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 977 }, { "epoch": 0.07093638935228838, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.2491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 978 }, { "epoch": 0.07100892144774062, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.4235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 979 }, { "epoch": 0.07108145354319287, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.9544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 980 }, { "epoch": 0.0711539856386451, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.5917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 981 }, { "epoch": 0.07122651773409734, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 982 }, { "epoch": 0.07129904982954957, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 983 }, { "epoch": 0.07137158192500181, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 984 }, { "epoch": 0.07144411402045404, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.4421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 985 }, { "epoch": 0.07151664611590629, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.6759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 986 }, { "epoch": 0.07158917821135853, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.3229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 987 }, { "epoch": 0.07166171030681076, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.4167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 988 }, { "epoch": 0.071734242402263, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.3241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 989 }, { "epoch": 0.07180677449771523, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.0959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 990 }, { "epoch": 0.07187930659316748, "grad_norm": 1.4296875, "learning_rate": 0.0003, "loss": 9.1638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 991 }, { "epoch": 0.07195183868861972, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.2039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 992 }, { "epoch": 0.07202437078407195, "grad_norm": 1.3046875, "learning_rate": 0.0003, "loss": 9.4073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 993 }, { "epoch": 0.0720969028795242, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.3251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 994 }, { "epoch": 0.07216943497497642, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.5912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 995 }, { "epoch": 0.07224196707042867, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 996 }, { "epoch": 0.0723144991658809, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.4589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 997 }, { "epoch": 0.07238703126133314, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.4649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 998 }, { "epoch": 0.07245956335678538, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.2444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 999 }, { "epoch": 0.07253209545223761, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.1269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1000 }, { "epoch": 0.07260462754768986, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.2992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1001 }, { "epoch": 0.07267715964314209, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.0929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1002 }, { "epoch": 0.07274969173859433, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.3447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1003 }, { "epoch": 0.07282222383404656, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.0209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1004 }, { "epoch": 0.0728947559294988, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1005 }, { "epoch": 0.07296728802495105, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.2322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1006 }, { "epoch": 0.07303982012040328, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.5263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1007 }, { "epoch": 0.07311235221585552, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.5231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1008 }, { "epoch": 0.07318488431130775, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.1174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1009 }, { "epoch": 0.07325741640675999, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1010 }, { "epoch": 0.07332994850221222, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.1108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1011 }, { "epoch": 0.07340248059766447, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1012 }, { "epoch": 0.07347501269311671, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.4525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1013 }, { "epoch": 0.07354754478856894, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.3529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1014 }, { "epoch": 0.07362007688402118, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1015 }, { "epoch": 0.07369260897947341, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.3945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1016 }, { "epoch": 0.07376514107492566, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.4818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1017 }, { "epoch": 0.0738376731703779, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1018 }, { "epoch": 0.07391020526583013, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1019 }, { "epoch": 0.07398273736128237, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.0601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1020 }, { "epoch": 0.0740552694567346, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.5673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1021 }, { "epoch": 0.07412780155218684, "grad_norm": 1.4375, "learning_rate": 0.0003, "loss": 9.3086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1022 }, { "epoch": 0.07420033364763907, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1023 }, { "epoch": 0.07427286574309132, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 9.3177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1024 }, { "epoch": 0.07434539783854356, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1025 }, { "epoch": 0.07441792993399579, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.4147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1026 }, { "epoch": 0.07449046202944803, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.6881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1027 }, { "epoch": 0.07456299412490026, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1028 }, { "epoch": 0.07463552622035251, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1029 }, { "epoch": 0.07470805831580474, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.2669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1030 }, { "epoch": 0.07478059041125698, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.3513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1031 }, { "epoch": 0.07485312250670922, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.2795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1032 }, { "epoch": 0.07492565460216145, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1033 }, { "epoch": 0.0749981866976137, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1034 }, { "epoch": 0.07507071879306593, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.3225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1035 }, { "epoch": 0.07514325088851817, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1036 }, { "epoch": 0.0752157829839704, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1037 }, { "epoch": 0.07528831507942264, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.2604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1038 }, { "epoch": 0.07536084717487489, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.2344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1039 }, { "epoch": 0.07543337927032712, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.3067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1040 }, { "epoch": 0.07550591136577936, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.0087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1041 }, { "epoch": 0.07557844346123159, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.8733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1042 }, { "epoch": 0.07565097555668383, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 9.4725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1043 }, { "epoch": 0.07572350765213608, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 9.2121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1044 }, { "epoch": 0.0757960397475883, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.6159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1045 }, { "epoch": 0.07586857184304055, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1046 }, { "epoch": 0.07594110393849278, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1047 }, { "epoch": 0.07601363603394502, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1048 }, { "epoch": 0.07608616812939725, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1049 }, { "epoch": 0.0761587002248495, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.4787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1050 }, { "epoch": 0.07623123232030174, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1051 }, { "epoch": 0.07630376441575397, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 8.8903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1052 }, { "epoch": 0.07637629651120621, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1053 }, { "epoch": 0.07644882860665844, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1054 }, { "epoch": 0.07652136070211069, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1055 }, { "epoch": 0.07659389279756292, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.5486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1056 }, { "epoch": 0.07666642489301516, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1057 }, { "epoch": 0.0767389569884674, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1058 }, { "epoch": 0.07681148908391963, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.7326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1059 }, { "epoch": 0.07688402117937188, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.2763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1060 }, { "epoch": 0.0769565532748241, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1061 }, { "epoch": 0.07702908537027635, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.3892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1062 }, { "epoch": 0.07710161746572858, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.4705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1063 }, { "epoch": 0.07717414956118082, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.4342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1064 }, { "epoch": 0.07724668165663306, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.3821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1065 }, { "epoch": 0.0773192137520853, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.2818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1066 }, { "epoch": 0.07739174584753754, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.3072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1067 }, { "epoch": 0.07746427794298977, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1068 }, { "epoch": 0.07753681003844201, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1069 }, { "epoch": 0.07760934213389425, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.1783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1070 }, { "epoch": 0.07768187422934648, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.6366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1071 }, { "epoch": 0.07775440632479873, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1072 }, { "epoch": 0.07782693842025096, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.2495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1073 }, { "epoch": 0.0778994705157032, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.2415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1074 }, { "epoch": 0.07797200261115543, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.5055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1075 }, { "epoch": 0.07804453470660767, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1076 }, { "epoch": 0.07811706680205992, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1077 }, { "epoch": 0.07818959889751215, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1078 }, { "epoch": 0.07826213099296439, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.9463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1079 }, { "epoch": 0.07833466308841662, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1080 }, { "epoch": 0.07840719518386886, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1081 }, { "epoch": 0.0784797272793211, "grad_norm": 12.0, "learning_rate": 0.0003, "loss": 9.3134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1082 }, { "epoch": 0.07855225937477334, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.5076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1083 }, { "epoch": 0.07862479147022558, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.0239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1084 }, { "epoch": 0.07869732356567781, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.6854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1085 }, { "epoch": 0.07876985566113005, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1086 }, { "epoch": 0.07884238775658228, "grad_norm": 30.625, "learning_rate": 0.0003, "loss": 9.3686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1087 }, { "epoch": 0.07891491985203453, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1088 }, { "epoch": 0.07898745194748677, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1089 }, { "epoch": 0.079059984042939, "grad_norm": 20.625, "learning_rate": 0.0003, "loss": 9.0601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1090 }, { "epoch": 0.07913251613839124, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1091 }, { "epoch": 0.07920504823384347, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1092 }, { "epoch": 0.07927758032929572, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1093 }, { "epoch": 0.07935011242474795, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.6558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1094 }, { "epoch": 0.07942264452020019, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1095 }, { "epoch": 0.07949517661565243, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1096 }, { "epoch": 0.07956770871110466, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 9.2566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1097 }, { "epoch": 0.0796402408065569, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1098 }, { "epoch": 0.07971277290200914, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1099 }, { "epoch": 0.07978530499746138, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.3367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1100 }, { "epoch": 0.07985783709291361, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1101 }, { "epoch": 0.07993036918836585, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1102 }, { "epoch": 0.0800029012838181, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1103 }, { "epoch": 0.08007543337927032, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.2162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1104 }, { "epoch": 0.08014796547472257, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1105 }, { "epoch": 0.0802204975701748, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1106 }, { "epoch": 0.08029302966562704, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.3223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1107 }, { "epoch": 0.08036556176107927, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.4313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1108 }, { "epoch": 0.08043809385653151, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.1696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1109 }, { "epoch": 0.08051062595198376, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.5224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1110 }, { "epoch": 0.08058315804743599, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1111 }, { "epoch": 0.08065569014288823, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1112 }, { "epoch": 0.08072822223834046, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 9.4615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1113 }, { "epoch": 0.0808007543337927, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.1966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1114 }, { "epoch": 0.08087328642924495, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1115 }, { "epoch": 0.08094581852469718, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1116 }, { "epoch": 0.08101835062014942, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1117 }, { "epoch": 0.08109088271560165, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1118 }, { "epoch": 0.0811634148110539, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1119 }, { "epoch": 0.08123594690650612, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.9336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1120 }, { "epoch": 0.08130847900195837, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.2219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1121 }, { "epoch": 0.08138101109741061, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.9627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1122 }, { "epoch": 0.08145354319286284, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1123 }, { "epoch": 0.08152607528831508, "grad_norm": 12.0, "learning_rate": 0.0003, "loss": 9.1608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1124 }, { "epoch": 0.08159860738376731, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1125 }, { "epoch": 0.08167113947921956, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.4194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1126 }, { "epoch": 0.08174367157467179, "grad_norm": 15.9375, "learning_rate": 0.0003, "loss": 9.5078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1127 }, { "epoch": 0.08181620367012403, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.2817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1128 }, { "epoch": 0.08188873576557627, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.2542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1129 }, { "epoch": 0.0819612678610285, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1130 }, { "epoch": 0.08203379995648075, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 9.3267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1131 }, { "epoch": 0.08210633205193298, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1132 }, { "epoch": 0.08217886414738522, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1133 }, { "epoch": 0.08225139624283745, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.4924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1134 }, { "epoch": 0.08232392833828969, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1135 }, { "epoch": 0.08239646043374194, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1136 }, { "epoch": 0.08246899252919417, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1137 }, { "epoch": 0.08254152462464641, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1138 }, { "epoch": 0.08261405672009864, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.2331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1139 }, { "epoch": 0.08268658881555088, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.0607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1140 }, { "epoch": 0.08275912091100313, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.3247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1141 }, { "epoch": 0.08283165300645536, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.5097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1142 }, { "epoch": 0.0829041851019076, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.7118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1143 }, { "epoch": 0.08297671719735983, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1144 }, { "epoch": 0.08304924929281207, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1145 }, { "epoch": 0.0831217813882643, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.5249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1146 }, { "epoch": 0.08319431348371654, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.0911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1147 }, { "epoch": 0.08326684557916879, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.2408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1148 }, { "epoch": 0.08333937767462102, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.1189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1149 }, { "epoch": 0.08341190977007326, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.0227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1150 }, { "epoch": 0.08348444186552549, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1151 }, { "epoch": 0.08355697396097773, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1152 }, { "epoch": 0.08362950605642996, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1153 }, { "epoch": 0.08370203815188221, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.2086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1154 }, { "epoch": 0.08377457024733445, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 8.9416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1155 }, { "epoch": 0.08384710234278668, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1156 }, { "epoch": 0.08391963443823892, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.6976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1157 }, { "epoch": 0.08399216653369115, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.6745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1158 }, { "epoch": 0.0840646986291434, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 9.1202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1159 }, { "epoch": 0.08413723072459563, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.1131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1160 }, { "epoch": 0.08420976282004787, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.5003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1161 }, { "epoch": 0.08428229491550011, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.4193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1162 }, { "epoch": 0.08435482701095234, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.1072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1163 }, { "epoch": 0.08442735910640459, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.8073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1164 }, { "epoch": 0.08449989120185682, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.1819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1165 }, { "epoch": 0.08457242329730906, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.5276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1166 }, { "epoch": 0.0846449553927613, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1167 }, { "epoch": 0.08471748748821353, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1168 }, { "epoch": 0.08479001958366578, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.5685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1169 }, { "epoch": 0.084862551679118, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.5603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1170 }, { "epoch": 0.08493508377457025, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.1837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1171 }, { "epoch": 0.08500761587002248, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1172 }, { "epoch": 0.08508014796547472, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1173 }, { "epoch": 0.08515268006092697, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.0838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1174 }, { "epoch": 0.0852252121563792, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1175 }, { "epoch": 0.08529774425183144, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.8111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1176 }, { "epoch": 0.08537027634728367, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 9.249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1177 }, { "epoch": 0.08544280844273591, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1178 }, { "epoch": 0.08551534053818814, "grad_norm": 1.40625, "learning_rate": 0.0003, "loss": 9.0434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1179 }, { "epoch": 0.08558787263364039, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.5191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1180 }, { "epoch": 0.08566040472909263, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.6523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1181 }, { "epoch": 0.08573293682454486, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.2735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1182 }, { "epoch": 0.0858054689199971, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.7009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1183 }, { "epoch": 0.08587800101544933, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1184 }, { "epoch": 0.08595053311090158, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1185 }, { "epoch": 0.0860230652063538, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1186 }, { "epoch": 0.08609559730180605, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.4346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1187 }, { "epoch": 0.08616812939725829, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.0493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1188 }, { "epoch": 0.08624066149271052, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.4186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1189 }, { "epoch": 0.08631319358816276, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.1984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1190 }, { "epoch": 0.086385725683615, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 9.2968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1191 }, { "epoch": 0.08645825777906724, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1192 }, { "epoch": 0.08653078987451948, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.4686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1193 }, { "epoch": 0.08660332196997171, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.1235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1194 }, { "epoch": 0.08667585406542395, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.1265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1195 }, { "epoch": 0.08674838616087618, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.1295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1196 }, { "epoch": 0.08682091825632843, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.1713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1197 }, { "epoch": 0.08689345035178066, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.6755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1198 }, { "epoch": 0.0869659824472329, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.2414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1199 }, { "epoch": 0.08703851454268514, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 8.989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1200 }, { "epoch": 0.08711104663813737, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.3484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1201 }, { "epoch": 0.08718357873358962, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.5836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1202 }, { "epoch": 0.08725611082904185, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.4803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1203 }, { "epoch": 0.08732864292449409, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.5013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1204 }, { "epoch": 0.08740117501994632, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.3415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1205 }, { "epoch": 0.08747370711539856, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1206 }, { "epoch": 0.08754623921085081, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1207 }, { "epoch": 0.08761877130630304, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1208 }, { "epoch": 0.08769130340175528, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.2411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1209 }, { "epoch": 0.08776383549720751, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.9174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1210 }, { "epoch": 0.08783636759265975, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1211 }, { "epoch": 0.08790889968811198, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.4212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1212 }, { "epoch": 0.08798143178356423, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1213 }, { "epoch": 0.08805396387901647, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.2398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1214 }, { "epoch": 0.0881264959744687, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.3527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1215 }, { "epoch": 0.08819902806992094, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1216 }, { "epoch": 0.08827156016537317, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.1633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1217 }, { "epoch": 0.08834409226082542, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 9.3148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1218 }, { "epoch": 0.08841662435627766, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.0292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1219 }, { "epoch": 0.08848915645172989, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.1281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1220 }, { "epoch": 0.08856168854718213, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.1098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1221 }, { "epoch": 0.08863422064263436, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.4845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1222 }, { "epoch": 0.0887067527380866, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.2641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1223 }, { "epoch": 0.08877928483353884, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.3906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1224 }, { "epoch": 0.08885181692899108, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1225 }, { "epoch": 0.08892434902444332, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.3976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1226 }, { "epoch": 0.08899688111989555, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1227 }, { "epoch": 0.0890694132153478, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.3561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1228 }, { "epoch": 0.08914194531080002, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1229 }, { "epoch": 0.08921447740625227, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.1504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1230 }, { "epoch": 0.0892870095017045, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.2535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1231 }, { "epoch": 0.08935954159715674, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.7316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1232 }, { "epoch": 0.08943207369260898, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.2045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1233 }, { "epoch": 0.08950460578806121, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1234 }, { "epoch": 0.08957713788351346, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.4613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1235 }, { "epoch": 0.08964966997896569, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1236 }, { "epoch": 0.08972220207441793, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.5005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1237 }, { "epoch": 0.08979473416987016, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.8424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1238 }, { "epoch": 0.0898672662653224, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.5803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1239 }, { "epoch": 0.08993979836077465, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.2647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1240 }, { "epoch": 0.09001233045622688, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.3948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1241 }, { "epoch": 0.09008486255167912, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.3824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1242 }, { "epoch": 0.09015739464713135, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.9101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1243 }, { "epoch": 0.0902299267425836, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1244 }, { "epoch": 0.09030245883803584, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 9.0436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1245 }, { "epoch": 0.09037499093348807, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.2933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1246 }, { "epoch": 0.09044752302894031, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.6304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1247 }, { "epoch": 0.09052005512439254, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.3892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1248 }, { "epoch": 0.09059258721984478, "grad_norm": 19.5, "learning_rate": 0.0003, "loss": 9.4083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1249 }, { "epoch": 0.09066511931529701, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.9736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1250 }, { "epoch": 0.09073765141074926, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1251 }, { "epoch": 0.0908101835062015, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1252 }, { "epoch": 0.09088271560165373, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1253 }, { "epoch": 0.09095524769710597, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.3269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1254 }, { "epoch": 0.0910277797925582, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.1033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1255 }, { "epoch": 0.09110031188801045, "grad_norm": 23.375, "learning_rate": 0.0003, "loss": 9.2354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1256 }, { "epoch": 0.09117284398346268, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1257 }, { "epoch": 0.09124537607891492, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.8495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1258 }, { "epoch": 0.09131790817436716, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1259 }, { "epoch": 0.09139044026981939, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1260 }, { "epoch": 0.09146297236527164, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1261 }, { "epoch": 0.09153550446072387, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.6068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1262 }, { "epoch": 0.09160803655617611, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1263 }, { "epoch": 0.09168056865162835, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.5325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1264 }, { "epoch": 0.09175310074708058, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.4011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1265 }, { "epoch": 0.09182563284253283, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.0351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1266 }, { "epoch": 0.09189816493798506, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.4837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1267 }, { "epoch": 0.0919706970334373, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.1694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1268 }, { "epoch": 0.09204322912888953, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.6415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1269 }, { "epoch": 0.09211576122434177, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1270 }, { "epoch": 0.09218829331979402, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.4181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1271 }, { "epoch": 0.09226082541524624, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1272 }, { "epoch": 0.09233335751069849, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.0232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1273 }, { "epoch": 0.09240588960615072, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.0209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1274 }, { "epoch": 0.09247842170160296, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1275 }, { "epoch": 0.09255095379705519, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1276 }, { "epoch": 0.09262348589250743, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1277 }, { "epoch": 0.09269601798795968, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.9983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1278 }, { "epoch": 0.09276855008341191, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1279 }, { "epoch": 0.09284108217886415, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1280 }, { "epoch": 0.09291361427431638, "grad_norm": 31.75, "learning_rate": 0.0003, "loss": 9.3264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1281 }, { "epoch": 0.09298614636976862, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.3218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1282 }, { "epoch": 0.09305867846522085, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.2624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1283 }, { "epoch": 0.0931312105606731, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.3669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1284 }, { "epoch": 0.09320374265612534, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1285 }, { "epoch": 0.09327627475157757, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.5419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1286 }, { "epoch": 0.09334880684702981, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.11, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1287 }, { "epoch": 0.09342133894248204, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.3173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1288 }, { "epoch": 0.09349387103793429, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1289 }, { "epoch": 0.09356640313338653, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.4176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1290 }, { "epoch": 0.09363893522883876, "grad_norm": 1.3515625, "learning_rate": 0.0003, "loss": 9.127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1291 }, { "epoch": 0.093711467324291, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.4852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1292 }, { "epoch": 0.09378399941974323, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1293 }, { "epoch": 0.09385653151519548, "grad_norm": 14.875, "learning_rate": 0.0003, "loss": 9.0929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1294 }, { "epoch": 0.0939290636106477, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.4592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1295 }, { "epoch": 0.09400159570609995, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.3845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1296 }, { "epoch": 0.0940741278015522, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.2954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1297 }, { "epoch": 0.09414665989700442, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1298 }, { "epoch": 0.09421919199245667, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1299 }, { "epoch": 0.0942917240879089, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1300 }, { "epoch": 0.09436425618336114, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.2615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1301 }, { "epoch": 0.09443678827881337, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.4547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1302 }, { "epoch": 0.09450932037426561, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.99, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1303 }, { "epoch": 0.09458185246971786, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1304 }, { "epoch": 0.09465438456517009, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1305 }, { "epoch": 0.09472691666062233, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.3291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1306 }, { "epoch": 0.09479944875607456, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1307 }, { "epoch": 0.0948719808515268, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.1655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1308 }, { "epoch": 0.09494451294697903, "grad_norm": 47.0, "learning_rate": 0.0003, "loss": 9.5538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1309 }, { "epoch": 0.09501704504243128, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1310 }, { "epoch": 0.09508957713788352, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1311 }, { "epoch": 0.09516210923333575, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.82, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1312 }, { "epoch": 0.09523464132878799, "grad_norm": 24.25, "learning_rate": 0.0003, "loss": 8.9415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1313 }, { "epoch": 0.09530717342424022, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.1443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1314 }, { "epoch": 0.09537970551969246, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.1587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1315 }, { "epoch": 0.09545223761514471, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.0771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1316 }, { "epoch": 0.09552476971059694, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1317 }, { "epoch": 0.09559730180604918, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1318 }, { "epoch": 0.09566983390150141, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1319 }, { "epoch": 0.09574236599695365, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1320 }, { "epoch": 0.09581489809240588, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3375, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1321 }, { "epoch": 0.09588743018785813, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1322 }, { "epoch": 0.09595996228331037, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.1935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1323 }, { "epoch": 0.0960324943787626, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.3424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1324 }, { "epoch": 0.09610502647421484, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.0947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1325 }, { "epoch": 0.09617755856966707, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.8445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1326 }, { "epoch": 0.09625009066511932, "grad_norm": 29.625, "learning_rate": 0.0003, "loss": 8.9181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1327 }, { "epoch": 0.09632262276057155, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1328 }, { "epoch": 0.09639515485602379, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.2949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1329 }, { "epoch": 0.09646768695147603, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.9615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1330 }, { "epoch": 0.09654021904692826, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1331 }, { "epoch": 0.0966127511423805, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1332 }, { "epoch": 0.09668528323783274, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.9034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1333 }, { "epoch": 0.09675781533328498, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1334 }, { "epoch": 0.09683034742873721, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.2611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1335 }, { "epoch": 0.09690287952418945, "grad_norm": 1.4296875, "learning_rate": 0.0003, "loss": 9.3709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1336 }, { "epoch": 0.0969754116196417, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.1424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1337 }, { "epoch": 0.09704794371509393, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1338 }, { "epoch": 0.09712047581054617, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.4207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1339 }, { "epoch": 0.0971930079059984, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1340 }, { "epoch": 0.09726554000145064, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.2199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1341 }, { "epoch": 0.09733807209690289, "grad_norm": 1.25, "learning_rate": 0.0003, "loss": 9.3038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1342 }, { "epoch": 0.09741060419235512, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 8.6692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1343 }, { "epoch": 0.09748313628780736, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1344 }, { "epoch": 0.09755566838325959, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1345 }, { "epoch": 0.09762820047871183, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.3607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1346 }, { "epoch": 0.09770073257416406, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1347 }, { "epoch": 0.0977732646696163, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1348 }, { "epoch": 0.09784579676506855, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.3431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1349 }, { "epoch": 0.09791832886052078, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1350 }, { "epoch": 0.09799086095597302, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.3932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1351 }, { "epoch": 0.09806339305142525, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.9431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1352 }, { "epoch": 0.0981359251468775, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1353 }, { "epoch": 0.09820845724232972, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1354 }, { "epoch": 0.09828098933778197, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1355 }, { "epoch": 0.09835352143323421, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 8.9893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1356 }, { "epoch": 0.09842605352868644, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.5601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1357 }, { "epoch": 0.09849858562413868, "grad_norm": 12.0, "learning_rate": 0.0003, "loss": 9.5416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1358 }, { "epoch": 0.09857111771959091, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1359 }, { "epoch": 0.09864364981504316, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.1453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1360 }, { "epoch": 0.09871618191049539, "grad_norm": 16.0, "learning_rate": 0.0003, "loss": 9.1704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1361 }, { "epoch": 0.09878871400594763, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.9501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1362 }, { "epoch": 0.09886124610139987, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1363 }, { "epoch": 0.0989337781968521, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1364 }, { "epoch": 0.09900631029230435, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1365 }, { "epoch": 0.09907884238775658, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.0299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1366 }, { "epoch": 0.09915137448320882, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.5946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1367 }, { "epoch": 0.09922390657866106, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1368 }, { "epoch": 0.0992964386741133, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.1119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1369 }, { "epoch": 0.09936897076956554, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1370 }, { "epoch": 0.09944150286501777, "grad_norm": 84.5, "learning_rate": 0.0003, "loss": 9.5106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1371 }, { "epoch": 0.09951403496047001, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1372 }, { "epoch": 0.09958656705592224, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1373 }, { "epoch": 0.09965909915137448, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.4525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1374 }, { "epoch": 0.09973163124682673, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1375 }, { "epoch": 0.09980416334227896, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1376 }, { "epoch": 0.0998766954377312, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.9833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1377 }, { "epoch": 0.09994922753318343, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1378 }, { "epoch": 0.10002175962863567, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.3201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1379 }, { "epoch": 0.1000942917240879, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.8261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1380 }, { "epoch": 0.10016682381954015, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.9264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1381 }, { "epoch": 0.10023935591499239, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.9982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1382 }, { "epoch": 0.10031188801044462, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.0884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1383 }, { "epoch": 0.10038442010589686, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1384 }, { "epoch": 0.10045695220134909, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.1956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1385 }, { "epoch": 0.10052948429680134, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.8147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1386 }, { "epoch": 0.10060201639225357, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.2519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1387 }, { "epoch": 0.10067454848770581, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.0491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1388 }, { "epoch": 0.10074708058315805, "grad_norm": 65.0, "learning_rate": 0.0003, "loss": 8.6242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1389 }, { "epoch": 0.10081961267861028, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.2167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1390 }, { "epoch": 0.10089214477406253, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.5289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1391 }, { "epoch": 0.10096467686951476, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1392 }, { "epoch": 0.101037208964967, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1393 }, { "epoch": 0.10110974106041924, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.2386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1394 }, { "epoch": 0.10118227315587147, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.2475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1395 }, { "epoch": 0.10125480525132372, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1396 }, { "epoch": 0.10132733734677594, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 9.2782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1397 }, { "epoch": 0.10139986944222819, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1398 }, { "epoch": 0.10147240153768042, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.3697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1399 }, { "epoch": 0.10154493363313266, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1400 }, { "epoch": 0.1016174657285849, "grad_norm": 1.2421875, "learning_rate": 0.0003, "loss": 9.3203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1401 }, { "epoch": 0.10168999782403713, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.9514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1402 }, { "epoch": 0.10176252991948938, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1403 }, { "epoch": 0.10183506201494161, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.8414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1404 }, { "epoch": 0.10190759411039385, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.4167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1405 }, { "epoch": 0.10198012620584608, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1406 }, { "epoch": 0.10205265830129832, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.1943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1407 }, { "epoch": 0.10212519039675057, "grad_norm": 29.625, "learning_rate": 0.0003, "loss": 9.0955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1408 }, { "epoch": 0.1021977224922028, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1409 }, { "epoch": 0.10227025458765504, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.5849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1410 }, { "epoch": 0.10234278668310727, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1411 }, { "epoch": 0.10241531877855951, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1412 }, { "epoch": 0.10248785087401174, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1413 }, { "epoch": 0.10256038296946399, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1414 }, { "epoch": 0.10263291506491623, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.4717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1415 }, { "epoch": 0.10270544716036846, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.1104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1416 }, { "epoch": 0.1027779792558207, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1417 }, { "epoch": 0.10285051135127293, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.1804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1418 }, { "epoch": 0.10292304344672518, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.3381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1419 }, { "epoch": 0.10299557554217742, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1420 }, { "epoch": 0.10306810763762965, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1421 }, { "epoch": 0.10314063973308189, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.1057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1422 }, { "epoch": 0.10321317182853412, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.1822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1423 }, { "epoch": 0.10328570392398637, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 9.075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1424 }, { "epoch": 0.1033582360194386, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.4351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1425 }, { "epoch": 0.10343076811489084, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1426 }, { "epoch": 0.10350330021034308, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.9663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1427 }, { "epoch": 0.10357583230579531, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.4365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1428 }, { "epoch": 0.10364836440124756, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1429 }, { "epoch": 0.10372089649669979, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.3344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1430 }, { "epoch": 0.10379342859215203, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1431 }, { "epoch": 0.10386596068760426, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.3839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1432 }, { "epoch": 0.1039384927830565, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1433 }, { "epoch": 0.10401102487850875, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.3332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1434 }, { "epoch": 0.10408355697396098, "grad_norm": 1.375, "learning_rate": 0.0003, "loss": 9.1057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1435 }, { "epoch": 0.10415608906941322, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.4884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1436 }, { "epoch": 0.10422862116486545, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.5075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1437 }, { "epoch": 0.10430115326031769, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.3276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1438 }, { "epoch": 0.10437368535576994, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1439 }, { "epoch": 0.10444621745122216, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.3242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1440 }, { "epoch": 0.10451874954667441, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.1403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1441 }, { "epoch": 0.10459128164212664, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.4815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1442 }, { "epoch": 0.10466381373757888, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1443 }, { "epoch": 0.10473634583303111, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.4091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1444 }, { "epoch": 0.10480887792848335, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.0547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1445 }, { "epoch": 0.1048814100239356, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1446 }, { "epoch": 0.10495394211938783, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1447 }, { "epoch": 0.10502647421484007, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1448 }, { "epoch": 0.1050990063102923, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1449 }, { "epoch": 0.10517153840574454, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.4972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1450 }, { "epoch": 0.10524407050119677, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1451 }, { "epoch": 0.10531660259664902, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1452 }, { "epoch": 0.10538913469210126, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.5488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1453 }, { "epoch": 0.10546166678755349, "grad_norm": 17.125, "learning_rate": 0.0003, "loss": 9.0886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1454 }, { "epoch": 0.10553419888300573, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1455 }, { "epoch": 0.10560673097845796, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.4616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1456 }, { "epoch": 0.1056792630739102, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1457 }, { "epoch": 0.10575179516936244, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1458 }, { "epoch": 0.10582432726481468, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.3424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1459 }, { "epoch": 0.10589685936026692, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1460 }, { "epoch": 0.10596939145571915, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.7936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1461 }, { "epoch": 0.1060419235511714, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.6504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1462 }, { "epoch": 0.10611445564662363, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1463 }, { "epoch": 0.10618698774207587, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.3989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1464 }, { "epoch": 0.10625951983752811, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.4013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1465 }, { "epoch": 0.10633205193298034, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1466 }, { "epoch": 0.10640458402843259, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.3811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1467 }, { "epoch": 0.10647711612388482, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.5313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1468 }, { "epoch": 0.10654964821933706, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1469 }, { "epoch": 0.10662218031478929, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.09, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1470 }, { "epoch": 0.10669471241024153, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.3938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1471 }, { "epoch": 0.10676724450569378, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1472 }, { "epoch": 0.106839776601146, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1473 }, { "epoch": 0.10691230869659825, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1474 }, { "epoch": 0.10698484079205048, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1475 }, { "epoch": 0.10705737288750272, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.7151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1476 }, { "epoch": 0.10712990498295495, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1477 }, { "epoch": 0.1072024370784072, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.4465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1478 }, { "epoch": 0.10727496917385944, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.0458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1479 }, { "epoch": 0.10734750126931167, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1480 }, { "epoch": 0.10742003336476391, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.3021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1481 }, { "epoch": 0.10749256546021614, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 8.9536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1482 }, { "epoch": 0.10756509755566838, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1483 }, { "epoch": 0.10763762965112061, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.5556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1484 }, { "epoch": 0.10771016174657286, "grad_norm": 20.875, "learning_rate": 0.0003, "loss": 9.2586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1485 }, { "epoch": 0.1077826938420251, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1486 }, { "epoch": 0.10785522593747733, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1487 }, { "epoch": 0.10792775803292957, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.2105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1488 }, { "epoch": 0.1080002901283818, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1489 }, { "epoch": 0.10807282222383405, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.5629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1490 }, { "epoch": 0.10814535431928629, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.2032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1491 }, { "epoch": 0.10821788641473852, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.0271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1492 }, { "epoch": 0.10829041851019076, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.0141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1493 }, { "epoch": 0.108362950605643, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1494 }, { "epoch": 0.10843548270109524, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.3792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1495 }, { "epoch": 0.10850801479654747, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1496 }, { "epoch": 0.10858054689199971, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1497 }, { "epoch": 0.10865307898745195, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.5656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1498 }, { "epoch": 0.10872561108290418, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1499 }, { "epoch": 0.10879814317835643, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1500 }, { "epoch": 0.10887067527380866, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.3636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1501 }, { "epoch": 0.1089432073692609, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.2979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1502 }, { "epoch": 0.10901573946471313, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1503 }, { "epoch": 0.10908827156016537, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.5255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1504 }, { "epoch": 0.10916080365561762, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.1722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1505 }, { "epoch": 0.10923333575106985, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1506 }, { "epoch": 0.10930586784652209, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.4262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1507 }, { "epoch": 0.10937839994197432, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 8.7977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1508 }, { "epoch": 0.10945093203742656, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 9.1386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1509 }, { "epoch": 0.10952346413287879, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 9.3752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1510 }, { "epoch": 0.10959599622833104, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.2173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1511 }, { "epoch": 0.10966852832378328, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.3206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1512 }, { "epoch": 0.10974106041923551, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1513 }, { "epoch": 0.10981359251468775, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1514 }, { "epoch": 0.10988612461013998, "grad_norm": 13.875, "learning_rate": 0.0003, "loss": 9.4889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1515 }, { "epoch": 0.10995865670559223, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.2084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1516 }, { "epoch": 0.11003118880104447, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.2863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1517 }, { "epoch": 0.1101037208964967, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.1377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1518 }, { "epoch": 0.11017625299194894, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.5643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1519 }, { "epoch": 0.11024878508740117, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1520 }, { "epoch": 0.11032131718285342, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1521 }, { "epoch": 0.11039384927830564, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.4119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1522 }, { "epoch": 0.11046638137375789, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1523 }, { "epoch": 0.11053891346921013, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1524 }, { "epoch": 0.11061144556466236, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.2831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1525 }, { "epoch": 0.1106839776601146, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.7551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1526 }, { "epoch": 0.11075650975556683, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.0006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1527 }, { "epoch": 0.11082904185101908, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1528 }, { "epoch": 0.11090157394647131, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1529 }, { "epoch": 0.11097410604192355, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1530 }, { "epoch": 0.1110466381373758, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.3504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1531 }, { "epoch": 0.11111917023282802, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.1921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1532 }, { "epoch": 0.11119170232828027, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1533 }, { "epoch": 0.1112642344237325, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1534 }, { "epoch": 0.11133676651918474, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.2966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1535 }, { "epoch": 0.11140929861463697, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.0831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1536 }, { "epoch": 0.11148183071008921, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.9248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1537 }, { "epoch": 0.11155436280554146, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 8.9674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1538 }, { "epoch": 0.11162689490099369, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1539 }, { "epoch": 0.11169942699644593, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.5858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1540 }, { "epoch": 0.11177195909189816, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.1101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1541 }, { "epoch": 0.1118444911873504, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.2878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1542 }, { "epoch": 0.11191702328280265, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.4251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1543 }, { "epoch": 0.11198955537825488, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.3873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1544 }, { "epoch": 0.11206208747370712, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.1675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1545 }, { "epoch": 0.11213461956915935, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1546 }, { "epoch": 0.11220715166461159, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1547 }, { "epoch": 0.11227968376006382, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.3689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1548 }, { "epoch": 0.11235221585551607, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.2144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1549 }, { "epoch": 0.11242474795096831, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1550 }, { "epoch": 0.11249728004642054, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1551 }, { "epoch": 0.11256981214187278, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1552 }, { "epoch": 0.11264234423732501, "grad_norm": 28.125, "learning_rate": 0.0003, "loss": 8.7881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1553 }, { "epoch": 0.11271487633277726, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1554 }, { "epoch": 0.11278740842822949, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1555 }, { "epoch": 0.11285994052368173, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1556 }, { "epoch": 0.11293247261913397, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1557 }, { "epoch": 0.1130050047145862, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.2854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1558 }, { "epoch": 0.11307753681003845, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1559 }, { "epoch": 0.11315006890549067, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1560 }, { "epoch": 0.11322260100094292, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.3624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1561 }, { "epoch": 0.11329513309639515, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1562 }, { "epoch": 0.11336766519184739, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1563 }, { "epoch": 0.11344019728729964, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1564 }, { "epoch": 0.11351272938275186, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.4199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1565 }, { "epoch": 0.11358526147820411, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.8928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1566 }, { "epoch": 0.11365779357365634, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.0405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1567 }, { "epoch": 0.11373032566910858, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1568 }, { "epoch": 0.11380285776456082, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1569 }, { "epoch": 0.11387538986001305, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.1924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1570 }, { "epoch": 0.1139479219554653, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1571 }, { "epoch": 0.11402045405091753, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.3459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1572 }, { "epoch": 0.11409298614636977, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.3508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1573 }, { "epoch": 0.114165518241822, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 9.1151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1574 }, { "epoch": 0.11423805033727424, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.1911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1575 }, { "epoch": 0.11431058243272649, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1576 }, { "epoch": 0.11438311452817872, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1577 }, { "epoch": 0.11445564662363096, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.3227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1578 }, { "epoch": 0.11452817871908319, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1579 }, { "epoch": 0.11460071081453543, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1580 }, { "epoch": 0.11467324290998766, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.9934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1581 }, { "epoch": 0.1147457750054399, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.2463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1582 }, { "epoch": 0.11481830710089215, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.4073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1583 }, { "epoch": 0.11489083919634438, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1584 }, { "epoch": 0.11496337129179662, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.7273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1585 }, { "epoch": 0.11503590338724885, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1586 }, { "epoch": 0.1151084354827011, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1587 }, { "epoch": 0.11518096757815333, "grad_norm": 20.75, "learning_rate": 0.0003, "loss": 9.3788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1588 }, { "epoch": 0.11525349967360557, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1589 }, { "epoch": 0.11532603176905781, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1590 }, { "epoch": 0.11539856386451004, "grad_norm": 32.0, "learning_rate": 0.0003, "loss": 9.0803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1591 }, { "epoch": 0.11547109595996229, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.5267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1592 }, { "epoch": 0.11554362805541452, "grad_norm": 1.4296875, "learning_rate": 0.0003, "loss": 9.5939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1593 }, { "epoch": 0.11561616015086676, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.2126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1594 }, { "epoch": 0.115688692246319, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1595 }, { "epoch": 0.11576122434177123, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1596 }, { "epoch": 0.11583375643722348, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1597 }, { "epoch": 0.1159062885326757, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1598 }, { "epoch": 0.11597882062812795, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.1732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1599 }, { "epoch": 0.11605135272358018, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.2772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1600 }, { "epoch": 0.11612388481903242, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.0132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1601 }, { "epoch": 0.11619641691448467, "grad_norm": 1.4375, "learning_rate": 0.0003, "loss": 9.145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1602 }, { "epoch": 0.1162689490099369, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.7255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1603 }, { "epoch": 0.11634148110538914, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.5038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1604 }, { "epoch": 0.11641401320084137, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.2981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1605 }, { "epoch": 0.11648654529629361, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.2211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1606 }, { "epoch": 0.11655907739174584, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1607 }, { "epoch": 0.11663160948719808, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.3992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1608 }, { "epoch": 0.11670414158265033, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.8669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1609 }, { "epoch": 0.11677667367810256, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.5908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1610 }, { "epoch": 0.1168492057735548, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.2789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1611 }, { "epoch": 0.11692173786900703, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1612 }, { "epoch": 0.11699426996445927, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.4905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1613 }, { "epoch": 0.11706680205991152, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1614 }, { "epoch": 0.11713933415536375, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1615 }, { "epoch": 0.11721186625081599, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.6557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1616 }, { "epoch": 0.11728439834626822, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.3337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1617 }, { "epoch": 0.11735693044172046, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1618 }, { "epoch": 0.1174294625371727, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 9.2193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1619 }, { "epoch": 0.11750199463262494, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1620 }, { "epoch": 0.11757452672807718, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1621 }, { "epoch": 0.11764705882352941, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.3739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1622 }, { "epoch": 0.11771959091898165, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 9.3955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1623 }, { "epoch": 0.11779212301443388, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.9804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1624 }, { "epoch": 0.11786465510988613, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1625 }, { "epoch": 0.11793718720533836, "grad_norm": 13.75, "learning_rate": 0.0003, "loss": 9.2725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1626 }, { "epoch": 0.1180097193007906, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1627 }, { "epoch": 0.11808225139624284, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.1536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1628 }, { "epoch": 0.11815478349169507, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1629 }, { "epoch": 0.11822731558714732, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1630 }, { "epoch": 0.11829984768259955, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.3054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1631 }, { "epoch": 0.11837237977805179, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.2021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1632 }, { "epoch": 0.11844491187350402, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.3811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1633 }, { "epoch": 0.11851744396895626, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1634 }, { "epoch": 0.1185899760644085, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.0744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1635 }, { "epoch": 0.11866250815986074, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1636 }, { "epoch": 0.11873504025531298, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.4993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1637 }, { "epoch": 0.11880757235076521, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.3429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1638 }, { "epoch": 0.11888010444621745, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1639 }, { "epoch": 0.1189526365416697, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.2916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1640 }, { "epoch": 0.11902516863712193, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.3502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1641 }, { "epoch": 0.11909770073257417, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.3765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1642 }, { "epoch": 0.1191702328280264, "grad_norm": 36.5, "learning_rate": 0.0003, "loss": 9.243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1643 }, { "epoch": 0.11924276492347864, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.3339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1644 }, { "epoch": 0.11931529701893087, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.1614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1645 }, { "epoch": 0.11938782911438311, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1646 }, { "epoch": 0.11946036120983536, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.9819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1647 }, { "epoch": 0.11953289330528759, "grad_norm": 29.0, "learning_rate": 0.0003, "loss": 9.141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1648 }, { "epoch": 0.11960542540073983, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.4241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1649 }, { "epoch": 0.11967795749619206, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1650 }, { "epoch": 0.1197504895916443, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.3117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1651 }, { "epoch": 0.11982302168709653, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.6187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1652 }, { "epoch": 0.11989555378254878, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.9674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1653 }, { "epoch": 0.11996808587800102, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 9.312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1654 }, { "epoch": 0.12004061797345325, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.2205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1655 }, { "epoch": 0.1201131500689055, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1656 }, { "epoch": 0.12018568216435772, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1657 }, { "epoch": 0.12025821425980997, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1658 }, { "epoch": 0.1203307463552622, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.1251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1659 }, { "epoch": 0.12040327845071444, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.1106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1660 }, { "epoch": 0.12047581054616668, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.5467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1661 }, { "epoch": 0.12054834264161891, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1662 }, { "epoch": 0.12062087473707116, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.5818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1663 }, { "epoch": 0.12069340683252339, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.2307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1664 }, { "epoch": 0.12076593892797563, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.1282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1665 }, { "epoch": 0.12083847102342787, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.1203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1666 }, { "epoch": 0.1209110031188801, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1667 }, { "epoch": 0.12098353521433235, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.8087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1668 }, { "epoch": 0.12105606730978458, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.4861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1669 }, { "epoch": 0.12112859940523682, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1670 }, { "epoch": 0.12120113150068905, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1671 }, { "epoch": 0.12127366359614129, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.3106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1672 }, { "epoch": 0.12134619569159354, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.2349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1673 }, { "epoch": 0.12141872778704577, "grad_norm": 1.3046875, "learning_rate": 0.0003, "loss": 8.7145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1674 }, { "epoch": 0.12149125988249801, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1675 }, { "epoch": 0.12156379197795024, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1676 }, { "epoch": 0.12163632407340248, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.9957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1677 }, { "epoch": 0.12170885616885471, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.2429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1678 }, { "epoch": 0.12178138826430696, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1679 }, { "epoch": 0.1218539203597592, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.1574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1680 }, { "epoch": 0.12192645245521143, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1681 }, { "epoch": 0.12199898455066367, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.5357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1682 }, { "epoch": 0.1220715166461159, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.0962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1683 }, { "epoch": 0.12214404874156815, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 9.6167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1684 }, { "epoch": 0.12221658083702037, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.18, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1685 }, { "epoch": 0.12228911293247262, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.1212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1686 }, { "epoch": 0.12236164502792486, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.7209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1687 }, { "epoch": 0.12243417712337709, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.3334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1688 }, { "epoch": 0.12250670921882933, "grad_norm": 16.375, "learning_rate": 0.0003, "loss": 9.0, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1689 }, { "epoch": 0.12257924131428156, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.6447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1690 }, { "epoch": 0.12265177340973381, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.0365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1691 }, { "epoch": 0.12272430550518605, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1692 }, { "epoch": 0.12279683760063828, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.5727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1693 }, { "epoch": 0.12286936969609052, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.1402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1694 }, { "epoch": 0.12294190179154275, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.1166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1695 }, { "epoch": 0.123014433886995, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1696 }, { "epoch": 0.12308696598244723, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.2608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1697 }, { "epoch": 0.12315949807789947, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 9.3139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1698 }, { "epoch": 0.12323203017335171, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.3941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1699 }, { "epoch": 0.12330456226880394, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1700 }, { "epoch": 0.12337709436425619, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.3963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1701 }, { "epoch": 0.12344962645970842, "grad_norm": 90.5, "learning_rate": 0.0003, "loss": 9.0906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1702 }, { "epoch": 0.12352215855516066, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1703 }, { "epoch": 0.12359469065061289, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.3636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1704 }, { "epoch": 0.12366722274606513, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.2652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1705 }, { "epoch": 0.12373975484151738, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.9749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1706 }, { "epoch": 0.1238122869369696, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1707 }, { "epoch": 0.12388481903242185, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.3088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1708 }, { "epoch": 0.12395735112787408, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.02, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1709 }, { "epoch": 0.12402988322332632, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1710 }, { "epoch": 0.12410241531877855, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1711 }, { "epoch": 0.1241749474142308, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.5489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1712 }, { "epoch": 0.12424747950968304, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.2072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1713 }, { "epoch": 0.12432001160513527, "grad_norm": 1.390625, "learning_rate": 0.0003, "loss": 9.1132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1714 }, { "epoch": 0.12439254370058751, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.3173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1715 }, { "epoch": 0.12446507579603974, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.4511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1716 }, { "epoch": 0.12453760789149199, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1717 }, { "epoch": 0.12461013998694423, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.2291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1718 }, { "epoch": 0.12468267208239646, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.1157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1719 }, { "epoch": 0.1247552041778487, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.1449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1720 }, { "epoch": 0.12482773627330093, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.4369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1721 }, { "epoch": 0.12490026836875318, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.9899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1722 }, { "epoch": 0.1249728004642054, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.2587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1723 }, { "epoch": 0.12504533255965763, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.3932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1724 }, { "epoch": 0.12511786465510988, "grad_norm": 26.625, "learning_rate": 0.0003, "loss": 9.0536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1725 }, { "epoch": 0.12519039675056212, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1726 }, { "epoch": 0.12526292884601437, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1727 }, { "epoch": 0.1253354609414666, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 8.9339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1728 }, { "epoch": 0.12540799303691882, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.1226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1729 }, { "epoch": 0.12548052513237107, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.87, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1730 }, { "epoch": 0.1255530572278233, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1731 }, { "epoch": 0.12562558932327555, "grad_norm": 1.53125, "learning_rate": 0.0003, "loss": 9.5663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1732 }, { "epoch": 0.1256981214187278, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.8744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1733 }, { "epoch": 0.12577065351418001, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.5843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1734 }, { "epoch": 0.12584318560963226, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1735 }, { "epoch": 0.1259157177050845, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.3519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1736 }, { "epoch": 0.12598824980053674, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1737 }, { "epoch": 0.126060781895989, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.9918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1738 }, { "epoch": 0.1261333139914412, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1739 }, { "epoch": 0.12620584608689345, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.1618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1740 }, { "epoch": 0.1262783781823457, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1741 }, { "epoch": 0.12635091027779793, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1742 }, { "epoch": 0.12642344237325015, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.2086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1743 }, { "epoch": 0.1264959744687024, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.4409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1744 }, { "epoch": 0.12656850656415464, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1745 }, { "epoch": 0.12664103865960688, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.9664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1746 }, { "epoch": 0.12671357075505912, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1747 }, { "epoch": 0.12678610285051134, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.4583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1748 }, { "epoch": 0.12685863494596358, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1749 }, { "epoch": 0.12693116704141583, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1750 }, { "epoch": 0.12700369913686807, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 9.1828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1751 }, { "epoch": 0.1270762312323203, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.5381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1752 }, { "epoch": 0.12714876332777253, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1753 }, { "epoch": 0.12722129542322477, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1754 }, { "epoch": 0.12729382751867702, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1755 }, { "epoch": 0.12736635961412926, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1756 }, { "epoch": 0.1274388917095815, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1757 }, { "epoch": 0.12751142380503372, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1758 }, { "epoch": 0.12758395590048596, "grad_norm": 11.4375, "learning_rate": 0.0003, "loss": 9.2372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1759 }, { "epoch": 0.1276564879959382, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.4058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1760 }, { "epoch": 0.12772902009139045, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.4056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1761 }, { "epoch": 0.12780155218684267, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1762 }, { "epoch": 0.1278740842822949, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.4698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1763 }, { "epoch": 0.12794661637774715, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1764 }, { "epoch": 0.1280191484731994, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.1992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1765 }, { "epoch": 0.12809168056865164, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1766 }, { "epoch": 0.12816421266410385, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.2378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1767 }, { "epoch": 0.1282367447595561, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.4114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1768 }, { "epoch": 0.12830927685500834, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1769 }, { "epoch": 0.12838180895046059, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1770 }, { "epoch": 0.12845434104591283, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.4669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1771 }, { "epoch": 0.12852687314136504, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.0792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1772 }, { "epoch": 0.1285994052368173, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 9.363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1773 }, { "epoch": 0.12867193733226953, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1774 }, { "epoch": 0.12874446942772177, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.24, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1775 }, { "epoch": 0.128817001523174, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.2205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1776 }, { "epoch": 0.12888953361862623, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1777 }, { "epoch": 0.12896206571407848, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.2557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1778 }, { "epoch": 0.12903459780953072, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.5227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1779 }, { "epoch": 0.12910712990498296, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1780 }, { "epoch": 0.12917966200043518, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1781 }, { "epoch": 0.12925219409588742, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.7265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1782 }, { "epoch": 0.12932472619133967, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.1116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1783 }, { "epoch": 0.1293972582867919, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.0008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1784 }, { "epoch": 0.12946979038224415, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1785 }, { "epoch": 0.12954232247769637, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.8718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1786 }, { "epoch": 0.1296148545731486, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.0617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1787 }, { "epoch": 0.12968738666860086, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1788 }, { "epoch": 0.1297599187640531, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.0493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1789 }, { "epoch": 0.12983245085950534, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1790 }, { "epoch": 0.12990498295495756, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1791 }, { "epoch": 0.1299775150504098, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1792 }, { "epoch": 0.13005004714586205, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.9739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1793 }, { "epoch": 0.1301225792413143, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.3569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1794 }, { "epoch": 0.1301951113367665, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1795 }, { "epoch": 0.13026764343221875, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.3319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1796 }, { "epoch": 0.130340175527671, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 9.3007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1797 }, { "epoch": 0.13041270762312324, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1798 }, { "epoch": 0.13048523971857548, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.3036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1799 }, { "epoch": 0.1305577718140277, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.6301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1800 }, { "epoch": 0.13063030390947994, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 8.9536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1801 }, { "epoch": 0.13070283600493218, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1802 }, { "epoch": 0.13077536810038443, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1803 }, { "epoch": 0.13084790019583667, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1804 }, { "epoch": 0.13092043229128889, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.7838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1805 }, { "epoch": 0.13099296438674113, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1806 }, { "epoch": 0.13106549648219337, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1807 }, { "epoch": 0.13113802857764562, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 9.0836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1808 }, { "epoch": 0.13121056067309786, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1809 }, { "epoch": 0.13128309276855007, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 8.9072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1810 }, { "epoch": 0.13135562486400232, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1811 }, { "epoch": 0.13142815695945456, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1812 }, { "epoch": 0.1315006890549068, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1813 }, { "epoch": 0.13157322115035902, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.1367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1814 }, { "epoch": 0.13164575324581126, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1815 }, { "epoch": 0.1317182853412635, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.1766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1816 }, { "epoch": 0.13179081743671575, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1817 }, { "epoch": 0.131863349532168, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.1221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1818 }, { "epoch": 0.1319358816276202, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.0309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1819 }, { "epoch": 0.13200841372307245, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.2491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1820 }, { "epoch": 0.1320809458185247, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.5388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1821 }, { "epoch": 0.13215347791397694, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.8169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1822 }, { "epoch": 0.13222601000942918, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1823 }, { "epoch": 0.1322985421048814, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1824 }, { "epoch": 0.13237107420033364, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1825 }, { "epoch": 0.1324436062957859, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.3939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1826 }, { "epoch": 0.13251613839123813, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1827 }, { "epoch": 0.13258867048669037, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.5905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1828 }, { "epoch": 0.1326612025821426, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1829 }, { "epoch": 0.13273373467759483, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.3916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1830 }, { "epoch": 0.13280626677304708, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.1331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1831 }, { "epoch": 0.13287879886849932, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1832 }, { "epoch": 0.13295133096395154, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1833 }, { "epoch": 0.13302386305940378, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.9224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1834 }, { "epoch": 0.13309639515485602, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.4792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1835 }, { "epoch": 0.13316892725030827, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.2131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1836 }, { "epoch": 0.1332414593457605, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.2299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1837 }, { "epoch": 0.13331399144121273, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 9.2313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1838 }, { "epoch": 0.13338652353666497, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.1447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1839 }, { "epoch": 0.1334590556321172, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1840 }, { "epoch": 0.13353158772756946, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.6051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1841 }, { "epoch": 0.1336041198230217, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.6581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1842 }, { "epoch": 0.13367665191847392, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.6811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1843 }, { "epoch": 0.13374918401392616, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1844 }, { "epoch": 0.1338217161093784, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1845 }, { "epoch": 0.13389424820483065, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.3796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1846 }, { "epoch": 0.13396678030028286, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1847 }, { "epoch": 0.1340393123957351, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.0584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1848 }, { "epoch": 0.13411184449118735, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1849 }, { "epoch": 0.1341843765866396, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1850 }, { "epoch": 0.13425690868209184, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1851 }, { "epoch": 0.13432944077754405, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1852 }, { "epoch": 0.1344019728729963, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.6347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1853 }, { "epoch": 0.13447450496844854, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1854 }, { "epoch": 0.13454703706390078, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.4047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1855 }, { "epoch": 0.13461956915935303, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.9145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1856 }, { "epoch": 0.13469210125480524, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1857 }, { "epoch": 0.13476463335025748, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1858 }, { "epoch": 0.13483716544570973, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1859 }, { "epoch": 0.13490969754116197, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 9.017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1860 }, { "epoch": 0.13498222963661421, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1861 }, { "epoch": 0.13505476173206643, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1862 }, { "epoch": 0.13512729382751867, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1863 }, { "epoch": 0.13519982592297092, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1864 }, { "epoch": 0.13527235801842316, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1865 }, { "epoch": 0.13534489011387538, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1866 }, { "epoch": 0.13541742220932762, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1867 }, { "epoch": 0.13548995430477986, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.6699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1868 }, { "epoch": 0.1355624864002321, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.2525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1869 }, { "epoch": 0.13563501849568435, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1870 }, { "epoch": 0.13570755059113657, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1871 }, { "epoch": 0.1357800826865888, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1872 }, { "epoch": 0.13585261478204105, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.4283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1873 }, { "epoch": 0.1359251468774933, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1874 }, { "epoch": 0.13599767897294554, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1875 }, { "epoch": 0.13607021106839776, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1876 }, { "epoch": 0.13614274316385, "grad_norm": 1.5, "learning_rate": 0.0003, "loss": 9.0956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1877 }, { "epoch": 0.13621527525930224, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.5954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1878 }, { "epoch": 0.1362878073547545, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.4866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1879 }, { "epoch": 0.13636033945020673, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.7289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1880 }, { "epoch": 0.13643287154565895, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.7853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1881 }, { "epoch": 0.1365054036411112, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1882 }, { "epoch": 0.13657793573656343, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 9.475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1883 }, { "epoch": 0.13665046783201568, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1884 }, { "epoch": 0.1367229999274679, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 8.7366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1885 }, { "epoch": 0.13679553202292014, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1886 }, { "epoch": 0.13686806411837238, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1887 }, { "epoch": 0.13694059621382462, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1888 }, { "epoch": 0.13701312830927687, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1889 }, { "epoch": 0.13708566040472908, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1890 }, { "epoch": 0.13715819250018133, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 9.1543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1891 }, { "epoch": 0.13723072459563357, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.2619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1892 }, { "epoch": 0.1373032566910858, "grad_norm": 64.0, "learning_rate": 0.0003, "loss": 9.0845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1893 }, { "epoch": 0.13737578878653806, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.0212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1894 }, { "epoch": 0.13744832088199027, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1895 }, { "epoch": 0.13752085297744251, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.3841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1896 }, { "epoch": 0.13759338507289476, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1897 }, { "epoch": 0.137665917168347, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1898 }, { "epoch": 0.13773844926379922, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1899 }, { "epoch": 0.13781098135925146, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.5688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1900 }, { "epoch": 0.1378835134547037, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1901 }, { "epoch": 0.13795604555015595, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.8588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1902 }, { "epoch": 0.1380285776456082, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.9255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1903 }, { "epoch": 0.1381011097410604, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1904 }, { "epoch": 0.13817364183651265, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1905 }, { "epoch": 0.1382461739319649, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 9.4909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1906 }, { "epoch": 0.13831870602741714, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 9.0441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1907 }, { "epoch": 0.13839123812286938, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.1568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1908 }, { "epoch": 0.1384637702183216, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1909 }, { "epoch": 0.13853630231377384, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 9.2015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1910 }, { "epoch": 0.13860883440922608, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 9.1698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1911 }, { "epoch": 0.13868136650467833, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.4815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1912 }, { "epoch": 0.13875389860013057, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1913 }, { "epoch": 0.1388264306955828, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 9.0507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1914 }, { "epoch": 0.13889896279103503, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.0609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1915 }, { "epoch": 0.13897149488648727, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1916 }, { "epoch": 0.13904402698193952, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1917 }, { "epoch": 0.13911655907739173, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.2137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1918 }, { "epoch": 0.13918909117284398, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1919 }, { "epoch": 0.13926162326829622, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1920 }, { "epoch": 0.13933415536374846, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 8.9837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1921 }, { "epoch": 0.1394066874592007, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.7305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1922 }, { "epoch": 0.13947921955465292, "grad_norm": 77.0, "learning_rate": 0.0003, "loss": 9.3921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1923 }, { "epoch": 0.13955175165010517, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.1822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1924 }, { "epoch": 0.1396242837455574, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.3562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1925 }, { "epoch": 0.13969681584100965, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.2109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1926 }, { "epoch": 0.1397693479364619, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.1842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1927 }, { "epoch": 0.1398418800319141, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1928 }, { "epoch": 0.13991441212736636, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.3595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1929 }, { "epoch": 0.1399869442228186, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 9.1689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1930 }, { "epoch": 0.14005947631827084, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1931 }, { "epoch": 0.14013200841372309, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.3052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1932 }, { "epoch": 0.1402045405091753, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.6195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1933 }, { "epoch": 0.14027707260462755, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1934 }, { "epoch": 0.1403496047000798, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1935 }, { "epoch": 0.14042213679553203, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1936 }, { "epoch": 0.14049466889098425, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1937 }, { "epoch": 0.1405672009864365, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.8069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1938 }, { "epoch": 0.14063973308188873, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.0325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1939 }, { "epoch": 0.14071226517734098, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.2231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1940 }, { "epoch": 0.14078479727279322, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.5069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1941 }, { "epoch": 0.14085732936824544, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1942 }, { "epoch": 0.14092986146369768, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1943 }, { "epoch": 0.14100239355914992, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.0224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1944 }, { "epoch": 0.14107492565460217, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.3892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1945 }, { "epoch": 0.1411474577500544, "grad_norm": 14.875, "learning_rate": 0.0003, "loss": 9.0376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1946 }, { "epoch": 0.14121998984550663, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.1175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1947 }, { "epoch": 0.14129252194095887, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1948 }, { "epoch": 0.14136505403641111, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.4356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1949 }, { "epoch": 0.14143758613186336, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.3278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1950 }, { "epoch": 0.14151011822731557, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1951 }, { "epoch": 0.14158265032276782, "grad_norm": 1.3359375, "learning_rate": 0.0003, "loss": 9.2062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1952 }, { "epoch": 0.14165518241822006, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.6453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1953 }, { "epoch": 0.1417277145136723, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1954 }, { "epoch": 0.14180024660912455, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.1872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1955 }, { "epoch": 0.14187277870457676, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1956 }, { "epoch": 0.141945310800029, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 9.1849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1957 }, { "epoch": 0.14201784289548125, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.1765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1958 }, { "epoch": 0.1420903749909335, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1959 }, { "epoch": 0.14216290708638574, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1960 }, { "epoch": 0.14223543918183795, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 9.0593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1961 }, { "epoch": 0.1423079712772902, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1962 }, { "epoch": 0.14238050337274244, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1963 }, { "epoch": 0.14245303546819468, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1964 }, { "epoch": 0.14252556756364693, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1965 }, { "epoch": 0.14259809965909914, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.3048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1966 }, { "epoch": 0.14267063175455139, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1967 }, { "epoch": 0.14274316385000363, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.1763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1968 }, { "epoch": 0.14281569594545587, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.2795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1969 }, { "epoch": 0.1428882280409081, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1970 }, { "epoch": 0.14296076013636033, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1971 }, { "epoch": 0.14303329223181258, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.2773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1972 }, { "epoch": 0.14310582432726482, "grad_norm": 1.3984375, "learning_rate": 0.0003, "loss": 8.6762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1973 }, { "epoch": 0.14317835642271706, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.2698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1974 }, { "epoch": 0.14325088851816928, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1975 }, { "epoch": 0.14332342061362152, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1976 }, { "epoch": 0.14339595270907377, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.2774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1977 }, { "epoch": 0.143468484804526, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.5032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1978 }, { "epoch": 0.14354101689997825, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1979 }, { "epoch": 0.14361354899543047, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.0793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1980 }, { "epoch": 0.1436860810908827, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.2105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1981 }, { "epoch": 0.14375861318633495, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.6735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1982 }, { "epoch": 0.1438311452817872, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1983 }, { "epoch": 0.14390367737723944, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.7695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1984 }, { "epoch": 0.14397620947269166, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1985 }, { "epoch": 0.1440487415681439, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1986 }, { "epoch": 0.14412127366359614, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1987 }, { "epoch": 0.1441938057590484, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 9.0722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1988 }, { "epoch": 0.1442663378545006, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1989 }, { "epoch": 0.14433886994995285, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.2213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1990 }, { "epoch": 0.1444114020454051, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.9548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1991 }, { "epoch": 0.14448393414085733, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1992 }, { "epoch": 0.14455646623630958, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1993 }, { "epoch": 0.1446289983317618, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.6169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1994 }, { "epoch": 0.14470153042721404, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.1985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1995 }, { "epoch": 0.14477406252266628, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1996 }, { "epoch": 0.14484659461811852, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1997 }, { "epoch": 0.14491912671357077, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.5149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1998 }, { "epoch": 0.14499165880902298, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.1399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 1999 }, { "epoch": 0.14506419090447523, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.3497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2000 }, { "epoch": 0.14513672299992747, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2001 }, { "epoch": 0.1452092550953797, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.4109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2002 }, { "epoch": 0.14528178719083196, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2003 }, { "epoch": 0.14535431928628417, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2004 }, { "epoch": 0.14542685138173642, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.3243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2005 }, { "epoch": 0.14549938347718866, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2006 }, { "epoch": 0.1455719155726409, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2007 }, { "epoch": 0.14564444766809312, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.9423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2008 }, { "epoch": 0.14571697976354536, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2009 }, { "epoch": 0.1457895118589976, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2010 }, { "epoch": 0.14586204395444985, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.5485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2011 }, { "epoch": 0.1459345760499021, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2012 }, { "epoch": 0.1460071081453543, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2013 }, { "epoch": 0.14607964024080655, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.2112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2014 }, { "epoch": 0.1461521723362588, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2015 }, { "epoch": 0.14622470443171104, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.3724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2016 }, { "epoch": 0.14629723652716328, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.9839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2017 }, { "epoch": 0.1463697686226155, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.6579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2018 }, { "epoch": 0.14644230071806774, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2019 }, { "epoch": 0.14651483281351999, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2020 }, { "epoch": 0.14658736490897223, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2021 }, { "epoch": 0.14665989700442444, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2022 }, { "epoch": 0.1467324290998767, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.4695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2023 }, { "epoch": 0.14680496119532893, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2024 }, { "epoch": 0.14687749329078117, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2025 }, { "epoch": 0.14695002538623342, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.9361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2026 }, { "epoch": 0.14702255748168563, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2027 }, { "epoch": 0.14709508957713788, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.2963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2028 }, { "epoch": 0.14716762167259012, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2029 }, { "epoch": 0.14724015376804236, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 8.9906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2030 }, { "epoch": 0.1473126858634946, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.2563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2031 }, { "epoch": 0.14738521795894682, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.8977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2032 }, { "epoch": 0.14745775005439907, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.3725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2033 }, { "epoch": 0.1475302821498513, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2034 }, { "epoch": 0.14760281424530355, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2035 }, { "epoch": 0.1476753463407558, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.7935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2036 }, { "epoch": 0.147747878436208, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.9004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2037 }, { "epoch": 0.14782041053166026, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2038 }, { "epoch": 0.1478929426271125, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2039 }, { "epoch": 0.14796547472256474, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.9148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2040 }, { "epoch": 0.14803800681801696, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.1237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2041 }, { "epoch": 0.1481105389134692, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.7275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2042 }, { "epoch": 0.14818307100892145, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2043 }, { "epoch": 0.1482556031043737, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2044 }, { "epoch": 0.14832813519982593, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2045 }, { "epoch": 0.14840066729527815, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.1957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2046 }, { "epoch": 0.1484731993907304, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2047 }, { "epoch": 0.14854573148618264, "grad_norm": 41.75, "learning_rate": 0.0003, "loss": 8.9173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2048 }, { "epoch": 0.14861826358163488, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.7156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2049 }, { "epoch": 0.14869079567708712, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.1897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2050 }, { "epoch": 0.14876332777253934, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 8.8614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2051 }, { "epoch": 0.14883585986799158, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.2299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2052 }, { "epoch": 0.14890839196344383, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2053 }, { "epoch": 0.14898092405889607, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.4189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2054 }, { "epoch": 0.1490534561543483, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.0686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2055 }, { "epoch": 0.14912598824980053, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2056 }, { "epoch": 0.14919852034525277, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.4397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2057 }, { "epoch": 0.14927105244070502, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2058 }, { "epoch": 0.14934358453615726, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.2865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2059 }, { "epoch": 0.14941611663160947, "grad_norm": 16.5, "learning_rate": 0.0003, "loss": 9.1614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2060 }, { "epoch": 0.14948864872706172, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.5043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2061 }, { "epoch": 0.14956118082251396, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.2376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2062 }, { "epoch": 0.1496337129179662, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2063 }, { "epoch": 0.14970624501341845, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2064 }, { "epoch": 0.14977877710887066, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.4275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2065 }, { "epoch": 0.1498513092043229, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2066 }, { "epoch": 0.14992384129977515, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.1521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2067 }, { "epoch": 0.1499963733952274, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2068 }, { "epoch": 0.15006890549067964, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.0986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2069 }, { "epoch": 0.15014143758613185, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.6348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2070 }, { "epoch": 0.1502139696815841, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.5059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2071 }, { "epoch": 0.15028650177703634, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2072 }, { "epoch": 0.15035903387248858, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2073 }, { "epoch": 0.1504315659679408, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.5178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2074 }, { "epoch": 0.15050409806339304, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2075 }, { "epoch": 0.1505766301588453, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2076 }, { "epoch": 0.15064916225429753, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2077 }, { "epoch": 0.15072169434974977, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.1564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2078 }, { "epoch": 0.150794226445202, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.3446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2079 }, { "epoch": 0.15086675854065423, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2080 }, { "epoch": 0.15093929063610648, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2081 }, { "epoch": 0.15101182273155872, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 9.0614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2082 }, { "epoch": 0.15108435482701096, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.3452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2083 }, { "epoch": 0.15115688692246318, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2084 }, { "epoch": 0.15122941901791542, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.7854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2085 }, { "epoch": 0.15130195111336767, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2086 }, { "epoch": 0.1513744832088199, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.4616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2087 }, { "epoch": 0.15144701530427215, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.4206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2088 }, { "epoch": 0.15151954739972437, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2089 }, { "epoch": 0.1515920794951766, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 9.0584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2090 }, { "epoch": 0.15166461159062886, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2091 }, { "epoch": 0.1517371436860811, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.2091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2092 }, { "epoch": 0.15180967578153332, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2093 }, { "epoch": 0.15188220787698556, "grad_norm": 47.0, "learning_rate": 0.0003, "loss": 9.2991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2094 }, { "epoch": 0.1519547399724378, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.2613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2095 }, { "epoch": 0.15202727206789005, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.6815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2096 }, { "epoch": 0.1520998041633423, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.4169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2097 }, { "epoch": 0.1521723362587945, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2098 }, { "epoch": 0.15224486835424675, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.0697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2099 }, { "epoch": 0.152317400449699, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 8.9606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2100 }, { "epoch": 0.15238993254515124, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.0506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2101 }, { "epoch": 0.15246246464060348, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2102 }, { "epoch": 0.1525349967360557, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.3701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2103 }, { "epoch": 0.15260752883150794, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2104 }, { "epoch": 0.15268006092696018, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.1014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2105 }, { "epoch": 0.15275259302241243, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.2128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2106 }, { "epoch": 0.15282512511786467, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.1055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2107 }, { "epoch": 0.15289765721331688, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2108 }, { "epoch": 0.15297018930876913, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2109 }, { "epoch": 0.15304272140422137, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2110 }, { "epoch": 0.15311525349967361, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.3203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2111 }, { "epoch": 0.15318778559512583, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.3167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2112 }, { "epoch": 0.15326031769057807, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2113 }, { "epoch": 0.15333284978603032, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.0281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2114 }, { "epoch": 0.15340538188148256, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2115 }, { "epoch": 0.1534779139769348, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2116 }, { "epoch": 0.15355044607238702, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2117 }, { "epoch": 0.15362297816783926, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2118 }, { "epoch": 0.1536955102632915, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.1339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2119 }, { "epoch": 0.15376804235874375, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2120 }, { "epoch": 0.153840574454196, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2121 }, { "epoch": 0.1539131065496482, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.02, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2122 }, { "epoch": 0.15398563864510045, "grad_norm": 52.75, "learning_rate": 0.0003, "loss": 9.4922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2123 }, { "epoch": 0.1540581707405527, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2124 }, { "epoch": 0.15413070283600494, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.3571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2125 }, { "epoch": 0.15420323493145716, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2126 }, { "epoch": 0.1542757670269094, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.3505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2127 }, { "epoch": 0.15434829912236164, "grad_norm": 22.375, "learning_rate": 0.0003, "loss": 8.9634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2128 }, { "epoch": 0.1544208312178139, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.2335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2129 }, { "epoch": 0.15449336331326613, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.4316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2130 }, { "epoch": 0.15456589540871835, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2131 }, { "epoch": 0.1546384275041706, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2132 }, { "epoch": 0.15471095959962283, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 9.241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2133 }, { "epoch": 0.15478349169507508, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2134 }, { "epoch": 0.15485602379052732, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 9.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2135 }, { "epoch": 0.15492855588597954, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.2217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2136 }, { "epoch": 0.15500108798143178, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.1597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2137 }, { "epoch": 0.15507362007688402, "grad_norm": 26.125, "learning_rate": 0.0003, "loss": 9.4411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2138 }, { "epoch": 0.15514615217233627, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.9801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2139 }, { "epoch": 0.1552186842677885, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.0777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2140 }, { "epoch": 0.15529121636324072, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.4984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2141 }, { "epoch": 0.15536374845869297, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2142 }, { "epoch": 0.1554362805541452, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2143 }, { "epoch": 0.15550881264959746, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.2663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2144 }, { "epoch": 0.15558134474504967, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2145 }, { "epoch": 0.15565387684050191, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2146 }, { "epoch": 0.15572640893595416, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2147 }, { "epoch": 0.1557989410314064, "grad_norm": 18.625, "learning_rate": 0.0003, "loss": 9.1735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2148 }, { "epoch": 0.15587147312685865, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 9.1857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2149 }, { "epoch": 0.15594400522231086, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2150 }, { "epoch": 0.1560165373177631, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.1731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2151 }, { "epoch": 0.15608906941321535, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 8.7795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2152 }, { "epoch": 0.1561616015086676, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.9891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2153 }, { "epoch": 0.15623413360411983, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.3062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2154 }, { "epoch": 0.15630666569957205, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.3515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2155 }, { "epoch": 0.1563791977950243, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.3416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2156 }, { "epoch": 0.15645172989047654, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.2996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2157 }, { "epoch": 0.15652426198592878, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2158 }, { "epoch": 0.15659679408138102, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.4564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2159 }, { "epoch": 0.15666932617683324, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2160 }, { "epoch": 0.15674185827228548, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.1096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2161 }, { "epoch": 0.15681439036773773, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2162 }, { "epoch": 0.15688692246318997, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.2804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2163 }, { "epoch": 0.1569594545586422, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.5263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2164 }, { "epoch": 0.15703198665409443, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2165 }, { "epoch": 0.15710451874954667, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.4331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2166 }, { "epoch": 0.15717705084499892, "grad_norm": 16.5, "learning_rate": 0.0003, "loss": 9.4579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2167 }, { "epoch": 0.15724958294045116, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.4058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2168 }, { "epoch": 0.15732211503590338, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2169 }, { "epoch": 0.15739464713135562, "grad_norm": 18.75, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2170 }, { "epoch": 0.15746717922680786, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2171 }, { "epoch": 0.1575397113222601, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.2984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2172 }, { "epoch": 0.15761224341771235, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2173 }, { "epoch": 0.15768477551316457, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.5457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2174 }, { "epoch": 0.1577573076086168, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2175 }, { "epoch": 0.15782983970406905, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2176 }, { "epoch": 0.1579023717995213, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.0552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2177 }, { "epoch": 0.15797490389497354, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2178 }, { "epoch": 0.15804743599042576, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2179 }, { "epoch": 0.158119968085878, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.2531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2180 }, { "epoch": 0.15819250018133024, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 9.0461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2181 }, { "epoch": 0.15826503227678249, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.0524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2182 }, { "epoch": 0.1583375643722347, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.3317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2183 }, { "epoch": 0.15841009646768694, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.6981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2184 }, { "epoch": 0.1584826285631392, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2185 }, { "epoch": 0.15855516065859143, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.1794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2186 }, { "epoch": 0.15862769275404368, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.4666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2187 }, { "epoch": 0.1587002248494959, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2188 }, { "epoch": 0.15877275694494813, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.9753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2189 }, { "epoch": 0.15884528904040038, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.7857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2190 }, { "epoch": 0.15891782113585262, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2191 }, { "epoch": 0.15899035323130487, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2192 }, { "epoch": 0.15906288532675708, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.0838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2193 }, { "epoch": 0.15913541742220932, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2194 }, { "epoch": 0.15920794951766157, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.5149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2195 }, { "epoch": 0.1592804816131138, "grad_norm": 1.4296875, "learning_rate": 0.0003, "loss": 9.3131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2196 }, { "epoch": 0.15935301370856603, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.8689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2197 }, { "epoch": 0.15942554580401827, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.9606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2198 }, { "epoch": 0.15949807789947051, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2199 }, { "epoch": 0.15957060999492276, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2200 }, { "epoch": 0.159643142090375, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.3129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2201 }, { "epoch": 0.15971567418582722, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2202 }, { "epoch": 0.15978820628127946, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.1777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2203 }, { "epoch": 0.1598607383767317, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2204 }, { "epoch": 0.15993327047218395, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.2453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2205 }, { "epoch": 0.1600058025676362, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2206 }, { "epoch": 0.1600783346630884, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.1286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2207 }, { "epoch": 0.16015086675854065, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.7321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2208 }, { "epoch": 0.1602233988539929, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.3314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2209 }, { "epoch": 0.16029593094944514, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.5233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2210 }, { "epoch": 0.16036846304489738, "grad_norm": 11.8125, "learning_rate": 0.0003, "loss": 9.0967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2211 }, { "epoch": 0.1604409951403496, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2212 }, { "epoch": 0.16051352723580184, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2213 }, { "epoch": 0.16058605933125408, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2214 }, { "epoch": 0.16065859142670633, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.9382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2215 }, { "epoch": 0.16073112352215854, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.3825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2216 }, { "epoch": 0.16080365561761079, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.2881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2217 }, { "epoch": 0.16087618771306303, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.4075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2218 }, { "epoch": 0.16094871980851527, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.0981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2219 }, { "epoch": 0.16102125190396752, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2220 }, { "epoch": 0.16109378399941973, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2221 }, { "epoch": 0.16116631609487198, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2222 }, { "epoch": 0.16123884819032422, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2223 }, { "epoch": 0.16131138028577646, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.1417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2224 }, { "epoch": 0.1613839123812287, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.3072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2225 }, { "epoch": 0.16145644447668092, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.8528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2226 }, { "epoch": 0.16152897657213316, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.07, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2227 }, { "epoch": 0.1616015086675854, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2228 }, { "epoch": 0.16167404076303765, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2229 }, { "epoch": 0.1617465728584899, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.3469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2230 }, { "epoch": 0.1618191049539421, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2231 }, { "epoch": 0.16189163704939435, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.4736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2232 }, { "epoch": 0.1619641691448466, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.2622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2233 }, { "epoch": 0.16203670124029884, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2234 }, { "epoch": 0.16210923333575106, "grad_norm": 14.125, "learning_rate": 0.0003, "loss": 9.3597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2235 }, { "epoch": 0.1621817654312033, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2236 }, { "epoch": 0.16225429752665554, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2237 }, { "epoch": 0.1623268296221078, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.4146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2238 }, { "epoch": 0.16239936171756003, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2239 }, { "epoch": 0.16247189381301225, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2240 }, { "epoch": 0.1625444259084645, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.0191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2241 }, { "epoch": 0.16261695800391673, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2242 }, { "epoch": 0.16268949009936898, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2243 }, { "epoch": 0.16276202219482122, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2244 }, { "epoch": 0.16283455429027344, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2245 }, { "epoch": 0.16290708638572568, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2246 }, { "epoch": 0.16297961848117792, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.8323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2247 }, { "epoch": 0.16305215057663017, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.2472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2248 }, { "epoch": 0.16312468267208238, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2249 }, { "epoch": 0.16319721476753463, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.2541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2250 }, { "epoch": 0.16326974686298687, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.4576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2251 }, { "epoch": 0.1633422789584391, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.2979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2252 }, { "epoch": 0.16341481105389136, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2253 }, { "epoch": 0.16348734314934357, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.0138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2254 }, { "epoch": 0.16355987524479582, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2255 }, { "epoch": 0.16363240734024806, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2256 }, { "epoch": 0.1637049394357003, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.1273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2257 }, { "epoch": 0.16377747153115255, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.0027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2258 }, { "epoch": 0.16385000362660476, "grad_norm": 1.5078125, "learning_rate": 0.0003, "loss": 9.3289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2259 }, { "epoch": 0.163922535722057, "grad_norm": 27.375, "learning_rate": 0.0003, "loss": 9.2551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2260 }, { "epoch": 0.16399506781750925, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2261 }, { "epoch": 0.1640675999129615, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.3579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2262 }, { "epoch": 0.16414013200841374, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.78, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2263 }, { "epoch": 0.16421266410386595, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2264 }, { "epoch": 0.1642851961993182, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.8224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2265 }, { "epoch": 0.16435772829477044, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2266 }, { "epoch": 0.16443026039022268, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2267 }, { "epoch": 0.1645027924856749, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.3623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2268 }, { "epoch": 0.16457532458112714, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2269 }, { "epoch": 0.16464785667657938, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2270 }, { "epoch": 0.16472038877203163, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2271 }, { "epoch": 0.16479292086748387, "grad_norm": 51.75, "learning_rate": 0.0003, "loss": 9.2082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2272 }, { "epoch": 0.1648654529629361, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2273 }, { "epoch": 0.16493798505838833, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.3948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2274 }, { "epoch": 0.16501051715384057, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.4618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2275 }, { "epoch": 0.16508304924929282, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.6427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2276 }, { "epoch": 0.16515558134474506, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2277 }, { "epoch": 0.16522811344019728, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.5003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2278 }, { "epoch": 0.16530064553564952, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 8.8671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2279 }, { "epoch": 0.16537317763110176, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2280 }, { "epoch": 0.165445709726554, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.7855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2281 }, { "epoch": 0.16551824182200625, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 9.0448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2282 }, { "epoch": 0.16559077391745847, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2283 }, { "epoch": 0.1656633060129107, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.1997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2284 }, { "epoch": 0.16573583810836295, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2285 }, { "epoch": 0.1658083702038152, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.6935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2286 }, { "epoch": 0.1658809022992674, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.6168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2287 }, { "epoch": 0.16595343439471966, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2288 }, { "epoch": 0.1660259664901719, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.0476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2289 }, { "epoch": 0.16609849858562414, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2290 }, { "epoch": 0.1661710306810764, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.1886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2291 }, { "epoch": 0.1662435627765286, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.0213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2292 }, { "epoch": 0.16631609487198085, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.5952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2293 }, { "epoch": 0.1663886269674331, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 9.19, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2294 }, { "epoch": 0.16646115906288533, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2295 }, { "epoch": 0.16653369115833758, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.1784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2296 }, { "epoch": 0.1666062232537898, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.2206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2297 }, { "epoch": 0.16667875534924204, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 9.2916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2298 }, { "epoch": 0.16675128744469428, "grad_norm": 1.4296875, "learning_rate": 0.0003, "loss": 9.5649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2299 }, { "epoch": 0.16682381954014652, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2300 }, { "epoch": 0.16689635163559874, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.5627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2301 }, { "epoch": 0.16696888373105098, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2302 }, { "epoch": 0.16704141582650323, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2303 }, { "epoch": 0.16711394792195547, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2304 }, { "epoch": 0.1671864800174077, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.7749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2305 }, { "epoch": 0.16725901211285993, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.5314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2306 }, { "epoch": 0.16733154420831217, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.2192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2307 }, { "epoch": 0.16740407630376442, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.1781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2308 }, { "epoch": 0.16747660839921666, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 8.8438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2309 }, { "epoch": 0.1675491404946689, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.2853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2310 }, { "epoch": 0.16762167259012112, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2311 }, { "epoch": 0.16769420468557336, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2312 }, { "epoch": 0.1677667367810256, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.26, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2313 }, { "epoch": 0.16783926887647785, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2314 }, { "epoch": 0.1679118009719301, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.2361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2315 }, { "epoch": 0.1679843330673823, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 8.8584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2316 }, { "epoch": 0.16805686516283455, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.0376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2317 }, { "epoch": 0.1681293972582868, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.2182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2318 }, { "epoch": 0.16820192935373904, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.8872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2319 }, { "epoch": 0.16827446144919125, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 9.5693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2320 }, { "epoch": 0.1683469935446435, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.9977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2321 }, { "epoch": 0.16841952564009574, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2322 }, { "epoch": 0.16849205773554798, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2323 }, { "epoch": 0.16856458983100023, "grad_norm": 19.125, "learning_rate": 0.0003, "loss": 8.8888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2324 }, { "epoch": 0.16863712192645244, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.5666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2325 }, { "epoch": 0.1687096540219047, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.3251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2326 }, { "epoch": 0.16878218611735693, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2327 }, { "epoch": 0.16885471821280917, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2328 }, { "epoch": 0.16892725030826142, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.1047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2329 }, { "epoch": 0.16899978240371363, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.9099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2330 }, { "epoch": 0.16907231449916588, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.6313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2331 }, { "epoch": 0.16914484659461812, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.0947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2332 }, { "epoch": 0.16921737869007036, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2333 }, { "epoch": 0.1692899107855226, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.2645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2334 }, { "epoch": 0.16936244288097482, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.2815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2335 }, { "epoch": 0.16943497497642707, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2336 }, { "epoch": 0.1695075070718793, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2337 }, { "epoch": 0.16958003916733155, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.7632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2338 }, { "epoch": 0.16965257126278377, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.6161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2339 }, { "epoch": 0.169725103358236, "grad_norm": 1.6796875, "learning_rate": 0.0003, "loss": 9.1477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2340 }, { "epoch": 0.16979763545368826, "grad_norm": 1.3359375, "learning_rate": 0.0003, "loss": 8.8255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2341 }, { "epoch": 0.1698701675491405, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.9208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2342 }, { "epoch": 0.16994269964459274, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2343 }, { "epoch": 0.17001523174004496, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.9084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2344 }, { "epoch": 0.1700877638354972, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2345 }, { "epoch": 0.17016029593094945, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2346 }, { "epoch": 0.1702328280264017, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2347 }, { "epoch": 0.17030536012185393, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.0537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2348 }, { "epoch": 0.17037789221730615, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.2285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2349 }, { "epoch": 0.1704504243127584, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2350 }, { "epoch": 0.17052295640821064, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2351 }, { "epoch": 0.17059548850366288, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2352 }, { "epoch": 0.17066802059911512, "grad_norm": 16.25, "learning_rate": 0.0003, "loss": 9.457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2353 }, { "epoch": 0.17074055269456734, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.5044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2354 }, { "epoch": 0.17081308479001958, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.3106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2355 }, { "epoch": 0.17088561688547182, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.1688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2356 }, { "epoch": 0.17095814898092407, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2357 }, { "epoch": 0.17103068107637628, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.7801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2358 }, { "epoch": 0.17110321317182853, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.3023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2359 }, { "epoch": 0.17117574526728077, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2360 }, { "epoch": 0.17124827736273301, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.3139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2361 }, { "epoch": 0.17132080945818526, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.3756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2362 }, { "epoch": 0.17139334155363747, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2363 }, { "epoch": 0.17146587364908972, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2364 }, { "epoch": 0.17153840574454196, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.4772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2365 }, { "epoch": 0.1716109378399942, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2366 }, { "epoch": 0.17168346993544645, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.0048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2367 }, { "epoch": 0.17175600203089866, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.4079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2368 }, { "epoch": 0.1718285341263509, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2369 }, { "epoch": 0.17190106622180315, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.2271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2370 }, { "epoch": 0.1719735983172554, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.2815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2371 }, { "epoch": 0.1720461304127076, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2372 }, { "epoch": 0.17211866250815985, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.9123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2373 }, { "epoch": 0.1721911946036121, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2374 }, { "epoch": 0.17226372669906434, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.3696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2375 }, { "epoch": 0.17233625879451658, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2376 }, { "epoch": 0.1724087908899688, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.3657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2377 }, { "epoch": 0.17248132298542104, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2378 }, { "epoch": 0.1725538550808733, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 9.6597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2379 }, { "epoch": 0.17262638717632553, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2380 }, { "epoch": 0.17269891927177777, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2381 }, { "epoch": 0.17277145136723, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2382 }, { "epoch": 0.17284398346268223, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.9464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2383 }, { "epoch": 0.17291651555813448, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2384 }, { "epoch": 0.17298904765358672, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2385 }, { "epoch": 0.17306157974903896, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2386 }, { "epoch": 0.17313411184449118, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.22, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2387 }, { "epoch": 0.17320664393994342, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2388 }, { "epoch": 0.17327917603539567, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.4038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2389 }, { "epoch": 0.1733517081308479, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.1995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2390 }, { "epoch": 0.17342424022630012, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2391 }, { "epoch": 0.17349677232175237, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.7156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2392 }, { "epoch": 0.1735693044172046, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.9513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2393 }, { "epoch": 0.17364183651265686, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2394 }, { "epoch": 0.1737143686081091, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 9.1692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2395 }, { "epoch": 0.17378690070356131, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.0669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2396 }, { "epoch": 0.17385943279901356, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.0736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2397 }, { "epoch": 0.1739319648944658, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2398 }, { "epoch": 0.17400449698991804, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.1511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2399 }, { "epoch": 0.1740770290853703, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2400 }, { "epoch": 0.1741495611808225, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 9.2668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2401 }, { "epoch": 0.17422209327627475, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2402 }, { "epoch": 0.174294625371727, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.5865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2403 }, { "epoch": 0.17436715746717923, "grad_norm": 14.125, "learning_rate": 0.0003, "loss": 8.9198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2404 }, { "epoch": 0.17443968956263148, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2405 }, { "epoch": 0.1745122216580837, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2406 }, { "epoch": 0.17458475375353594, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2407 }, { "epoch": 0.17465728584898818, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.7511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2408 }, { "epoch": 0.17472981794444042, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.2539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2409 }, { "epoch": 0.17480235003989264, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2410 }, { "epoch": 0.17487488213534488, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2411 }, { "epoch": 0.17494741423079713, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2412 }, { "epoch": 0.17501994632624937, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.9743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2413 }, { "epoch": 0.17509247842170161, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.1612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2414 }, { "epoch": 0.17516501051715383, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2415 }, { "epoch": 0.17523754261260607, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2416 }, { "epoch": 0.17531007470805832, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.4656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2417 }, { "epoch": 0.17538260680351056, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.4075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2418 }, { "epoch": 0.1754551388989628, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2419 }, { "epoch": 0.17552767099441502, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.6784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2420 }, { "epoch": 0.17560020308986726, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.7528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2421 }, { "epoch": 0.1756727351853195, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2422 }, { "epoch": 0.17574526728077175, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.0411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2423 }, { "epoch": 0.17581779937622397, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.2203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2424 }, { "epoch": 0.1758903314716762, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.8824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2425 }, { "epoch": 0.17596286356712845, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2426 }, { "epoch": 0.1760353956625807, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.0609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2427 }, { "epoch": 0.17610792775803294, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2428 }, { "epoch": 0.17618045985348516, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2429 }, { "epoch": 0.1762529919489374, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.0968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2430 }, { "epoch": 0.17632552404438964, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.3621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2431 }, { "epoch": 0.17639805613984189, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.2487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2432 }, { "epoch": 0.17647058823529413, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.5076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2433 }, { "epoch": 0.17654312033074634, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.3848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2434 }, { "epoch": 0.1766156524261986, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.8701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2435 }, { "epoch": 0.17668818452165083, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2436 }, { "epoch": 0.17676071661710308, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.2144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2437 }, { "epoch": 0.17683324871255532, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2438 }, { "epoch": 0.17690578080800753, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.0698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2439 }, { "epoch": 0.17697831290345978, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2440 }, { "epoch": 0.17705084499891202, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2441 }, { "epoch": 0.17712337709436426, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.2563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2442 }, { "epoch": 0.17719590918981648, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2443 }, { "epoch": 0.17726844128526872, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2444 }, { "epoch": 0.17734097338072097, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.7311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2445 }, { "epoch": 0.1774135054761732, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2446 }, { "epoch": 0.17748603757162545, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 8.9847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2447 }, { "epoch": 0.17755856966707767, "grad_norm": 19.5, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2448 }, { "epoch": 0.1776311017625299, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.4868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2449 }, { "epoch": 0.17770363385798216, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2450 }, { "epoch": 0.1777761659534344, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2451 }, { "epoch": 0.17784869804888664, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.1595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2452 }, { "epoch": 0.17792123014433886, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.3901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2453 }, { "epoch": 0.1779937622397911, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2454 }, { "epoch": 0.17806629433524335, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2455 }, { "epoch": 0.1781388264306956, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.3738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2456 }, { "epoch": 0.17821135852614783, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.2411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2457 }, { "epoch": 0.17828389062160005, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.0598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2458 }, { "epoch": 0.1783564227170523, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2459 }, { "epoch": 0.17842895481250454, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2460 }, { "epoch": 0.17850148690795678, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2461 }, { "epoch": 0.178574019003409, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.1259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2462 }, { "epoch": 0.17864655109886124, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.3155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2463 }, { "epoch": 0.17871908319431348, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.2415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2464 }, { "epoch": 0.17879161528976573, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2465 }, { "epoch": 0.17886414738521797, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.4408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2466 }, { "epoch": 0.17893667948067019, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2467 }, { "epoch": 0.17900921157612243, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.5473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2468 }, { "epoch": 0.17908174367157467, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.6551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2469 }, { "epoch": 0.17915427576702692, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 9.137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2470 }, { "epoch": 0.17922680786247916, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.9773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2471 }, { "epoch": 0.17929933995793138, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.0363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2472 }, { "epoch": 0.17937187205338362, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.2681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2473 }, { "epoch": 0.17944440414883586, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2474 }, { "epoch": 0.1795169362442881, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.8034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2475 }, { "epoch": 0.17958946833974032, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.9623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2476 }, { "epoch": 0.17966200043519256, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.2151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2477 }, { "epoch": 0.1797345325306448, "grad_norm": 15.5, "learning_rate": 0.0003, "loss": 9.1332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2478 }, { "epoch": 0.17980706462609705, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2479 }, { "epoch": 0.1798795967215493, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2480 }, { "epoch": 0.1799521288170015, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.0754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2481 }, { "epoch": 0.18002466091245375, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.96, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2482 }, { "epoch": 0.180097193007906, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2483 }, { "epoch": 0.18016972510335824, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2484 }, { "epoch": 0.18024225719881048, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2485 }, { "epoch": 0.1803147892942627, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.9349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2486 }, { "epoch": 0.18038732138971494, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.8965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2487 }, { "epoch": 0.1804598534851672, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2488 }, { "epoch": 0.18053238558061943, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.4103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2489 }, { "epoch": 0.18060491767607167, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.1994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2490 }, { "epoch": 0.1806774497715239, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2491 }, { "epoch": 0.18074998186697613, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2492 }, { "epoch": 0.18082251396242838, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.0867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2493 }, { "epoch": 0.18089504605788062, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2494 }, { "epoch": 0.18096757815333284, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2495 }, { "epoch": 0.18104011024878508, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2496 }, { "epoch": 0.18111264234423732, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.3546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2497 }, { "epoch": 0.18118517443968957, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2498 }, { "epoch": 0.1812577065351418, "grad_norm": 25.5, "learning_rate": 0.0003, "loss": 8.7503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2499 }, { "epoch": 0.18133023863059403, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2500 }, { "epoch": 0.18140277072604627, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.3443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2501 }, { "epoch": 0.1814753028214985, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2502 }, { "epoch": 0.18154783491695076, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2503 }, { "epoch": 0.181620367012403, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2504 }, { "epoch": 0.18169289910785522, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2505 }, { "epoch": 0.18176543120330746, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2506 }, { "epoch": 0.1818379632987597, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2507 }, { "epoch": 0.18191049539421195, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2508 }, { "epoch": 0.1819830274896642, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2509 }, { "epoch": 0.1820555595851164, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.4963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2510 }, { "epoch": 0.18212809168056865, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.1361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2511 }, { "epoch": 0.1822006237760209, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.0104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2512 }, { "epoch": 0.18227315587147314, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.8272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2513 }, { "epoch": 0.18234568796692535, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.0321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2514 }, { "epoch": 0.1824182200623776, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2515 }, { "epoch": 0.18249075215782984, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2516 }, { "epoch": 0.18256328425328208, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2517 }, { "epoch": 0.18263581634873433, "grad_norm": 30.625, "learning_rate": 0.0003, "loss": 9.443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2518 }, { "epoch": 0.18270834844418654, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2519 }, { "epoch": 0.18278088053963878, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 9.1377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2520 }, { "epoch": 0.18285341263509103, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.6861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2521 }, { "epoch": 0.18292594473054327, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2522 }, { "epoch": 0.18299847682599552, "grad_norm": 1.4921875, "learning_rate": 0.0003, "loss": 8.8851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2523 }, { "epoch": 0.18307100892144773, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2524 }, { "epoch": 0.18314354101689997, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.3358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2525 }, { "epoch": 0.18321607311235222, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.5256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2526 }, { "epoch": 0.18328860520780446, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.4059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2527 }, { "epoch": 0.1833611373032567, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.1248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2528 }, { "epoch": 0.18343366939870892, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2529 }, { "epoch": 0.18350620149416116, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.4625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2530 }, { "epoch": 0.1835787335896134, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2531 }, { "epoch": 0.18365126568506565, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.1502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2532 }, { "epoch": 0.18372379778051787, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2533 }, { "epoch": 0.1837963298759701, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2534 }, { "epoch": 0.18386886197142235, "grad_norm": 16.5, "learning_rate": 0.0003, "loss": 9.6222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2535 }, { "epoch": 0.1839413940668746, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.5149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2536 }, { "epoch": 0.18401392616232684, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.3805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2537 }, { "epoch": 0.18408645825777906, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2538 }, { "epoch": 0.1841589903532313, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.9006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2539 }, { "epoch": 0.18423152244868354, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2540 }, { "epoch": 0.1843040545441358, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2541 }, { "epoch": 0.18437658663958803, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.2489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2542 }, { "epoch": 0.18444911873504025, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.6067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2543 }, { "epoch": 0.1845216508304925, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2544 }, { "epoch": 0.18459418292594473, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2545 }, { "epoch": 0.18466671502139698, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.2648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2546 }, { "epoch": 0.1847392471168492, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2547 }, { "epoch": 0.18481177921230144, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2548 }, { "epoch": 0.18488431130775368, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2549 }, { "epoch": 0.18495684340320592, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2550 }, { "epoch": 0.18502937549865817, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.3928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2551 }, { "epoch": 0.18510190759411038, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.8429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2552 }, { "epoch": 0.18517443968956263, "grad_norm": 1.5234375, "learning_rate": 0.0003, "loss": 8.7424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2553 }, { "epoch": 0.18524697178501487, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.1769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2554 }, { "epoch": 0.1853195038804671, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 9.4558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2555 }, { "epoch": 0.18539203597591936, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2556 }, { "epoch": 0.18546456807137157, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2557 }, { "epoch": 0.18553710016682382, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2558 }, { "epoch": 0.18560963226227606, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.2687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2559 }, { "epoch": 0.1856821643577283, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.7835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2560 }, { "epoch": 0.18575469645318055, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.2437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2561 }, { "epoch": 0.18582722854863276, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.2977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2562 }, { "epoch": 0.185899760644085, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2563 }, { "epoch": 0.18597229273953725, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.2109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2564 }, { "epoch": 0.1860448248349895, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.4776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2565 }, { "epoch": 0.1861173569304417, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2566 }, { "epoch": 0.18618988902589395, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.0483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2567 }, { "epoch": 0.1862624211213462, "grad_norm": 21.625, "learning_rate": 0.0003, "loss": 8.8987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2568 }, { "epoch": 0.18633495321679844, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2569 }, { "epoch": 0.18640748531225068, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2570 }, { "epoch": 0.1864800174077029, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.4154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2571 }, { "epoch": 0.18655254950315514, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.6158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2572 }, { "epoch": 0.18662508159860738, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2573 }, { "epoch": 0.18669761369405963, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2574 }, { "epoch": 0.18677014578951187, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2575 }, { "epoch": 0.1868426778849641, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2576 }, { "epoch": 0.18691520998041633, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.9472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2577 }, { "epoch": 0.18698774207586857, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2578 }, { "epoch": 0.18706027417132082, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.4371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2579 }, { "epoch": 0.18713280626677306, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.9357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2580 }, { "epoch": 0.18720533836222528, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.6399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2581 }, { "epoch": 0.18727787045767752, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.8863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2582 }, { "epoch": 0.18735040255312976, "grad_norm": 14.0625, "learning_rate": 0.0003, "loss": 9.3112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2583 }, { "epoch": 0.187422934648582, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2584 }, { "epoch": 0.18749546674403422, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.4455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2585 }, { "epoch": 0.18756799883948647, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2586 }, { "epoch": 0.1876405309349387, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.1976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2587 }, { "epoch": 0.18771306303039095, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.2446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2588 }, { "epoch": 0.1877855951258432, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2589 }, { "epoch": 0.1878581272212954, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.1959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2590 }, { "epoch": 0.18793065931674766, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2591 }, { "epoch": 0.1880031914121999, "grad_norm": 1.3359375, "learning_rate": 0.0003, "loss": 9.12, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2592 }, { "epoch": 0.18807572350765214, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2593 }, { "epoch": 0.1881482556031044, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2594 }, { "epoch": 0.1882207876985566, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2595 }, { "epoch": 0.18829331979400885, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.0744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2596 }, { "epoch": 0.1883658518894611, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2597 }, { "epoch": 0.18843838398491333, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2598 }, { "epoch": 0.18851091608036555, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.1185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2599 }, { "epoch": 0.1885834481758178, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.6578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2600 }, { "epoch": 0.18865598027127004, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2601 }, { "epoch": 0.18872851236672228, "grad_norm": 37.75, "learning_rate": 0.0003, "loss": 9.181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2602 }, { "epoch": 0.18880104446217452, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.0753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2603 }, { "epoch": 0.18887357655762674, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.2973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2604 }, { "epoch": 0.18894610865307898, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.9509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2605 }, { "epoch": 0.18901864074853122, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2606 }, { "epoch": 0.18909117284398347, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2607 }, { "epoch": 0.1891637049394357, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2608 }, { "epoch": 0.18923623703488793, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2609 }, { "epoch": 0.18930876913034017, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2610 }, { "epoch": 0.18938130122579241, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.4329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2611 }, { "epoch": 0.18945383332124466, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2612 }, { "epoch": 0.1895263654166969, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2613 }, { "epoch": 0.18959889751214912, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.1399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2614 }, { "epoch": 0.18967142960760136, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2615 }, { "epoch": 0.1897439617030536, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.7137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2616 }, { "epoch": 0.18981649379850585, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2617 }, { "epoch": 0.18988902589395806, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.5686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2618 }, { "epoch": 0.1899615579894103, "grad_norm": 16.0, "learning_rate": 0.0003, "loss": 9.3853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2619 }, { "epoch": 0.19003409008486255, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 8.9726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2620 }, { "epoch": 0.1901066221803148, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2621 }, { "epoch": 0.19017915427576704, "grad_norm": 22.625, "learning_rate": 0.0003, "loss": 9.2952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2622 }, { "epoch": 0.19025168637121925, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.7054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2623 }, { "epoch": 0.1903242184666715, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.0128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2624 }, { "epoch": 0.19039675056212374, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.1034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2625 }, { "epoch": 0.19046928265757598, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.1233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2626 }, { "epoch": 0.19054181475302823, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.0601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2627 }, { "epoch": 0.19061434684848044, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2628 }, { "epoch": 0.19068687894393269, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2629 }, { "epoch": 0.19075941103938493, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.4126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2630 }, { "epoch": 0.19083194313483717, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2631 }, { "epoch": 0.19090447523028942, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2632 }, { "epoch": 0.19097700732574163, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.08, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2633 }, { "epoch": 0.19104953942119388, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2634 }, { "epoch": 0.19112207151664612, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.8986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2635 }, { "epoch": 0.19119460361209836, "grad_norm": 1.4453125, "learning_rate": 0.0003, "loss": 9.1767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2636 }, { "epoch": 0.19126713570755058, "grad_norm": 1.4140625, "learning_rate": 0.0003, "loss": 9.2603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2637 }, { "epoch": 0.19133966780300282, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2638 }, { "epoch": 0.19141219989845507, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.9582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2639 }, { "epoch": 0.1914847319939073, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.0693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2640 }, { "epoch": 0.19155726408935955, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.1191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2641 }, { "epoch": 0.19162979618481177, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.3675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2642 }, { "epoch": 0.191702328280264, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.9905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2643 }, { "epoch": 0.19177486037571626, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.3404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2644 }, { "epoch": 0.1918473924711685, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2645 }, { "epoch": 0.19191992456662074, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.2512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2646 }, { "epoch": 0.19199245666207296, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.2216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2647 }, { "epoch": 0.1920649887575252, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 9.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2648 }, { "epoch": 0.19213752085297744, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.3204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2649 }, { "epoch": 0.1922100529484297, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.2666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2650 }, { "epoch": 0.1922825850438819, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.0008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2651 }, { "epoch": 0.19235511713933415, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2652 }, { "epoch": 0.1924276492347864, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2653 }, { "epoch": 0.19250018133023863, "grad_norm": 18.625, "learning_rate": 0.0003, "loss": 9.415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2654 }, { "epoch": 0.19257271342569088, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2655 }, { "epoch": 0.1926452455211431, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2656 }, { "epoch": 0.19271777761659534, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2657 }, { "epoch": 0.19279030971204758, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.5529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2658 }, { "epoch": 0.19286284180749982, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.6164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2659 }, { "epoch": 0.19293537390295207, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2660 }, { "epoch": 0.19300790599840428, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2661 }, { "epoch": 0.19308043809385653, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 8.8601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2662 }, { "epoch": 0.19315297018930877, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2663 }, { "epoch": 0.193225502284761, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2664 }, { "epoch": 0.19329803438021326, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.5313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2665 }, { "epoch": 0.19337056647566547, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.5682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2666 }, { "epoch": 0.19344309857111772, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2667 }, { "epoch": 0.19351563066656996, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2668 }, { "epoch": 0.1935881627620222, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2669 }, { "epoch": 0.19366069485747442, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.1463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2670 }, { "epoch": 0.19373322695292666, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.8487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2671 }, { "epoch": 0.1938057590483789, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 8.5942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2672 }, { "epoch": 0.19387829114383115, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.1887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2673 }, { "epoch": 0.1939508232392834, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.0902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2674 }, { "epoch": 0.1940233553347356, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.4119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2675 }, { "epoch": 0.19409588743018785, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.4843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2676 }, { "epoch": 0.1941684195256401, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2677 }, { "epoch": 0.19424095162109234, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 9.1283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2678 }, { "epoch": 0.19431348371654458, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2679 }, { "epoch": 0.1943860158119968, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.3325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2680 }, { "epoch": 0.19445854790744904, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.3954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2681 }, { "epoch": 0.19453108000290129, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2682 }, { "epoch": 0.19460361209835353, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.0507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2683 }, { "epoch": 0.19467614419380577, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.1661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2684 }, { "epoch": 0.194748676289258, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.2227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2685 }, { "epoch": 0.19482120838471023, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.3082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2686 }, { "epoch": 0.19489374048016248, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.3225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2687 }, { "epoch": 0.19496627257561472, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.7706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2688 }, { "epoch": 0.19503880467106693, "grad_norm": 33.0, "learning_rate": 0.0003, "loss": 9.2595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2689 }, { "epoch": 0.19511133676651918, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.2083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2690 }, { "epoch": 0.19518386886197142, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2691 }, { "epoch": 0.19525640095742366, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.6074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2692 }, { "epoch": 0.1953289330528759, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.9294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2693 }, { "epoch": 0.19540146514832812, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.8766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2694 }, { "epoch": 0.19547399724378037, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2695 }, { "epoch": 0.1955465293392326, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.4687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2696 }, { "epoch": 0.19561906143468485, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2697 }, { "epoch": 0.1956915935301371, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2698 }, { "epoch": 0.1957641256255893, "grad_norm": 1.3125, "learning_rate": 0.0003, "loss": 9.1511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2699 }, { "epoch": 0.19583665772104156, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.2445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2700 }, { "epoch": 0.1959091898164938, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2701 }, { "epoch": 0.19598172191194604, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.5864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2702 }, { "epoch": 0.1960542540073983, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2703 }, { "epoch": 0.1961267861028505, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.2538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2704 }, { "epoch": 0.19619931819830275, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2705 }, { "epoch": 0.196271850293755, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.2099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2706 }, { "epoch": 0.19634438238920723, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2707 }, { "epoch": 0.19641691448465945, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.0843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2708 }, { "epoch": 0.1964894465801117, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.1404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2709 }, { "epoch": 0.19656197867556394, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 9.4514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2710 }, { "epoch": 0.19663451077101618, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2711 }, { "epoch": 0.19670704286646842, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.2152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2712 }, { "epoch": 0.19677957496192064, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2713 }, { "epoch": 0.19685210705737288, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.4319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2714 }, { "epoch": 0.19692463915282513, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.8115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2715 }, { "epoch": 0.19699717124827737, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.9241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2716 }, { "epoch": 0.1970697033437296, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2717 }, { "epoch": 0.19714223543918183, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.2919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2718 }, { "epoch": 0.19721476753463407, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.4939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2719 }, { "epoch": 0.19728729963008632, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.2998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2720 }, { "epoch": 0.19735983172553856, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2721 }, { "epoch": 0.19743236382099077, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.1442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2722 }, { "epoch": 0.19750489591644302, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 9.3737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2723 }, { "epoch": 0.19757742801189526, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2724 }, { "epoch": 0.1976499601073475, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.8261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2725 }, { "epoch": 0.19772249220279975, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.4947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2726 }, { "epoch": 0.19779502429825196, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2727 }, { "epoch": 0.1978675563937042, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.4394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2728 }, { "epoch": 0.19794008848915645, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 9.2681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2729 }, { "epoch": 0.1980126205846087, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2730 }, { "epoch": 0.19808515268006094, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2731 }, { "epoch": 0.19815768477551315, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2732 }, { "epoch": 0.1982302168709654, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.2121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2733 }, { "epoch": 0.19830274896641764, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2734 }, { "epoch": 0.19837528106186988, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.3989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2735 }, { "epoch": 0.19844781315732213, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.3191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2736 }, { "epoch": 0.19852034525277434, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.2748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2737 }, { "epoch": 0.1985928773482266, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2738 }, { "epoch": 0.19866540944367883, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 9.1854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2739 }, { "epoch": 0.19873794153913107, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2740 }, { "epoch": 0.1988104736345833, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.3228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2741 }, { "epoch": 0.19888300573003553, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.0896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2742 }, { "epoch": 0.19895553782548778, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.5463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2743 }, { "epoch": 0.19902806992094002, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.4835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2744 }, { "epoch": 0.19910060201639226, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.3783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2745 }, { "epoch": 0.19917313411184448, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.0573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2746 }, { "epoch": 0.19924566620729672, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.9741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2747 }, { "epoch": 0.19931819830274897, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2748 }, { "epoch": 0.1993907303982012, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.6519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2749 }, { "epoch": 0.19946326249365345, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.4044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2750 }, { "epoch": 0.19953579458910567, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2751 }, { "epoch": 0.1996083266845579, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2752 }, { "epoch": 0.19968085878001016, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.2696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2753 }, { "epoch": 0.1997533908754624, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2754 }, { "epoch": 0.19982592297091464, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.0894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2755 }, { "epoch": 0.19989845506636686, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2756 }, { "epoch": 0.1999709871618191, "grad_norm": 34.5, "learning_rate": 0.0003, "loss": 9.3026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2757 }, { "epoch": 0.20004351925727135, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2758 }, { "epoch": 0.2001160513527236, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 8.9762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2759 }, { "epoch": 0.2001885834481758, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 9.2062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2760 }, { "epoch": 0.20026111554362805, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2761 }, { "epoch": 0.2003336476390803, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2762 }, { "epoch": 0.20040617973453254, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.2251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2763 }, { "epoch": 0.20047871182998478, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.2588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2764 }, { "epoch": 0.200551243925437, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2765 }, { "epoch": 0.20062377602088924, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.7268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2766 }, { "epoch": 0.20069630811634148, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2767 }, { "epoch": 0.20076884021179373, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2768 }, { "epoch": 0.20084137230724597, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.0366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2769 }, { "epoch": 0.20091390440269818, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.9293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2770 }, { "epoch": 0.20098643649815043, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2771 }, { "epoch": 0.20105896859360267, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.4358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2772 }, { "epoch": 0.20113150068905492, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2773 }, { "epoch": 0.20120403278450713, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.9397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2774 }, { "epoch": 0.20127656487995937, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2775 }, { "epoch": 0.20134909697541162, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2776 }, { "epoch": 0.20142162907086386, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2777 }, { "epoch": 0.2014941611663161, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.1542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2778 }, { "epoch": 0.20156669326176832, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2779 }, { "epoch": 0.20163922535722056, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2780 }, { "epoch": 0.2017117574526728, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2781 }, { "epoch": 0.20178428954812505, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.9673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2782 }, { "epoch": 0.2018568216435773, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2783 }, { "epoch": 0.2019293537390295, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2784 }, { "epoch": 0.20200188583448175, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2785 }, { "epoch": 0.202074417929934, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.0637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2786 }, { "epoch": 0.20214695002538624, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.4319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2787 }, { "epoch": 0.20221948212083848, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.4017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2788 }, { "epoch": 0.2022920142162907, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.4861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2789 }, { "epoch": 0.20236454631174294, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.2533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2790 }, { "epoch": 0.2024370784071952, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.2329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2791 }, { "epoch": 0.20250961050264743, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.0606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2792 }, { "epoch": 0.20258214259809965, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2793 }, { "epoch": 0.2026546746935519, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2794 }, { "epoch": 0.20272720678900413, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2795 }, { "epoch": 0.20279973888445638, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2796 }, { "epoch": 0.20287227097990862, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.6384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2797 }, { "epoch": 0.20294480307536084, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2798 }, { "epoch": 0.20301733517081308, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2799 }, { "epoch": 0.20308986726626532, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.9472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2800 }, { "epoch": 0.20316239936171757, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2801 }, { "epoch": 0.2032349314571698, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.1296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2802 }, { "epoch": 0.20330746355262203, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.6653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2803 }, { "epoch": 0.20337999564807427, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2804 }, { "epoch": 0.2034525277435265, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2805 }, { "epoch": 0.20352505983897876, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.7581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2806 }, { "epoch": 0.203597591934431, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2807 }, { "epoch": 0.20367012402988321, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2808 }, { "epoch": 0.20374265612533546, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.4332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2809 }, { "epoch": 0.2038151882207877, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2810 }, { "epoch": 0.20388772031623995, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.8095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2811 }, { "epoch": 0.20396025241169216, "grad_norm": 14.25, "learning_rate": 0.0003, "loss": 9.1042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2812 }, { "epoch": 0.2040327845071444, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.9418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2813 }, { "epoch": 0.20410531660259665, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.3781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2814 }, { "epoch": 0.2041778486980489, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.0153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2815 }, { "epoch": 0.20425038079350114, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.3073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2816 }, { "epoch": 0.20432291288895335, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2817 }, { "epoch": 0.2043954449844056, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 9.1136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2818 }, { "epoch": 0.20446797707985784, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.5066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2819 }, { "epoch": 0.20454050917531008, "grad_norm": 18.875, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2820 }, { "epoch": 0.20461304127076232, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.9699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2821 }, { "epoch": 0.20468557336621454, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2822 }, { "epoch": 0.20475810546166678, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 8.9647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2823 }, { "epoch": 0.20483063755711903, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2824 }, { "epoch": 0.20490316965257127, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2825 }, { "epoch": 0.2049757017480235, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2826 }, { "epoch": 0.20504823384347573, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2827 }, { "epoch": 0.20512076593892797, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.8922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2828 }, { "epoch": 0.20519329803438022, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 8.663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2829 }, { "epoch": 0.20526583012983246, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.1542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2830 }, { "epoch": 0.20533836222528468, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2831 }, { "epoch": 0.20541089432073692, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.7472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2832 }, { "epoch": 0.20548342641618916, "grad_norm": 1.6796875, "learning_rate": 0.0003, "loss": 8.8, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2833 }, { "epoch": 0.2055559585116414, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2834 }, { "epoch": 0.20562849060709365, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2835 }, { "epoch": 0.20570102270254587, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.0683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2836 }, { "epoch": 0.2057735547979981, "grad_norm": 72.0, "learning_rate": 0.0003, "loss": 9.2056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2837 }, { "epoch": 0.20584608689345035, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.3762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2838 }, { "epoch": 0.2059186189889026, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.4035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2839 }, { "epoch": 0.20599115108435484, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 8.7868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2840 }, { "epoch": 0.20606368317980706, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.0257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2841 }, { "epoch": 0.2061362152752593, "grad_norm": 19.875, "learning_rate": 0.0003, "loss": 9.3706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2842 }, { "epoch": 0.20620874737071154, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2843 }, { "epoch": 0.20628127946616379, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.2371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2844 }, { "epoch": 0.206353811561616, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.2425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2845 }, { "epoch": 0.20642634365706825, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.7408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2846 }, { "epoch": 0.2064988757525205, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.3183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2847 }, { "epoch": 0.20657140784797273, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2848 }, { "epoch": 0.20664393994342498, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.9555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2849 }, { "epoch": 0.2067164720388772, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2850 }, { "epoch": 0.20678900413432943, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2851 }, { "epoch": 0.20686153622978168, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.3471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2852 }, { "epoch": 0.20693406832523392, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2853 }, { "epoch": 0.20700660042068617, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2854 }, { "epoch": 0.20707913251613838, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.7939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2855 }, { "epoch": 0.20715166461159062, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2856 }, { "epoch": 0.20722419670704287, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.2647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2857 }, { "epoch": 0.2072967288024951, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 8.7698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2858 }, { "epoch": 0.20736926089794736, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.5039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2859 }, { "epoch": 0.20744179299339957, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 9.181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2860 }, { "epoch": 0.20751432508885181, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 9.1229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2861 }, { "epoch": 0.20758685718430406, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.2787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2862 }, { "epoch": 0.2076593892797563, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.5465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2863 }, { "epoch": 0.20773192137520852, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.0122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2864 }, { "epoch": 0.20780445347066076, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2865 }, { "epoch": 0.207876985566113, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.4252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2866 }, { "epoch": 0.20794951766156525, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2867 }, { "epoch": 0.2080220497570175, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2868 }, { "epoch": 0.2080945818524697, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.5153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2869 }, { "epoch": 0.20816711394792195, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2870 }, { "epoch": 0.2082396460433742, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.2125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2871 }, { "epoch": 0.20831217813882644, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.8679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2872 }, { "epoch": 0.20838471023427868, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2873 }, { "epoch": 0.2084572423297309, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.1771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2874 }, { "epoch": 0.20852977442518314, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2875 }, { "epoch": 0.20860230652063538, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.0998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2876 }, { "epoch": 0.20867483861608763, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2877 }, { "epoch": 0.20874737071153987, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2878 }, { "epoch": 0.20881990280699209, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2879 }, { "epoch": 0.20889243490244433, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.7775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2880 }, { "epoch": 0.20896496699789657, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2881 }, { "epoch": 0.20903749909334882, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2882 }, { "epoch": 0.20911003118880103, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.07, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2883 }, { "epoch": 0.20918256328425328, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2884 }, { "epoch": 0.20925509537970552, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.1229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2885 }, { "epoch": 0.20932762747515776, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.0954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2886 }, { "epoch": 0.20940015957061, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.4816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2887 }, { "epoch": 0.20947269166606222, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2888 }, { "epoch": 0.20954522376151447, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.9637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2889 }, { "epoch": 0.2096177558569667, "grad_norm": 14.6875, "learning_rate": 0.0003, "loss": 9.5851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2890 }, { "epoch": 0.20969028795241895, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2891 }, { "epoch": 0.2097628200478712, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2892 }, { "epoch": 0.2098353521433234, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2893 }, { "epoch": 0.20990788423877565, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2894 }, { "epoch": 0.2099804163342279, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.0957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2895 }, { "epoch": 0.21005294842968014, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 8.8714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2896 }, { "epoch": 0.21012548052513236, "grad_norm": 12.6875, "learning_rate": 0.0003, "loss": 8.6811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2897 }, { "epoch": 0.2101980126205846, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.5405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2898 }, { "epoch": 0.21027054471603684, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.4497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2899 }, { "epoch": 0.2103430768114891, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2900 }, { "epoch": 0.21041560890694133, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.2624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2901 }, { "epoch": 0.21048814100239355, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2902 }, { "epoch": 0.2105606730978458, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.0547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2903 }, { "epoch": 0.21063320519329803, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.1256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2904 }, { "epoch": 0.21070573728875028, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2905 }, { "epoch": 0.21077826938420252, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.2146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2906 }, { "epoch": 0.21085080147965474, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.4377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2907 }, { "epoch": 0.21092333357510698, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2908 }, { "epoch": 0.21099586567055922, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.0986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2909 }, { "epoch": 0.21106839776601147, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2910 }, { "epoch": 0.2111409298614637, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2911 }, { "epoch": 0.21121346195691593, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.0894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2912 }, { "epoch": 0.21128599405236817, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2913 }, { "epoch": 0.2113585261478204, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.7636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2914 }, { "epoch": 0.21143105824327266, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.1681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2915 }, { "epoch": 0.21150359033872487, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.3515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2916 }, { "epoch": 0.21157612243417712, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2917 }, { "epoch": 0.21164865452962936, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2918 }, { "epoch": 0.2117211866250816, "grad_norm": 55.5, "learning_rate": 0.0003, "loss": 9.2109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2919 }, { "epoch": 0.21179371872053385, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2920 }, { "epoch": 0.21186625081598606, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2921 }, { "epoch": 0.2119387829114383, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2922 }, { "epoch": 0.21201131500689055, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2923 }, { "epoch": 0.2120838471023428, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.2003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2924 }, { "epoch": 0.21215637919779504, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.2524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2925 }, { "epoch": 0.21222891129324725, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.3677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2926 }, { "epoch": 0.2123014433886995, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2927 }, { "epoch": 0.21237397548415174, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2928 }, { "epoch": 0.21244650757960398, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 8.6213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2929 }, { "epoch": 0.21251903967505623, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.2939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2930 }, { "epoch": 0.21259157177050844, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2931 }, { "epoch": 0.21266410386596069, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2932 }, { "epoch": 0.21273663596141293, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.2628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2933 }, { "epoch": 0.21280916805686517, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.2117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2934 }, { "epoch": 0.2128817001523174, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2935 }, { "epoch": 0.21295423224776963, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.2198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2936 }, { "epoch": 0.21302676434322187, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 8.993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2937 }, { "epoch": 0.21309929643867412, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2938 }, { "epoch": 0.21317182853412636, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2939 }, { "epoch": 0.21324436062957858, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 9.1827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2940 }, { "epoch": 0.21331689272503082, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.0183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2941 }, { "epoch": 0.21338942482048306, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.8648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2942 }, { "epoch": 0.2134619569159353, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2943 }, { "epoch": 0.21353448901138755, "grad_norm": 11.875, "learning_rate": 0.0003, "loss": 8.9425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2944 }, { "epoch": 0.21360702110683977, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2945 }, { "epoch": 0.213679553202292, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2946 }, { "epoch": 0.21375208529774425, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2947 }, { "epoch": 0.2138246173931965, "grad_norm": 70.5, "learning_rate": 0.0003, "loss": 9.2427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2948 }, { "epoch": 0.2138971494886487, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2949 }, { "epoch": 0.21396968158410096, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2950 }, { "epoch": 0.2140422136795532, "grad_norm": 27.375, "learning_rate": 0.0003, "loss": 9.3894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2951 }, { "epoch": 0.21411474577500544, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.3155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2952 }, { "epoch": 0.2141872778704577, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.7702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2953 }, { "epoch": 0.2142598099659099, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.3279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2954 }, { "epoch": 0.21433234206136215, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2955 }, { "epoch": 0.2144048741568144, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.3193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2956 }, { "epoch": 0.21447740625226663, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.3728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2957 }, { "epoch": 0.21454993834771888, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2958 }, { "epoch": 0.2146224704431711, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.3686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2959 }, { "epoch": 0.21469500253862334, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.2316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2960 }, { "epoch": 0.21476753463407558, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.8437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2961 }, { "epoch": 0.21484006672952782, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2962 }, { "epoch": 0.21491259882498007, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2963 }, { "epoch": 0.21498513092043228, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2964 }, { "epoch": 0.21505766301588453, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 8.9968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2965 }, { "epoch": 0.21513019511133677, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.8717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2966 }, { "epoch": 0.215202727206789, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2967 }, { "epoch": 0.21527525930224123, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2968 }, { "epoch": 0.21534779139769347, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.6819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2969 }, { "epoch": 0.21542032349314572, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2970 }, { "epoch": 0.21549285558859796, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.0494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2971 }, { "epoch": 0.2155653876840502, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2972 }, { "epoch": 0.21563791977950242, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2973 }, { "epoch": 0.21571045187495466, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 8.7478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2974 }, { "epoch": 0.2157829839704069, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.3514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2975 }, { "epoch": 0.21585551606585915, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2976 }, { "epoch": 0.2159280481613114, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.3238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2977 }, { "epoch": 0.2160005802567636, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2978 }, { "epoch": 0.21607311235221585, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.5386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2979 }, { "epoch": 0.2161456444476681, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2980 }, { "epoch": 0.21621817654312034, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.3399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2981 }, { "epoch": 0.21629070863857258, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2982 }, { "epoch": 0.2163632407340248, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.9455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2983 }, { "epoch": 0.21643577282947704, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.1718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2984 }, { "epoch": 0.21650830492492928, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2985 }, { "epoch": 0.21658083702038153, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.8121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2986 }, { "epoch": 0.21665336911583374, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2987 }, { "epoch": 0.216725901211286, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2988 }, { "epoch": 0.21679843330673823, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.7001, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2989 }, { "epoch": 0.21687096540219047, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2990 }, { "epoch": 0.21694349749764272, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.7462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2991 }, { "epoch": 0.21701602959309493, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.0731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2992 }, { "epoch": 0.21708856168854718, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2993 }, { "epoch": 0.21716109378399942, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2994 }, { "epoch": 0.21723362587945166, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.3114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2995 }, { "epoch": 0.2173061579749039, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2996 }, { "epoch": 0.21737869007035612, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2997 }, { "epoch": 0.21745122216580837, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2998 }, { "epoch": 0.2175237542612606, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 2999 }, { "epoch": 0.21759628635671285, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.5742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3000 }, { "epoch": 0.21766881845216507, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.2882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3001 }, { "epoch": 0.2177413505476173, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3002 }, { "epoch": 0.21781388264306956, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.1021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3003 }, { "epoch": 0.2178864147385218, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.6912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3004 }, { "epoch": 0.21795894683397404, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3005 }, { "epoch": 0.21803147892942626, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.8015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3006 }, { "epoch": 0.2181040110248785, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3007 }, { "epoch": 0.21817654312033075, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3008 }, { "epoch": 0.218249075215783, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.8256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3009 }, { "epoch": 0.21832160731123523, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.6521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3010 }, { "epoch": 0.21839413940668745, "grad_norm": 1.5546875, "learning_rate": 0.0003, "loss": 9.3229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3011 }, { "epoch": 0.2184666715021397, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3012 }, { "epoch": 0.21853920359759194, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.1045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3013 }, { "epoch": 0.21861173569304418, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3014 }, { "epoch": 0.21868426778849642, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.2569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3015 }, { "epoch": 0.21875679988394864, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3016 }, { "epoch": 0.21882933197940088, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3017 }, { "epoch": 0.21890186407485313, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3018 }, { "epoch": 0.21897439617030537, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.9781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3019 }, { "epoch": 0.21904692826575758, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.1587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3020 }, { "epoch": 0.21911946036120983, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.2268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3021 }, { "epoch": 0.21919199245666207, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.2575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3022 }, { "epoch": 0.21926452455211431, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.4465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3023 }, { "epoch": 0.21933705664756656, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3024 }, { "epoch": 0.21940958874301877, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3025 }, { "epoch": 0.21948212083847102, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.1005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3026 }, { "epoch": 0.21955465293392326, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.7762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3027 }, { "epoch": 0.2196271850293755, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3028 }, { "epoch": 0.21969971712482775, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.5715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3029 }, { "epoch": 0.21977224922027996, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3030 }, { "epoch": 0.2198447813157322, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 9.2673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3031 }, { "epoch": 0.21991731341118445, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.3656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3032 }, { "epoch": 0.2199898455066367, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.5628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3033 }, { "epoch": 0.22006237760208894, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3034 }, { "epoch": 0.22013490969754115, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3035 }, { "epoch": 0.2202074417929934, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3036 }, { "epoch": 0.22027997388844564, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.1369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3037 }, { "epoch": 0.22035250598389788, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3038 }, { "epoch": 0.2204250380793501, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3039 }, { "epoch": 0.22049757017480234, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3040 }, { "epoch": 0.2205701022702546, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.9535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3041 }, { "epoch": 0.22064263436570683, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.3381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3042 }, { "epoch": 0.22071516646115907, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3043 }, { "epoch": 0.2207876985566113, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3044 }, { "epoch": 0.22086023065206353, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.1759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3045 }, { "epoch": 0.22093276274751578, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3046 }, { "epoch": 0.22100529484296802, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3047 }, { "epoch": 0.22107782693842026, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3048 }, { "epoch": 0.22115035903387248, "grad_norm": 15.375, "learning_rate": 0.0003, "loss": 9.1983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3049 }, { "epoch": 0.22122289112932472, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.1228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3050 }, { "epoch": 0.22129542322477697, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3051 }, { "epoch": 0.2213679553202292, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3052 }, { "epoch": 0.22144048741568145, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.6845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3053 }, { "epoch": 0.22151301951113367, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3054 }, { "epoch": 0.2215855516065859, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.2464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3055 }, { "epoch": 0.22165808370203816, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.3013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3056 }, { "epoch": 0.2217306157974904, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.1358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3057 }, { "epoch": 0.22180314789294261, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3058 }, { "epoch": 0.22187567998839486, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3059 }, { "epoch": 0.2219482120838471, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.3418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3060 }, { "epoch": 0.22202074417929935, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3061 }, { "epoch": 0.2220932762747516, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.6344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3062 }, { "epoch": 0.2221658083702038, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.0676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3063 }, { "epoch": 0.22223834046565605, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.9862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3064 }, { "epoch": 0.2223108725611083, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.4687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3065 }, { "epoch": 0.22238340465656053, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.5873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3066 }, { "epoch": 0.22245593675201278, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.5243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3067 }, { "epoch": 0.222528468847465, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.4371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3068 }, { "epoch": 0.22260100094291724, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.2516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3069 }, { "epoch": 0.22267353303836948, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3070 }, { "epoch": 0.22274606513382172, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 9.1457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3071 }, { "epoch": 0.22281859722927394, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3072 }, { "epoch": 0.22289112932472618, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3073 }, { "epoch": 0.22296366142017843, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3074 }, { "epoch": 0.22303619351563067, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.7262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3075 }, { "epoch": 0.22310872561108291, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.4168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3076 }, { "epoch": 0.22318125770653513, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.3559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3077 }, { "epoch": 0.22325378980198737, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3078 }, { "epoch": 0.22332632189743962, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3079 }, { "epoch": 0.22339885399289186, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3080 }, { "epoch": 0.2234713860883441, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3081 }, { "epoch": 0.22354391818379632, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 9.3272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3082 }, { "epoch": 0.22361645027924856, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3083 }, { "epoch": 0.2236889823747008, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.2717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3084 }, { "epoch": 0.22376151447015305, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3085 }, { "epoch": 0.2238340465656053, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.3841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3086 }, { "epoch": 0.2239065786610575, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.1308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3087 }, { "epoch": 0.22397911075650975, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.6293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3088 }, { "epoch": 0.224051642851962, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.0995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3089 }, { "epoch": 0.22412417494741424, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.3567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3090 }, { "epoch": 0.22419670704286646, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.3333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3091 }, { "epoch": 0.2242692391383187, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3092 }, { "epoch": 0.22434177123377094, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3093 }, { "epoch": 0.22441430332922319, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3094 }, { "epoch": 0.22448683542467543, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.9751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3095 }, { "epoch": 0.22455936752012765, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3096 }, { "epoch": 0.2246318996155799, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.9142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3097 }, { "epoch": 0.22470443171103213, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.2384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3098 }, { "epoch": 0.22477696380648438, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3099 }, { "epoch": 0.22484949590193662, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3100 }, { "epoch": 0.22492202799738883, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3101 }, { "epoch": 0.22499456009284108, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3102 }, { "epoch": 0.22506709218829332, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.4484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3103 }, { "epoch": 0.22513962428374557, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3104 }, { "epoch": 0.2252121563791978, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3105 }, { "epoch": 0.22528468847465002, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3106 }, { "epoch": 0.22535722057010227, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.0496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3107 }, { "epoch": 0.2254297526655545, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.2318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3108 }, { "epoch": 0.22550228476100675, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.6113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3109 }, { "epoch": 0.22557481685645897, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.5894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3110 }, { "epoch": 0.22564734895191121, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.1093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3111 }, { "epoch": 0.22571988104736346, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.1539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3112 }, { "epoch": 0.2257924131428157, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3113 }, { "epoch": 0.22586494523826794, "grad_norm": 1.4140625, "learning_rate": 0.0003, "loss": 9.6548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3114 }, { "epoch": 0.22593747733372016, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3115 }, { "epoch": 0.2260100094291724, "grad_norm": 44.75, "learning_rate": 0.0003, "loss": 8.9999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3116 }, { "epoch": 0.22608254152462465, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.0158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3117 }, { "epoch": 0.2261550736200769, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3118 }, { "epoch": 0.22622760571552913, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.5137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3119 }, { "epoch": 0.22630013781098135, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.2073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3120 }, { "epoch": 0.2263726699064336, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.4246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3121 }, { "epoch": 0.22644520200188584, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.31, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3122 }, { "epoch": 0.22651773409733808, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.6073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3123 }, { "epoch": 0.2265902661927903, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3124 }, { "epoch": 0.22666279828824254, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.0315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3125 }, { "epoch": 0.22673533038369478, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.89, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3126 }, { "epoch": 0.22680786247914703, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3127 }, { "epoch": 0.22688039457459927, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3128 }, { "epoch": 0.22695292667005149, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.2227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3129 }, { "epoch": 0.22702545876550373, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.0983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3130 }, { "epoch": 0.22709799086095597, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.2878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3131 }, { "epoch": 0.22717052295640822, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3132 }, { "epoch": 0.22724305505186046, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.4037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3133 }, { "epoch": 0.22731558714731268, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.4729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3134 }, { "epoch": 0.22738811924276492, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3135 }, { "epoch": 0.22746065133821716, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3136 }, { "epoch": 0.2275331834336694, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.1766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3137 }, { "epoch": 0.22760571552912165, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3138 }, { "epoch": 0.22767824762457387, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3139 }, { "epoch": 0.2277507797200261, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.7329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3140 }, { "epoch": 0.22782331181547835, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.6553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3141 }, { "epoch": 0.2278958439109306, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.0952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3142 }, { "epoch": 0.2279683760063828, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3143 }, { "epoch": 0.22804090810183505, "grad_norm": 12.5625, "learning_rate": 0.0003, "loss": 9.0481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3144 }, { "epoch": 0.2281134401972873, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3145 }, { "epoch": 0.22818597229273954, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.7616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3146 }, { "epoch": 0.22825850438819179, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.9434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3147 }, { "epoch": 0.228331036483644, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3148 }, { "epoch": 0.22840356857909624, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.8901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3149 }, { "epoch": 0.2284761006745485, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.8723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3150 }, { "epoch": 0.22854863277000073, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3151 }, { "epoch": 0.22862116486545297, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3152 }, { "epoch": 0.2286936969609052, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.7215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3153 }, { "epoch": 0.22876622905635743, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 9.452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3154 }, { "epoch": 0.22883876115180968, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.5549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3155 }, { "epoch": 0.22891129324726192, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.9002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3156 }, { "epoch": 0.22898382534271416, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.0907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3157 }, { "epoch": 0.22905635743816638, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3158 }, { "epoch": 0.22912888953361862, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3159 }, { "epoch": 0.22920142162907087, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3160 }, { "epoch": 0.2292739537245231, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3161 }, { "epoch": 0.22934648581997533, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.8156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3162 }, { "epoch": 0.22941901791542757, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.5848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3163 }, { "epoch": 0.2294915500108798, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.69, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3164 }, { "epoch": 0.22956408210633206, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3165 }, { "epoch": 0.2296366142017843, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.2545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3166 }, { "epoch": 0.22970914629723652, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.0916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3167 }, { "epoch": 0.22978167839268876, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3168 }, { "epoch": 0.229854210488141, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 9.1907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3169 }, { "epoch": 0.22992674258359325, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.3147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3170 }, { "epoch": 0.2299992746790455, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3171 }, { "epoch": 0.2300718067744977, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3172 }, { "epoch": 0.23014433886994995, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3173 }, { "epoch": 0.2302168709654022, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.6721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3174 }, { "epoch": 0.23028940306085444, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3175 }, { "epoch": 0.23036193515630665, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.2758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3176 }, { "epoch": 0.2304344672517589, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.5432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3177 }, { "epoch": 0.23050699934721114, "grad_norm": 1.3203125, "learning_rate": 0.0003, "loss": 8.7122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3178 }, { "epoch": 0.23057953144266338, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.1405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3179 }, { "epoch": 0.23065206353811563, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.5546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3180 }, { "epoch": 0.23072459563356784, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.4331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3181 }, { "epoch": 0.23079712772902009, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.4355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3182 }, { "epoch": 0.23086965982447233, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.2005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3183 }, { "epoch": 0.23094219191992457, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3184 }, { "epoch": 0.23101472401537682, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.5045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3185 }, { "epoch": 0.23108725611082903, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3186 }, { "epoch": 0.23115978820628127, "grad_norm": 25.625, "learning_rate": 0.0003, "loss": 9.4316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3187 }, { "epoch": 0.23123232030173352, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3188 }, { "epoch": 0.23130485239718576, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3189 }, { "epoch": 0.231377384492638, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.7366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3190 }, { "epoch": 0.23144991658809022, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3191 }, { "epoch": 0.23152244868354246, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3192 }, { "epoch": 0.2315949807789947, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3193 }, { "epoch": 0.23166751287444695, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.0804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3194 }, { "epoch": 0.23174004496989917, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.3672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3195 }, { "epoch": 0.2318125770653514, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.3864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3196 }, { "epoch": 0.23188510916080365, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3197 }, { "epoch": 0.2319576412562559, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3198 }, { "epoch": 0.23203017335170814, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.1044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3199 }, { "epoch": 0.23210270544716036, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.3587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3200 }, { "epoch": 0.2321752375426126, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3201 }, { "epoch": 0.23224776963806484, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.1943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3202 }, { "epoch": 0.2323203017335171, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3203 }, { "epoch": 0.23239283382896933, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3204 }, { "epoch": 0.23246536592442155, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3205 }, { "epoch": 0.2325378980198738, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3206 }, { "epoch": 0.23261043011532603, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.2368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3207 }, { "epoch": 0.23268296221077828, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.9876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3208 }, { "epoch": 0.23275549430623052, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.0448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3209 }, { "epoch": 0.23282802640168274, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3210 }, { "epoch": 0.23290055849713498, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3211 }, { "epoch": 0.23297309059258722, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3212 }, { "epoch": 0.23304562268803947, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3213 }, { "epoch": 0.23311815478349168, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3214 }, { "epoch": 0.23319068687894393, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3215 }, { "epoch": 0.23326321897439617, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.4555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3216 }, { "epoch": 0.2333357510698484, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.0169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3217 }, { "epoch": 0.23340828316530066, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3218 }, { "epoch": 0.23348081526075287, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3219 }, { "epoch": 0.23355334735620512, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3220 }, { "epoch": 0.23362587945165736, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3221 }, { "epoch": 0.2336984115471096, "grad_norm": 31.375, "learning_rate": 0.0003, "loss": 9.2373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3222 }, { "epoch": 0.23377094364256185, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.3685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3223 }, { "epoch": 0.23384347573801406, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3224 }, { "epoch": 0.2339160078334663, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3225 }, { "epoch": 0.23398853992891855, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3226 }, { "epoch": 0.2340610720243708, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3227 }, { "epoch": 0.23413360411982304, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.5613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3228 }, { "epoch": 0.23420613621527525, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 8.9921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3229 }, { "epoch": 0.2342786683107275, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.1127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3230 }, { "epoch": 0.23435120040617974, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.5524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3231 }, { "epoch": 0.23442373250163198, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3232 }, { "epoch": 0.2344962645970842, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.6269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3233 }, { "epoch": 0.23456879669253644, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.2956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3234 }, { "epoch": 0.23464132878798868, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3235 }, { "epoch": 0.23471386088344093, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.0663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3236 }, { "epoch": 0.23478639297889317, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3237 }, { "epoch": 0.2348589250743454, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3238 }, { "epoch": 0.23493145716979763, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3239 }, { "epoch": 0.23500398926524987, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3240 }, { "epoch": 0.23507652136070212, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.0117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3241 }, { "epoch": 0.23514905345615436, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.0104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3242 }, { "epoch": 0.23522158555160658, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.2625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3243 }, { "epoch": 0.23529411764705882, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.2673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3244 }, { "epoch": 0.23536664974251106, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 8.9593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3245 }, { "epoch": 0.2354391818379633, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 9.1352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3246 }, { "epoch": 0.23551171393341552, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3247 }, { "epoch": 0.23558424602886777, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.1082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3248 }, { "epoch": 0.23565677812432, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3249 }, { "epoch": 0.23572931021977225, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3250 }, { "epoch": 0.2358018423152245, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.2628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3251 }, { "epoch": 0.2358743744106767, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3252 }, { "epoch": 0.23594690650612896, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3253 }, { "epoch": 0.2360194386015812, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.4802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3254 }, { "epoch": 0.23609197069703344, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3255 }, { "epoch": 0.2361645027924857, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3256 }, { "epoch": 0.2362370348879379, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3257 }, { "epoch": 0.23630956698339015, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 8.7994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3258 }, { "epoch": 0.2363820990788424, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 8.5168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3259 }, { "epoch": 0.23645463117429463, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.4515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3260 }, { "epoch": 0.23652716326974688, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3261 }, { "epoch": 0.2365996953651991, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3262 }, { "epoch": 0.23667222746065134, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3263 }, { "epoch": 0.23674475955610358, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.6358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3264 }, { "epoch": 0.23681729165155582, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3265 }, { "epoch": 0.23688982374700804, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3266 }, { "epoch": 0.23696235584246028, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3267 }, { "epoch": 0.23703488793791253, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.3038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3268 }, { "epoch": 0.23710742003336477, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 9.2383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3269 }, { "epoch": 0.237179952128817, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3270 }, { "epoch": 0.23725248422426923, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 9.1776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3271 }, { "epoch": 0.23732501631972147, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.3535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3272 }, { "epoch": 0.23739754841517371, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.1936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3273 }, { "epoch": 0.23747008051062596, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.5965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3274 }, { "epoch": 0.2375426126060782, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3275 }, { "epoch": 0.23761514470153042, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.6695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3276 }, { "epoch": 0.23768767679698266, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.8589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3277 }, { "epoch": 0.2377602088924349, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.2672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3278 }, { "epoch": 0.23783274098788715, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3279 }, { "epoch": 0.2379052730833394, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3280 }, { "epoch": 0.2379778051787916, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.3232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3281 }, { "epoch": 0.23805033727424385, "grad_norm": 19.5, "learning_rate": 0.0003, "loss": 8.8992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3282 }, { "epoch": 0.2381228693696961, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3283 }, { "epoch": 0.23819540146514834, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.3665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3284 }, { "epoch": 0.23826793356060055, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3285 }, { "epoch": 0.2383404656560528, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3286 }, { "epoch": 0.23841299775150504, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3287 }, { "epoch": 0.23848552984695728, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3288 }, { "epoch": 0.23855806194240953, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3289 }, { "epoch": 0.23863059403786174, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.3261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3290 }, { "epoch": 0.238703126133314, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 8.7177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3291 }, { "epoch": 0.23877565822876623, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3292 }, { "epoch": 0.23884819032421847, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3293 }, { "epoch": 0.23892072241967072, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3294 }, { "epoch": 0.23899325451512293, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.6919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3295 }, { "epoch": 0.23906578661057518, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.6559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3296 }, { "epoch": 0.23913831870602742, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.5504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3297 }, { "epoch": 0.23921085080147966, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.5032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3298 }, { "epoch": 0.23928338289693188, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.8035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3299 }, { "epoch": 0.23935591499238412, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.4055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3300 }, { "epoch": 0.23942844708783637, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.7879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3301 }, { "epoch": 0.2395009791832886, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3302 }, { "epoch": 0.23957351127874085, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3303 }, { "epoch": 0.23964604337419307, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3304 }, { "epoch": 0.2397185754696453, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.8835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3305 }, { "epoch": 0.23979110756509756, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3306 }, { "epoch": 0.2398636396605498, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3307 }, { "epoch": 0.23993617175600204, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.3789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3308 }, { "epoch": 0.24000870385145426, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.0572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3309 }, { "epoch": 0.2400812359469065, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3310 }, { "epoch": 0.24015376804235875, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.0033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3311 }, { "epoch": 0.240226300137811, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3312 }, { "epoch": 0.24029883223326323, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.8126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3313 }, { "epoch": 0.24037136432871545, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 9.1065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3314 }, { "epoch": 0.2404438964241677, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3315 }, { "epoch": 0.24051642851961993, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.0315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3316 }, { "epoch": 0.24058896061507218, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.1724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3317 }, { "epoch": 0.2406614927105244, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3318 }, { "epoch": 0.24073402480597664, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3319 }, { "epoch": 0.24080655690142888, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3320 }, { "epoch": 0.24087908899688112, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3321 }, { "epoch": 0.24095162109233337, "grad_norm": 13.5, "learning_rate": 0.0003, "loss": 8.901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3322 }, { "epoch": 0.24102415318778558, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3323 }, { "epoch": 0.24109668528323783, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3324 }, { "epoch": 0.24116921737869007, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 9.0977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3325 }, { "epoch": 0.24124174947414231, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.9325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3326 }, { "epoch": 0.24131428156959456, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.2997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3327 }, { "epoch": 0.24138681366504677, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3328 }, { "epoch": 0.24145934576049902, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3329 }, { "epoch": 0.24153187785595126, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.1509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3330 }, { "epoch": 0.2416044099514035, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.1766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3331 }, { "epoch": 0.24167694204685575, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 9.4919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3332 }, { "epoch": 0.24174947414230796, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3333 }, { "epoch": 0.2418220062377602, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 9.4231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3334 }, { "epoch": 0.24189453833321245, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.9907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3335 }, { "epoch": 0.2419670704286647, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.9897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3336 }, { "epoch": 0.2420396025241169, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.3334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3337 }, { "epoch": 0.24211213461956915, "grad_norm": 15.0, "learning_rate": 0.0003, "loss": 8.8342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3338 }, { "epoch": 0.2421846667150214, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3339 }, { "epoch": 0.24225719881047364, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.1253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3340 }, { "epoch": 0.24232973090592588, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3341 }, { "epoch": 0.2424022630013781, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.1409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3342 }, { "epoch": 0.24247479509683034, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3343 }, { "epoch": 0.24254732719228259, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.1174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3344 }, { "epoch": 0.24261985928773483, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3345 }, { "epoch": 0.24269239138318707, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.1635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3346 }, { "epoch": 0.2427649234786393, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.5166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3347 }, { "epoch": 0.24283745557409153, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 9.1092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3348 }, { "epoch": 0.24290998766954378, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.0565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3349 }, { "epoch": 0.24298251976499602, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.2577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3350 }, { "epoch": 0.24305505186044823, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.9729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3351 }, { "epoch": 0.24312758395590048, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.0143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3352 }, { "epoch": 0.24320011605135272, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3353 }, { "epoch": 0.24327264814680497, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.5876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3354 }, { "epoch": 0.2433451802422572, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3355 }, { "epoch": 0.24341771233770942, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3356 }, { "epoch": 0.24349024443316167, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3357 }, { "epoch": 0.2435627765286139, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.4344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3358 }, { "epoch": 0.24363530862406615, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.0809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3359 }, { "epoch": 0.2437078407195184, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.9553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3360 }, { "epoch": 0.24378037281497061, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3361 }, { "epoch": 0.24385290491042286, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.9024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3362 }, { "epoch": 0.2439254370058751, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3363 }, { "epoch": 0.24399796910132734, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.1466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3364 }, { "epoch": 0.2440705011967796, "grad_norm": 24.375, "learning_rate": 0.0003, "loss": 8.9741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3365 }, { "epoch": 0.2441430332922318, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.2472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3366 }, { "epoch": 0.24421556538768405, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 8.7529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3367 }, { "epoch": 0.2442880974831363, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.6889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3368 }, { "epoch": 0.24436062957858853, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3369 }, { "epoch": 0.24443316167404075, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.3132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3370 }, { "epoch": 0.244505693769493, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 9.002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3371 }, { "epoch": 0.24457822586494524, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3372 }, { "epoch": 0.24465075796039748, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.1769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3373 }, { "epoch": 0.24472329005584972, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.3459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3374 }, { "epoch": 0.24479582215130194, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.2287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3375 }, { "epoch": 0.24486835424675418, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.3372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3376 }, { "epoch": 0.24494088634220643, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3377 }, { "epoch": 0.24501341843765867, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3378 }, { "epoch": 0.2450859505331109, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.1944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3379 }, { "epoch": 0.24515848262856313, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3380 }, { "epoch": 0.24523101472401537, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3381 }, { "epoch": 0.24530354681946762, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.0344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3382 }, { "epoch": 0.24537607891491986, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3383 }, { "epoch": 0.2454486110103721, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.4512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3384 }, { "epoch": 0.24552114310582432, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.4976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3385 }, { "epoch": 0.24559367520127656, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3386 }, { "epoch": 0.2456662072967288, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 8.9427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3387 }, { "epoch": 0.24573873939218105, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.1031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3388 }, { "epoch": 0.24581127148763326, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.1173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3389 }, { "epoch": 0.2458838035830855, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.7517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3390 }, { "epoch": 0.24595633567853775, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3391 }, { "epoch": 0.24602886777399, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.7928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3392 }, { "epoch": 0.24610139986944224, "grad_norm": 27.375, "learning_rate": 0.0003, "loss": 8.9116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3393 }, { "epoch": 0.24617393196489445, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.7888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3394 }, { "epoch": 0.2462464640603467, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3395 }, { "epoch": 0.24631899615579894, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3396 }, { "epoch": 0.24639152825125119, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.0068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3397 }, { "epoch": 0.24646406034670343, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.3199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3398 }, { "epoch": 0.24653659244215564, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.9509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3399 }, { "epoch": 0.2466091245376079, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.4577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3400 }, { "epoch": 0.24668165663306013, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3401 }, { "epoch": 0.24675418872851237, "grad_norm": 44.25, "learning_rate": 0.0003, "loss": 9.0544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3402 }, { "epoch": 0.24682672082396462, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3403 }, { "epoch": 0.24689925291941683, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3404 }, { "epoch": 0.24697178501486908, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3405 }, { "epoch": 0.24704431711032132, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3406 }, { "epoch": 0.24711684920577356, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3407 }, { "epoch": 0.24718938130122578, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3408 }, { "epoch": 0.24726191339667802, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.1751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3409 }, { "epoch": 0.24733444549213027, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3410 }, { "epoch": 0.2474069775875825, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3411 }, { "epoch": 0.24747950968303475, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.1512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3412 }, { "epoch": 0.24755204177848697, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3413 }, { "epoch": 0.2476245738739392, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.6444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3414 }, { "epoch": 0.24769710596939146, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.7555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3415 }, { "epoch": 0.2477696380648437, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.0014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3416 }, { "epoch": 0.24784217016029594, "grad_norm": 143.0, "learning_rate": 0.0003, "loss": 8.839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3417 }, { "epoch": 0.24791470225574816, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.4293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3418 }, { "epoch": 0.2479872343512004, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.9923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3419 }, { "epoch": 0.24805976644665265, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.3284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3420 }, { "epoch": 0.2481322985421049, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3421 }, { "epoch": 0.2482048306375571, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3422 }, { "epoch": 0.24827736273300935, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3423 }, { "epoch": 0.2483498948284616, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.3702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3424 }, { "epoch": 0.24842242692391384, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.2862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3425 }, { "epoch": 0.24849495901936608, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.5978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3426 }, { "epoch": 0.2485674911148183, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3427 }, { "epoch": 0.24864002321027054, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3428 }, { "epoch": 0.24871255530572278, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.1734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3429 }, { "epoch": 0.24878508740117503, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.3573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3430 }, { "epoch": 0.24885761949662727, "grad_norm": 20.125, "learning_rate": 0.0003, "loss": 8.9502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3431 }, { "epoch": 0.24893015159207948, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3432 }, { "epoch": 0.24900268368753173, "grad_norm": 17.75, "learning_rate": 0.0003, "loss": 9.5039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3433 }, { "epoch": 0.24907521578298397, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.0532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3434 }, { "epoch": 0.24914774787843622, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.2506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3435 }, { "epoch": 0.24922027997388846, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3436 }, { "epoch": 0.24929281206934067, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.65, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3437 }, { "epoch": 0.24936534416479292, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.0473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3438 }, { "epoch": 0.24943787626024516, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.6169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3439 }, { "epoch": 0.2495104083556974, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3440 }, { "epoch": 0.24958294045114962, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.4315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3441 }, { "epoch": 0.24965547254660186, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.4486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3442 }, { "epoch": 0.2497280046420541, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3443 }, { "epoch": 0.24980053673750635, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3444 }, { "epoch": 0.2498730688329586, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3445 }, { "epoch": 0.2499456009284108, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.7809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3446 }, { "epoch": 0.25001813302386305, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.3863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3447 }, { "epoch": 0.25009066511931527, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3448 }, { "epoch": 0.25016319721476754, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3449 }, { "epoch": 0.25023572931021976, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3450 }, { "epoch": 0.25030826140567203, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.8906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3451 }, { "epoch": 0.25038079350112424, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3452 }, { "epoch": 0.25045332559657646, "grad_norm": 56.0, "learning_rate": 0.0003, "loss": 9.2545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3453 }, { "epoch": 0.25052585769202873, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3454 }, { "epoch": 0.25059838978748095, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 9.1409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3455 }, { "epoch": 0.2506709218829332, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.6748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3456 }, { "epoch": 0.25074345397838543, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.2227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3457 }, { "epoch": 0.25081598607383765, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3458 }, { "epoch": 0.2508885181692899, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3459 }, { "epoch": 0.25096105026474214, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 8.7577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3460 }, { "epoch": 0.2510335823601944, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3461 }, { "epoch": 0.2511061144556466, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.8453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3462 }, { "epoch": 0.25117864655109884, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3463 }, { "epoch": 0.2512511786465511, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3464 }, { "epoch": 0.2513237107420033, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3465 }, { "epoch": 0.2513962428374556, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3466 }, { "epoch": 0.2514687749329078, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.3202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3467 }, { "epoch": 0.25154130702836003, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.7585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3468 }, { "epoch": 0.2516138391238123, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.7703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3469 }, { "epoch": 0.2516863712192645, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 9.1561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3470 }, { "epoch": 0.2517589033147168, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3471 }, { "epoch": 0.251831435410169, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.1276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3472 }, { "epoch": 0.2519039675056212, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3473 }, { "epoch": 0.2519764996010735, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3474 }, { "epoch": 0.2520490316965257, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3475 }, { "epoch": 0.252121563791978, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3476 }, { "epoch": 0.2521940958874302, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3477 }, { "epoch": 0.2522666279828824, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3478 }, { "epoch": 0.2523391600783347, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.1105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3479 }, { "epoch": 0.2524116921737869, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3480 }, { "epoch": 0.25248422426923917, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3481 }, { "epoch": 0.2525567563646914, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3482 }, { "epoch": 0.2526292884601436, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.8388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3483 }, { "epoch": 0.25270182055559587, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.6519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3484 }, { "epoch": 0.2527743526510481, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.6932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3485 }, { "epoch": 0.2528468847465003, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.6683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3486 }, { "epoch": 0.25291941684195257, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3487 }, { "epoch": 0.2529919489374048, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.6873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3488 }, { "epoch": 0.25306448103285706, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.1042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3489 }, { "epoch": 0.2531370131283093, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3490 }, { "epoch": 0.2532095452237615, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3491 }, { "epoch": 0.25328207731921376, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.6005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3492 }, { "epoch": 0.253354609414666, "grad_norm": 1.53125, "learning_rate": 0.0003, "loss": 8.8121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3493 }, { "epoch": 0.25342714151011825, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3494 }, { "epoch": 0.25349967360557046, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3495 }, { "epoch": 0.2535722057010227, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3496 }, { "epoch": 0.25364473779647495, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3497 }, { "epoch": 0.25371726989192717, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3498 }, { "epoch": 0.25378980198737944, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.5976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3499 }, { "epoch": 0.25386233408283165, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3500 }, { "epoch": 0.25393486617828387, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3501 }, { "epoch": 0.25400739827373614, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3502 }, { "epoch": 0.25407993036918836, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 9.3016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3503 }, { "epoch": 0.2541524624646406, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3504 }, { "epoch": 0.25422499456009284, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3505 }, { "epoch": 0.25429752665554506, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.1871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3506 }, { "epoch": 0.25437005875099733, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.07, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3507 }, { "epoch": 0.25444259084644955, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3508 }, { "epoch": 0.2545151229419018, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.5799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3509 }, { "epoch": 0.25458765503735403, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3510 }, { "epoch": 0.25466018713280625, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3511 }, { "epoch": 0.2547327192282585, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.4448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3512 }, { "epoch": 0.25480525132371074, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3513 }, { "epoch": 0.254877783419163, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.3356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3514 }, { "epoch": 0.2549503155146152, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3515 }, { "epoch": 0.25502284761006744, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3516 }, { "epoch": 0.2550953797055197, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.3792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3517 }, { "epoch": 0.2551679118009719, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3518 }, { "epoch": 0.25524044389642414, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3519 }, { "epoch": 0.2553129759918764, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3520 }, { "epoch": 0.2553855080873286, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3521 }, { "epoch": 0.2554580401827809, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3522 }, { "epoch": 0.2555305722782331, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3523 }, { "epoch": 0.25560310437368533, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3524 }, { "epoch": 0.2556756364691376, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.0969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3525 }, { "epoch": 0.2557481685645898, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.9509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3526 }, { "epoch": 0.2558207006600421, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3527 }, { "epoch": 0.2558932327554943, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 8.8049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3528 }, { "epoch": 0.2559657648509465, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.6103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3529 }, { "epoch": 0.2560382969463988, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 9.4348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3530 }, { "epoch": 0.256110829041851, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.6956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3531 }, { "epoch": 0.2561833611373033, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3532 }, { "epoch": 0.2562558932327555, "grad_norm": 15.1875, "learning_rate": 0.0003, "loss": 9.068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3533 }, { "epoch": 0.2563284253282077, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.6916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3534 }, { "epoch": 0.25640095742366, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3535 }, { "epoch": 0.2564734895191122, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.8795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3536 }, { "epoch": 0.25654602161456447, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.1469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3537 }, { "epoch": 0.2566185537100167, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.2407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3538 }, { "epoch": 0.2566910858054689, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3539 }, { "epoch": 0.25676361790092117, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3540 }, { "epoch": 0.2568361499963734, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.9241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3541 }, { "epoch": 0.25690868209182566, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3542 }, { "epoch": 0.2569812141872779, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.2472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3543 }, { "epoch": 0.2570537462827301, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3544 }, { "epoch": 0.25712627837818236, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.3824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3545 }, { "epoch": 0.2571988104736346, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3546 }, { "epoch": 0.25727134256908685, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.4051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3547 }, { "epoch": 0.25734387466453906, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.3159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3548 }, { "epoch": 0.2574164067599913, "grad_norm": 1.3828125, "learning_rate": 0.0003, "loss": 9.0867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3549 }, { "epoch": 0.25748893885544355, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.1168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3550 }, { "epoch": 0.25756147095089577, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.0471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3551 }, { "epoch": 0.257634003046348, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.7401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3552 }, { "epoch": 0.25770653514180025, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3553 }, { "epoch": 0.25777906723725247, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.1633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3554 }, { "epoch": 0.25785159933270474, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.3057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3555 }, { "epoch": 0.25792413142815696, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3556 }, { "epoch": 0.25799666352360917, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.7586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3557 }, { "epoch": 0.25806919561906144, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.4758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3558 }, { "epoch": 0.25814172771451366, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3559 }, { "epoch": 0.25821425980996593, "grad_norm": 17.75, "learning_rate": 0.0003, "loss": 9.0836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3560 }, { "epoch": 0.25828679190541814, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.4087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3561 }, { "epoch": 0.25835932400087036, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.4115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3562 }, { "epoch": 0.25843185609632263, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3563 }, { "epoch": 0.25850438819177485, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3564 }, { "epoch": 0.2585769202872271, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3565 }, { "epoch": 0.25864945238267933, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.2232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3566 }, { "epoch": 0.25872198447813155, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.2626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3567 }, { "epoch": 0.2587945165735838, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3568 }, { "epoch": 0.25886704866903604, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3569 }, { "epoch": 0.2589395807644883, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3570 }, { "epoch": 0.2590121128599405, "grad_norm": 16.75, "learning_rate": 0.0003, "loss": 8.9575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3571 }, { "epoch": 0.25908464495539274, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3572 }, { "epoch": 0.259157177050845, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.4106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3573 }, { "epoch": 0.2592297091462972, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3574 }, { "epoch": 0.2593022412417495, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3575 }, { "epoch": 0.2593747733372017, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.8358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3576 }, { "epoch": 0.25944730543265393, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.6058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3577 }, { "epoch": 0.2595198375281062, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3578 }, { "epoch": 0.2595923696235584, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3579 }, { "epoch": 0.2596649017190107, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.2255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3580 }, { "epoch": 0.2597374338144629, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.6265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3581 }, { "epoch": 0.2598099659099151, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3582 }, { "epoch": 0.2598824980053674, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3583 }, { "epoch": 0.2599550301008196, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.0112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3584 }, { "epoch": 0.2600275621962719, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.8586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3585 }, { "epoch": 0.2601000942917241, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3586 }, { "epoch": 0.2601726263871763, "grad_norm": 14.8125, "learning_rate": 0.0003, "loss": 8.8547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3587 }, { "epoch": 0.2602451584826286, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.4804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3588 }, { "epoch": 0.2603176905780808, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.3473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3589 }, { "epoch": 0.260390222673533, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.5689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3590 }, { "epoch": 0.2604627547689853, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3591 }, { "epoch": 0.2605352868644375, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3592 }, { "epoch": 0.26060781895988977, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.2093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3593 }, { "epoch": 0.260680351055342, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 9.0227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3594 }, { "epoch": 0.2607528831507942, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3595 }, { "epoch": 0.2608254152462465, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3596 }, { "epoch": 0.2608979473416987, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.9529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3597 }, { "epoch": 0.26097047943715096, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.2619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3598 }, { "epoch": 0.2610430115326032, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3599 }, { "epoch": 0.2611155436280554, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.8292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3600 }, { "epoch": 0.26118807572350766, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.1998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3601 }, { "epoch": 0.2612606078189599, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3602 }, { "epoch": 0.26133313991441215, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3603 }, { "epoch": 0.26140567200986436, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.2879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3604 }, { "epoch": 0.2614782041053166, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3605 }, { "epoch": 0.26155073620076885, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3606 }, { "epoch": 0.26162326829622107, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3607 }, { "epoch": 0.26169580039167334, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.9328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3608 }, { "epoch": 0.26176833248712555, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3609 }, { "epoch": 0.26184086458257777, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3610 }, { "epoch": 0.26191339667803004, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3611 }, { "epoch": 0.26198592877348226, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3612 }, { "epoch": 0.26205846086893453, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3613 }, { "epoch": 0.26213099296438674, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.0418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3614 }, { "epoch": 0.26220352505983896, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 8.9457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3615 }, { "epoch": 0.26227605715529123, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3616 }, { "epoch": 0.26234858925074345, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.0658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3617 }, { "epoch": 0.2624211213461957, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.4156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3618 }, { "epoch": 0.26249365344164793, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3619 }, { "epoch": 0.26256618553710015, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.5868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3620 }, { "epoch": 0.2626387176325524, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.1312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3621 }, { "epoch": 0.26271124972800464, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3622 }, { "epoch": 0.26278378182345685, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.8268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3623 }, { "epoch": 0.2628563139189091, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3624 }, { "epoch": 0.26292884601436134, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.4597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3625 }, { "epoch": 0.2630013781098136, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.3092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3626 }, { "epoch": 0.2630739102052658, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3627 }, { "epoch": 0.26314644230071804, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3628 }, { "epoch": 0.2632189743961703, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.9425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3629 }, { "epoch": 0.26329150649162253, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 9.0943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3630 }, { "epoch": 0.2633640385870748, "grad_norm": 1.5234375, "learning_rate": 0.0003, "loss": 9.194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3631 }, { "epoch": 0.263436570682527, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3632 }, { "epoch": 0.26350910277797923, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.3373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3633 }, { "epoch": 0.2635816348734315, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.0333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3634 }, { "epoch": 0.2636541669688837, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3635 }, { "epoch": 0.263726699064336, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 8.4927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3636 }, { "epoch": 0.2637992311597882, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3637 }, { "epoch": 0.2638717632552404, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.4341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3638 }, { "epoch": 0.2639442953506927, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.5007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3639 }, { "epoch": 0.2640168274461449, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3640 }, { "epoch": 0.2640893595415972, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.7719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3641 }, { "epoch": 0.2641618916370494, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3642 }, { "epoch": 0.2642344237325016, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.8113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3643 }, { "epoch": 0.2643069558279539, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 8.64, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3644 }, { "epoch": 0.2643794879234061, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3645 }, { "epoch": 0.26445202001885837, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3646 }, { "epoch": 0.2645245521143106, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.2209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3647 }, { "epoch": 0.2645970842097628, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 8.962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3648 }, { "epoch": 0.26466961630521507, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.3687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3649 }, { "epoch": 0.2647421484006673, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.7557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3650 }, { "epoch": 0.26481468049611956, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3651 }, { "epoch": 0.2648872125915718, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3652 }, { "epoch": 0.264959744687024, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.2196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3653 }, { "epoch": 0.26503227678247626, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.4001, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3654 }, { "epoch": 0.2651048088779285, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.1454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3655 }, { "epoch": 0.26517734097338075, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3656 }, { "epoch": 0.26524987306883296, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3657 }, { "epoch": 0.2653224051642852, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.4568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3658 }, { "epoch": 0.26539493725973745, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.2726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3659 }, { "epoch": 0.26546746935518967, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3660 }, { "epoch": 0.2655400014506419, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3661 }, { "epoch": 0.26561253354609415, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 9.4274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3662 }, { "epoch": 0.26568506564154637, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.7682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3663 }, { "epoch": 0.26575759773699864, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.1551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3664 }, { "epoch": 0.26583012983245086, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3665 }, { "epoch": 0.2659026619279031, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3666 }, { "epoch": 0.26597519402335534, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3667 }, { "epoch": 0.26604772611880756, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3668 }, { "epoch": 0.26612025821425983, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.9434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3669 }, { "epoch": 0.26619279030971205, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3670 }, { "epoch": 0.26626532240516426, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3671 }, { "epoch": 0.26633785450061653, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3672 }, { "epoch": 0.26641038659606875, "grad_norm": 63.0, "learning_rate": 0.0003, "loss": 8.7436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3673 }, { "epoch": 0.266482918691521, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.1321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3674 }, { "epoch": 0.26655545078697324, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.2442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3675 }, { "epoch": 0.26662798288242545, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.3027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3676 }, { "epoch": 0.2667005149778777, "grad_norm": 1.4140625, "learning_rate": 0.0003, "loss": 8.9298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3677 }, { "epoch": 0.26677304707332994, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3678 }, { "epoch": 0.2668455791687822, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 9.3174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3679 }, { "epoch": 0.2669181112642344, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.6299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3680 }, { "epoch": 0.26699064335968664, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.6535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3681 }, { "epoch": 0.2670631754551389, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.1697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3682 }, { "epoch": 0.26713570755059113, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3683 }, { "epoch": 0.2672082396460434, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.4153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3684 }, { "epoch": 0.2672807717414956, "grad_norm": 13.6875, "learning_rate": 0.0003, "loss": 9.3461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3685 }, { "epoch": 0.26735330383694783, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 9.1995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3686 }, { "epoch": 0.2674258359324001, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3687 }, { "epoch": 0.2674983680278523, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.3041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3688 }, { "epoch": 0.2675709001233046, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.3794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3689 }, { "epoch": 0.2676434322187568, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3690 }, { "epoch": 0.267715964314209, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3691 }, { "epoch": 0.2677884964096613, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.5074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3692 }, { "epoch": 0.2678610285051135, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3693 }, { "epoch": 0.2679335606005657, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3694 }, { "epoch": 0.268006092696018, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3695 }, { "epoch": 0.2680786247914702, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3696 }, { "epoch": 0.2681511568869225, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.0772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3697 }, { "epoch": 0.2682236889823747, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3698 }, { "epoch": 0.2682962210778269, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3699 }, { "epoch": 0.2683687531732792, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3700 }, { "epoch": 0.2684412852687314, "grad_norm": 40.25, "learning_rate": 0.0003, "loss": 9.0291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3701 }, { "epoch": 0.26851381736418367, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.0048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3702 }, { "epoch": 0.2685863494596359, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.5367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3703 }, { "epoch": 0.2686588815550881, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3704 }, { "epoch": 0.2687314136505404, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.7286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3705 }, { "epoch": 0.2688039457459926, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.9982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3706 }, { "epoch": 0.26887647784144486, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3707 }, { "epoch": 0.2689490099368971, "grad_norm": 44.0, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3708 }, { "epoch": 0.2690215420323493, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3709 }, { "epoch": 0.26909407412780156, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3710 }, { "epoch": 0.2691666062232538, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3711 }, { "epoch": 0.26923913831870605, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.7756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3712 }, { "epoch": 0.26931167041415827, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3713 }, { "epoch": 0.2693842025096105, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 8.7111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3714 }, { "epoch": 0.26945673460506275, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.1325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3715 }, { "epoch": 0.26952926670051497, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.3567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3716 }, { "epoch": 0.26960179879596724, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3717 }, { "epoch": 0.26967433089141946, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.5492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3718 }, { "epoch": 0.26974686298687167, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3719 }, { "epoch": 0.26981939508232394, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3720 }, { "epoch": 0.26989192717777616, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3721 }, { "epoch": 0.26996445927322843, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3722 }, { "epoch": 0.27003699136868065, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3723 }, { "epoch": 0.27010952346413286, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.1153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3724 }, { "epoch": 0.27018205555958513, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.4283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3725 }, { "epoch": 0.27025458765503735, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3726 }, { "epoch": 0.27032711975048956, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.8291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3727 }, { "epoch": 0.27039965184594184, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3728 }, { "epoch": 0.27047218394139405, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3729 }, { "epoch": 0.2705447160368463, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.5776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3730 }, { "epoch": 0.27061724813229854, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3731 }, { "epoch": 0.27068978022775075, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3732 }, { "epoch": 0.270762312323203, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3733 }, { "epoch": 0.27083484441865524, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 9.0188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3734 }, { "epoch": 0.2709073765141075, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3735 }, { "epoch": 0.2709799086095597, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3736 }, { "epoch": 0.27105244070501194, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3737 }, { "epoch": 0.2711249728004642, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.8437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3738 }, { "epoch": 0.27119750489591643, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.3119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3739 }, { "epoch": 0.2712700369913687, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.3903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3740 }, { "epoch": 0.2713425690868209, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.9064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3741 }, { "epoch": 0.27141510118227313, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3742 }, { "epoch": 0.2714876332777254, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3743 }, { "epoch": 0.2715601653731776, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3744 }, { "epoch": 0.2716326974686299, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 9.1151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3745 }, { "epoch": 0.2717052295640821, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.9362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3746 }, { "epoch": 0.2717777616595343, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3747 }, { "epoch": 0.2718502937549866, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.3175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3748 }, { "epoch": 0.2719228258504388, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3749 }, { "epoch": 0.2719953579458911, "grad_norm": 30.0, "learning_rate": 0.0003, "loss": 9.5834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3750 }, { "epoch": 0.2720678900413433, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3751 }, { "epoch": 0.2721404221367955, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.5409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3752 }, { "epoch": 0.2722129542322478, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3753 }, { "epoch": 0.2722854863277, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.3425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3754 }, { "epoch": 0.27235801842315227, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3755 }, { "epoch": 0.2724305505186045, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3756 }, { "epoch": 0.2725030826140567, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.6678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3757 }, { "epoch": 0.272575614709509, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 8.8761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3758 }, { "epoch": 0.2726481468049612, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.7722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3759 }, { "epoch": 0.27272067890041346, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3760 }, { "epoch": 0.2727932109958657, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.2347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3761 }, { "epoch": 0.2728657430913179, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.7838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3762 }, { "epoch": 0.27293827518677016, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3763 }, { "epoch": 0.2730108072822224, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.8227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3764 }, { "epoch": 0.2730833393776746, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3765 }, { "epoch": 0.27315587147312687, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3766 }, { "epoch": 0.2732284035685791, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.7728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3767 }, { "epoch": 0.27330093566403135, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3768 }, { "epoch": 0.27337346775948357, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3769 }, { "epoch": 0.2734459998549358, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.4035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3770 }, { "epoch": 0.27351853195038806, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3771 }, { "epoch": 0.27359106404584027, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3772 }, { "epoch": 0.27366359614129254, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3773 }, { "epoch": 0.27373612823674476, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3774 }, { "epoch": 0.273808660332197, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.1999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3775 }, { "epoch": 0.27388119242764924, "grad_norm": 13.875, "learning_rate": 0.0003, "loss": 8.8526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3776 }, { "epoch": 0.27395372452310146, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3777 }, { "epoch": 0.27402625661855373, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3778 }, { "epoch": 0.27409878871400595, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3779 }, { "epoch": 0.27417132080945816, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3780 }, { "epoch": 0.27424385290491043, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3781 }, { "epoch": 0.27431638500036265, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.1888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3782 }, { "epoch": 0.2743889170958149, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3783 }, { "epoch": 0.27446144919126714, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.5341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3784 }, { "epoch": 0.27453398128671935, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3785 }, { "epoch": 0.2746065133821716, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3786 }, { "epoch": 0.27467904547762384, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3787 }, { "epoch": 0.2747515775730761, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3788 }, { "epoch": 0.2748241096685283, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3789 }, { "epoch": 0.27489664176398054, "grad_norm": 18.5, "learning_rate": 0.0003, "loss": 9.178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3790 }, { "epoch": 0.2749691738594328, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.4985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3791 }, { "epoch": 0.27504170595488503, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.0604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3792 }, { "epoch": 0.2751142380503373, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.3733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3793 }, { "epoch": 0.2751867701457895, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.0923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3794 }, { "epoch": 0.27525930224124173, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3795 }, { "epoch": 0.275331834336694, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.6729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3796 }, { "epoch": 0.2754043664321462, "grad_norm": 29.5, "learning_rate": 0.0003, "loss": 8.5581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3797 }, { "epoch": 0.27547689852759843, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3798 }, { "epoch": 0.2755494306230507, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.3812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3799 }, { "epoch": 0.2756219627185029, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.9223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3800 }, { "epoch": 0.2756944948139552, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.6502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3801 }, { "epoch": 0.2757670269094074, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.6682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3802 }, { "epoch": 0.2758395590048596, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3803 }, { "epoch": 0.2759120911003119, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.5358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3804 }, { "epoch": 0.2759846231957641, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.6559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3805 }, { "epoch": 0.2760571552912164, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.7102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3806 }, { "epoch": 0.2761296873866686, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.7971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3807 }, { "epoch": 0.2762022194821208, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3808 }, { "epoch": 0.2762747515775731, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.2803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3809 }, { "epoch": 0.2763472836730253, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.8968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3810 }, { "epoch": 0.2764198157684776, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3811 }, { "epoch": 0.2764923478639298, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3812 }, { "epoch": 0.276564879959382, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.1354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3813 }, { "epoch": 0.2766374120548343, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3814 }, { "epoch": 0.2767099441502865, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 8.8788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3815 }, { "epoch": 0.27678247624573876, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 9.2124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3816 }, { "epoch": 0.276855008341191, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3817 }, { "epoch": 0.2769275404366432, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3818 }, { "epoch": 0.27700007253209546, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3819 }, { "epoch": 0.2770726046275477, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3820 }, { "epoch": 0.27714513672299995, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.2465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3821 }, { "epoch": 0.27721766881845217, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.5698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3822 }, { "epoch": 0.2772902009139044, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3823 }, { "epoch": 0.27736273300935665, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.6092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3824 }, { "epoch": 0.27743526510480887, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3825 }, { "epoch": 0.27750779720026114, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.95, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3826 }, { "epoch": 0.27758032929571336, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3827 }, { "epoch": 0.2776528613911656, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.1368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3828 }, { "epoch": 0.27772539348661784, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.1237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3829 }, { "epoch": 0.27779792558207006, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3830 }, { "epoch": 0.27787045767752233, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3831 }, { "epoch": 0.27794298977297455, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3832 }, { "epoch": 0.27801552186842676, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.9008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3833 }, { "epoch": 0.27808805396387903, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 8.4338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3834 }, { "epoch": 0.27816058605933125, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.9023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3835 }, { "epoch": 0.27823311815478347, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3836 }, { "epoch": 0.27830565025023574, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3837 }, { "epoch": 0.27837818234568795, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3838 }, { "epoch": 0.2784507144411402, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.5296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3839 }, { "epoch": 0.27852324653659244, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 8.5765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3840 }, { "epoch": 0.27859577863204465, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3841 }, { "epoch": 0.2786683107274969, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3842 }, { "epoch": 0.27874084282294914, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.2977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3843 }, { "epoch": 0.2788133749184014, "grad_norm": 21.0, "learning_rate": 0.0003, "loss": 8.6277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3844 }, { "epoch": 0.27888590701385363, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.5634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3845 }, { "epoch": 0.27895843910930584, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3846 }, { "epoch": 0.2790309712047581, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.3174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3847 }, { "epoch": 0.27910350330021033, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.1867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3848 }, { "epoch": 0.2791760353956626, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.0111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3849 }, { "epoch": 0.2792485674911148, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.8577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3850 }, { "epoch": 0.27932109958656703, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3851 }, { "epoch": 0.2793936316820193, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.8686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3852 }, { "epoch": 0.2794661637774715, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3853 }, { "epoch": 0.2795386958729238, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.3964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3854 }, { "epoch": 0.279611227968376, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3855 }, { "epoch": 0.2796837600638282, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 9.5623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3856 }, { "epoch": 0.2797562921592805, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3857 }, { "epoch": 0.2798288242547327, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.0785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3858 }, { "epoch": 0.279901356350185, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3859 }, { "epoch": 0.2799738884456372, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3860 }, { "epoch": 0.2800464205410894, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3861 }, { "epoch": 0.2801189526365417, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.7575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3862 }, { "epoch": 0.2801914847319939, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.8972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3863 }, { "epoch": 0.28026401682744617, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.8884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3864 }, { "epoch": 0.2803365489228984, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3865 }, { "epoch": 0.2804090810183506, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.2851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3866 }, { "epoch": 0.2804816131138029, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3867 }, { "epoch": 0.2805541452092551, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3868 }, { "epoch": 0.2806266773047073, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.4013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3869 }, { "epoch": 0.2806992094001596, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.4169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3870 }, { "epoch": 0.2807717414956118, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 9.2472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3871 }, { "epoch": 0.28084427359106406, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3872 }, { "epoch": 0.2809168056865163, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.3024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3873 }, { "epoch": 0.2809893377819685, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.9936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3874 }, { "epoch": 0.28106186987742077, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.0223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3875 }, { "epoch": 0.281134401972873, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3876 }, { "epoch": 0.28120693406832525, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3877 }, { "epoch": 0.28127946616377747, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3878 }, { "epoch": 0.2813519982592297, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3879 }, { "epoch": 0.28142453035468196, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.5092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3880 }, { "epoch": 0.2814970624501342, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.6595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3881 }, { "epoch": 0.28156959454558644, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3882 }, { "epoch": 0.28164212664103866, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3883 }, { "epoch": 0.2817146587364909, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3884 }, { "epoch": 0.28178719083194315, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.5974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3885 }, { "epoch": 0.28185972292739536, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.1207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3886 }, { "epoch": 0.28193225502284763, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.8425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3887 }, { "epoch": 0.28200478711829985, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.7234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3888 }, { "epoch": 0.28207731921375206, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3889 }, { "epoch": 0.28214985130920434, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.1132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3890 }, { "epoch": 0.28222238340465655, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.2568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3891 }, { "epoch": 0.2822949155001088, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.7958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3892 }, { "epoch": 0.28236744759556104, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.4665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3893 }, { "epoch": 0.28243997969101325, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3894 }, { "epoch": 0.2825125117864655, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3895 }, { "epoch": 0.28258504388191774, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3896 }, { "epoch": 0.28265757597737, "grad_norm": 90.0, "learning_rate": 0.0003, "loss": 8.9018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3897 }, { "epoch": 0.28273010807282223, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3898 }, { "epoch": 0.28280264016827444, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3899 }, { "epoch": 0.2828751722637267, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.9629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3900 }, { "epoch": 0.28294770435917893, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3901 }, { "epoch": 0.28302023645463115, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.3705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3902 }, { "epoch": 0.2830927685500834, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.01, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3903 }, { "epoch": 0.28316530064553563, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3904 }, { "epoch": 0.2832378327409879, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3905 }, { "epoch": 0.2833103648364401, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3906 }, { "epoch": 0.28338289693189234, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.2324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3907 }, { "epoch": 0.2834554290273446, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3908 }, { "epoch": 0.2835279611227968, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.2274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3909 }, { "epoch": 0.2836004932182491, "grad_norm": 33.5, "learning_rate": 0.0003, "loss": 8.5123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3910 }, { "epoch": 0.2836730253137013, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3911 }, { "epoch": 0.2837455574091535, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 9.0498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3912 }, { "epoch": 0.2838180895046058, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3913 }, { "epoch": 0.283890621600058, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.0334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3914 }, { "epoch": 0.2839631536955103, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3915 }, { "epoch": 0.2840356857909625, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.9865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3916 }, { "epoch": 0.2841082178864147, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3917 }, { "epoch": 0.284180749981867, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.8696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3918 }, { "epoch": 0.2842532820773192, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.4184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3919 }, { "epoch": 0.2843258141727715, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3920 }, { "epoch": 0.2843983462682237, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3921 }, { "epoch": 0.2844708783636759, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3922 }, { "epoch": 0.2845434104591282, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3923 }, { "epoch": 0.2846159425545804, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3924 }, { "epoch": 0.28468847465003266, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.6272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3925 }, { "epoch": 0.2847610067454849, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3926 }, { "epoch": 0.2848335388409371, "grad_norm": 1.421875, "learning_rate": 0.0003, "loss": 8.7741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3927 }, { "epoch": 0.28490607093638937, "grad_norm": 22.625, "learning_rate": 0.0003, "loss": 8.9299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3928 }, { "epoch": 0.2849786030318416, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.6521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3929 }, { "epoch": 0.28505113512729385, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3930 }, { "epoch": 0.28512366722274607, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3931 }, { "epoch": 0.2851961993181983, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3932 }, { "epoch": 0.28526873141365056, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3933 }, { "epoch": 0.28534126350910277, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3934 }, { "epoch": 0.28541379560455504, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.9912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3935 }, { "epoch": 0.28548632770000726, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3936 }, { "epoch": 0.2855588597954595, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.6182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3937 }, { "epoch": 0.28563139189091175, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3938 }, { "epoch": 0.28570392398636396, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 8.9086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3939 }, { "epoch": 0.2857764560818162, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.4017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3940 }, { "epoch": 0.28584898817726845, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.3071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3941 }, { "epoch": 0.28592152027272066, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3942 }, { "epoch": 0.28599405236817294, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.8244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3943 }, { "epoch": 0.28606658446362515, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3944 }, { "epoch": 0.28613911655907737, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3945 }, { "epoch": 0.28621164865452964, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3946 }, { "epoch": 0.28628418074998185, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3947 }, { "epoch": 0.2863567128454341, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.5878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3948 }, { "epoch": 0.28642924494088634, "grad_norm": 24.125, "learning_rate": 0.0003, "loss": 8.8696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3949 }, { "epoch": 0.28650177703633856, "grad_norm": 24.25, "learning_rate": 0.0003, "loss": 8.9605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3950 }, { "epoch": 0.2865743091317908, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3951 }, { "epoch": 0.28664684122724304, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.8858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3952 }, { "epoch": 0.2867193733226953, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.4342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3953 }, { "epoch": 0.28679190541814753, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3954 }, { "epoch": 0.28686443751359975, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.4505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3955 }, { "epoch": 0.286936969609052, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.1202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3956 }, { "epoch": 0.28700950170450423, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3957 }, { "epoch": 0.2870820337999565, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.0021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3958 }, { "epoch": 0.2871545658954087, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.5316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3959 }, { "epoch": 0.28722709799086094, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.3566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3960 }, { "epoch": 0.2872996300863132, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.3453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3961 }, { "epoch": 0.2873721621817654, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3962 }, { "epoch": 0.2874446942772177, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.8048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3963 }, { "epoch": 0.2875172263726699, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3964 }, { "epoch": 0.2875897584681221, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.3938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3965 }, { "epoch": 0.2876622905635744, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3966 }, { "epoch": 0.2877348226590266, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.2937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3967 }, { "epoch": 0.2878073547544789, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3968 }, { "epoch": 0.2878798868499311, "grad_norm": 23.375, "learning_rate": 0.0003, "loss": 8.2087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3969 }, { "epoch": 0.2879524189453833, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3970 }, { "epoch": 0.2880249510408356, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3971 }, { "epoch": 0.2880974831362878, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3972 }, { "epoch": 0.28817001523174, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3973 }, { "epoch": 0.2882425473271923, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3974 }, { "epoch": 0.2883150794226445, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3975 }, { "epoch": 0.2883876115180968, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3976 }, { "epoch": 0.288460143613549, "grad_norm": 61.0, "learning_rate": 0.0003, "loss": 9.1977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3977 }, { "epoch": 0.2885326757090012, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.6804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3978 }, { "epoch": 0.2886052078044535, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3979 }, { "epoch": 0.2886777398999057, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.3723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3980 }, { "epoch": 0.28875027199535797, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3981 }, { "epoch": 0.2888228040908102, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3982 }, { "epoch": 0.2888953361862624, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.0947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3983 }, { "epoch": 0.28896786828171467, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.6808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3984 }, { "epoch": 0.2890404003771669, "grad_norm": 27.625, "learning_rate": 0.0003, "loss": 8.7518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3985 }, { "epoch": 0.28911293247261916, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.2619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3986 }, { "epoch": 0.28918546456807137, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3987 }, { "epoch": 0.2892579966635236, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3988 }, { "epoch": 0.28933052875897586, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3989 }, { "epoch": 0.2894030608544281, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3990 }, { "epoch": 0.28947559294988034, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.9807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3991 }, { "epoch": 0.28954812504533256, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3992 }, { "epoch": 0.2896206571407848, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3993 }, { "epoch": 0.28969318923623705, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 8.7189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3994 }, { "epoch": 0.28976572133168926, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3995 }, { "epoch": 0.28983825342714153, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3996 }, { "epoch": 0.28991078552259375, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.8865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3997 }, { "epoch": 0.28998331761804597, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.0656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3998 }, { "epoch": 0.29005584971349824, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 3999 }, { "epoch": 0.29012838180895045, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.8854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4000 }, { "epoch": 0.2902009139044027, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4001 }, { "epoch": 0.29027344599985494, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.5832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4002 }, { "epoch": 0.29034597809530716, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4003 }, { "epoch": 0.2904185101907594, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.2676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4004 }, { "epoch": 0.29049104228621164, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4005 }, { "epoch": 0.2905635743816639, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.6286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4006 }, { "epoch": 0.29063610647711613, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4007 }, { "epoch": 0.29070863857256835, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4008 }, { "epoch": 0.2907811706680206, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 9.1209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4009 }, { "epoch": 0.29085370276347283, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.6462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4010 }, { "epoch": 0.29092623485892505, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4011 }, { "epoch": 0.2909987669543773, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 8.5884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4012 }, { "epoch": 0.29107129904982953, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.3418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4013 }, { "epoch": 0.2911438311452818, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.3018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4014 }, { "epoch": 0.291216363240734, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4015 }, { "epoch": 0.29128889533618624, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4016 }, { "epoch": 0.2913614274316385, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.2756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4017 }, { "epoch": 0.2914339595270907, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.9458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4018 }, { "epoch": 0.291506491622543, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.6889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4019 }, { "epoch": 0.2915790237179952, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4020 }, { "epoch": 0.2916515558134474, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.3322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4021 }, { "epoch": 0.2917240879088997, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.5294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4022 }, { "epoch": 0.2917966200043519, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 8.7175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4023 }, { "epoch": 0.2918691520998042, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4024 }, { "epoch": 0.2919416841952564, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.7046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4025 }, { "epoch": 0.2920142162907086, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.1306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4026 }, { "epoch": 0.2920867483861609, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.0311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4027 }, { "epoch": 0.2921592804816131, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.8317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4028 }, { "epoch": 0.2922318125770654, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4029 }, { "epoch": 0.2923043446725176, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4030 }, { "epoch": 0.2923768767679698, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4031 }, { "epoch": 0.2924494088634221, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.8854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4032 }, { "epoch": 0.2925219409588743, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.9341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4033 }, { "epoch": 0.29259447305432656, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 8.7167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4034 }, { "epoch": 0.2926670051497788, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4035 }, { "epoch": 0.292739537245231, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.7885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4036 }, { "epoch": 0.29281206934068327, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.6028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4037 }, { "epoch": 0.2928846014361355, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.9198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4038 }, { "epoch": 0.29295713353158775, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.1325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4039 }, { "epoch": 0.29302966562703997, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.1006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4040 }, { "epoch": 0.2931021977224922, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.3151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4041 }, { "epoch": 0.29317472981794446, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4042 }, { "epoch": 0.2932472619133967, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.7932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4043 }, { "epoch": 0.2933197940088489, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.3073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4044 }, { "epoch": 0.29339232610430116, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4045 }, { "epoch": 0.2934648581997534, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4046 }, { "epoch": 0.29353739029520565, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4047 }, { "epoch": 0.29360992239065786, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.8853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4048 }, { "epoch": 0.2936824544861101, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.9956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4049 }, { "epoch": 0.29375498658156235, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4050 }, { "epoch": 0.29382751867701457, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.6843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4051 }, { "epoch": 0.29390005077246684, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4052 }, { "epoch": 0.29397258286791905, "grad_norm": 90.5, "learning_rate": 0.0003, "loss": 8.9771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4053 }, { "epoch": 0.29404511496337127, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.1388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4054 }, { "epoch": 0.29411764705882354, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.3062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4055 }, { "epoch": 0.29419017915427575, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.6583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4056 }, { "epoch": 0.294262711249728, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4057 }, { "epoch": 0.29433524334518024, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.8241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4058 }, { "epoch": 0.29440777544063246, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.2439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4059 }, { "epoch": 0.29448030753608473, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4060 }, { "epoch": 0.29455283963153694, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.2846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4061 }, { "epoch": 0.2946253717269892, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4062 }, { "epoch": 0.29469790382244143, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4063 }, { "epoch": 0.29477043591789365, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4064 }, { "epoch": 0.2948429680133459, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.0567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4065 }, { "epoch": 0.29491550010879813, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.7637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4066 }, { "epoch": 0.2949880322042504, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4067 }, { "epoch": 0.2950605642997026, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4068 }, { "epoch": 0.29513309639515484, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.0477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4069 }, { "epoch": 0.2952056284906071, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4070 }, { "epoch": 0.2952781605860593, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4071 }, { "epoch": 0.2953506926815116, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 9.2617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4072 }, { "epoch": 0.2954232247769638, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4073 }, { "epoch": 0.295495756872416, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4074 }, { "epoch": 0.2955682889678683, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.8401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4075 }, { "epoch": 0.2956408210633205, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4076 }, { "epoch": 0.29571335315877273, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4077 }, { "epoch": 0.295785885254225, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 8.7841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4078 }, { "epoch": 0.2958584173496772, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4079 }, { "epoch": 0.2959309494451295, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4080 }, { "epoch": 0.2960034815405817, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.6005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4081 }, { "epoch": 0.2960760136360339, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4082 }, { "epoch": 0.2961485457314862, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4083 }, { "epoch": 0.2962210778269384, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.3421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4084 }, { "epoch": 0.2962936099223907, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.9113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4085 }, { "epoch": 0.2963661420178429, "grad_norm": 44.5, "learning_rate": 0.0003, "loss": 8.7096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4086 }, { "epoch": 0.2964386741132951, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.5914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4087 }, { "epoch": 0.2965112062087474, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.1424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4088 }, { "epoch": 0.2965837383041996, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4089 }, { "epoch": 0.29665627039965187, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.6975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4090 }, { "epoch": 0.2967288024951041, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.1471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4091 }, { "epoch": 0.2968013345905563, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.5664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4092 }, { "epoch": 0.29687386668600857, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.6672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4093 }, { "epoch": 0.2969463987814608, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4094 }, { "epoch": 0.29701893087691306, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4095 }, { "epoch": 0.29709146297236527, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.3502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4096 }, { "epoch": 0.2971639950678175, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4097 }, { "epoch": 0.29723652716326976, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.4075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4098 }, { "epoch": 0.297309059258722, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4099 }, { "epoch": 0.29738159135417425, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.2888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4100 }, { "epoch": 0.29745412344962646, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4101 }, { "epoch": 0.2975266555450787, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.2012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4102 }, { "epoch": 0.29759918764053095, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4103 }, { "epoch": 0.29767171973598316, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.7153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4104 }, { "epoch": 0.29774425183143544, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4105 }, { "epoch": 0.29781678392688765, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4106 }, { "epoch": 0.29788931602233987, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4107 }, { "epoch": 0.29796184811779214, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4108 }, { "epoch": 0.29803438021324435, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4109 }, { "epoch": 0.2981069123086966, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.4219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4110 }, { "epoch": 0.29817944440414884, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4111 }, { "epoch": 0.29825197649960106, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.4947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4112 }, { "epoch": 0.29832450859505333, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.9101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4113 }, { "epoch": 0.29839704069050554, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4114 }, { "epoch": 0.29846957278595776, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4115 }, { "epoch": 0.29854210488141003, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.6476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4116 }, { "epoch": 0.29861463697686225, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.3857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4117 }, { "epoch": 0.2986871690723145, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.6944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4118 }, { "epoch": 0.29875970116776673, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4119 }, { "epoch": 0.29883223326321895, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.0432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4120 }, { "epoch": 0.2989047653586712, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4121 }, { "epoch": 0.29897729745412344, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.2135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4122 }, { "epoch": 0.2990498295495757, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.4711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4123 }, { "epoch": 0.2991223616450279, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.9597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4124 }, { "epoch": 0.29919489374048014, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4125 }, { "epoch": 0.2992674258359324, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4126 }, { "epoch": 0.2993399579313846, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4127 }, { "epoch": 0.2994124900268369, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4128 }, { "epoch": 0.2994850221222891, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.5645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4129 }, { "epoch": 0.29955755421774133, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4130 }, { "epoch": 0.2996300863131936, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.8596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4131 }, { "epoch": 0.2997026184086458, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.1451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4132 }, { "epoch": 0.2997751505040981, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 9.1001, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4133 }, { "epoch": 0.2998476825995503, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.4967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4134 }, { "epoch": 0.2999202146950025, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4135 }, { "epoch": 0.2999927467904548, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4136 }, { "epoch": 0.300065278885907, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.8399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4137 }, { "epoch": 0.3001378109813593, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4138 }, { "epoch": 0.3002103430768115, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.2125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4139 }, { "epoch": 0.3002828751722637, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.8688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4140 }, { "epoch": 0.300355407267716, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.4431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4141 }, { "epoch": 0.3004279393631682, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4142 }, { "epoch": 0.30050047145862047, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.7451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4143 }, { "epoch": 0.3005730035540727, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4144 }, { "epoch": 0.3006455356495249, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 9.1525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4145 }, { "epoch": 0.30071806774497717, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4146 }, { "epoch": 0.3007905998404294, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4147 }, { "epoch": 0.3008631319358816, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4148 }, { "epoch": 0.30093566403133387, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.8069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4149 }, { "epoch": 0.3010081961267861, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.4041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4150 }, { "epoch": 0.30108072822223836, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.9601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4151 }, { "epoch": 0.3011532603176906, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4152 }, { "epoch": 0.3012257924131428, "grad_norm": 20.125, "learning_rate": 0.0003, "loss": 8.831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4153 }, { "epoch": 0.30129832450859506, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.3481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4154 }, { "epoch": 0.3013708566040473, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4155 }, { "epoch": 0.30144338869949955, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.4117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4156 }, { "epoch": 0.30151592079495176, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4157 }, { "epoch": 0.301588452890404, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4158 }, { "epoch": 0.30166098498585625, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4159 }, { "epoch": 0.30173351708130847, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.6826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4160 }, { "epoch": 0.30180604917676074, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4161 }, { "epoch": 0.30187858127221295, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.5717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4162 }, { "epoch": 0.30195111336766517, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.6731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4163 }, { "epoch": 0.30202364546311744, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.7173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4164 }, { "epoch": 0.30209617755856966, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.2468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4165 }, { "epoch": 0.3021687096540219, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4166 }, { "epoch": 0.30224124174947414, "grad_norm": 81.5, "learning_rate": 0.0003, "loss": 9.3157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4167 }, { "epoch": 0.30231377384492636, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4168 }, { "epoch": 0.30238630594037863, "grad_norm": 1.5, "learning_rate": 0.0003, "loss": 9.2227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4169 }, { "epoch": 0.30245883803583085, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.1808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4170 }, { "epoch": 0.3025313701312831, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4171 }, { "epoch": 0.30260390222673533, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.2032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4172 }, { "epoch": 0.30267643432218755, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.3314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4173 }, { "epoch": 0.3027489664176398, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4174 }, { "epoch": 0.30282149851309204, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4175 }, { "epoch": 0.3028940306085443, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.8499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4176 }, { "epoch": 0.3029665627039965, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4177 }, { "epoch": 0.30303909479944874, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.9248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4178 }, { "epoch": 0.303111626894901, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4179 }, { "epoch": 0.3031841589903532, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4180 }, { "epoch": 0.3032566910858055, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4181 }, { "epoch": 0.3033292231812577, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.1239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4182 }, { "epoch": 0.30340175527670993, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.8468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4183 }, { "epoch": 0.3034742873721622, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.9765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4184 }, { "epoch": 0.3035468194676144, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.1026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4185 }, { "epoch": 0.30361935156306663, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 8.5255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4186 }, { "epoch": 0.3036918836585189, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.4712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4187 }, { "epoch": 0.3037644157539711, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4188 }, { "epoch": 0.3038369478494234, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4189 }, { "epoch": 0.3039094799448756, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4190 }, { "epoch": 0.3039820120403278, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 8.442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4191 }, { "epoch": 0.3040545441357801, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.1822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4192 }, { "epoch": 0.3041270762312323, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4193 }, { "epoch": 0.3041996083266846, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.4775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4194 }, { "epoch": 0.3042721404221368, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 9.6035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4195 }, { "epoch": 0.304344672517589, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4196 }, { "epoch": 0.3044172046130413, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4197 }, { "epoch": 0.3044897367084935, "grad_norm": 80.0, "learning_rate": 0.0003, "loss": 9.0067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4198 }, { "epoch": 0.30456226880394577, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.2017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4199 }, { "epoch": 0.304634800899398, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4200 }, { "epoch": 0.3047073329948502, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4201 }, { "epoch": 0.30477986509030247, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.5243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4202 }, { "epoch": 0.3048523971857547, "grad_norm": 17.125, "learning_rate": 0.0003, "loss": 8.6, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4203 }, { "epoch": 0.30492492928120696, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.4824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4204 }, { "epoch": 0.3049974613766592, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4205 }, { "epoch": 0.3050699934721114, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 8.9227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4206 }, { "epoch": 0.30514252556756366, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.4019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4207 }, { "epoch": 0.3052150576630159, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4208 }, { "epoch": 0.30528758975846815, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.5565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4209 }, { "epoch": 0.30536012185392036, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.2113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4210 }, { "epoch": 0.3054326539493726, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.5413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4211 }, { "epoch": 0.30550518604482485, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.6939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4212 }, { "epoch": 0.30557771814027707, "grad_norm": 22.375, "learning_rate": 0.0003, "loss": 8.601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4213 }, { "epoch": 0.30565025023572934, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.3896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4214 }, { "epoch": 0.30572278233118155, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4215 }, { "epoch": 0.30579531442663377, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.7972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4216 }, { "epoch": 0.30586784652208604, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.5779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4217 }, { "epoch": 0.30594037861753826, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4218 }, { "epoch": 0.30601291071299047, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.2516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4219 }, { "epoch": 0.30608544280844274, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.1084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4220 }, { "epoch": 0.30615797490389496, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4221 }, { "epoch": 0.30623050699934723, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4222 }, { "epoch": 0.30630303909479945, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.9157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4223 }, { "epoch": 0.30637557119025166, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.9205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4224 }, { "epoch": 0.30644810328570393, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.5214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4225 }, { "epoch": 0.30652063538115615, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.4596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4226 }, { "epoch": 0.3065931674766084, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4227 }, { "epoch": 0.30666569957206063, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.7498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4228 }, { "epoch": 0.30673823166751285, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4229 }, { "epoch": 0.3068107637629651, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4230 }, { "epoch": 0.30688329585841734, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4231 }, { "epoch": 0.3069558279538696, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4232 }, { "epoch": 0.3070283600493218, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.3222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4233 }, { "epoch": 0.30710089214477404, "grad_norm": 14.8125, "learning_rate": 0.0003, "loss": 8.6161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4234 }, { "epoch": 0.3071734242402263, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.0004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4235 }, { "epoch": 0.3072459563356785, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.9556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4236 }, { "epoch": 0.3073184884311308, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.3911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4237 }, { "epoch": 0.307391020526583, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.5041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4238 }, { "epoch": 0.30746355262203523, "grad_norm": 1.53125, "learning_rate": 0.0003, "loss": 9.3213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4239 }, { "epoch": 0.3075360847174875, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4240 }, { "epoch": 0.3076086168129397, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4241 }, { "epoch": 0.307681148908392, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4242 }, { "epoch": 0.3077536810038442, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4243 }, { "epoch": 0.3078262130992964, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4244 }, { "epoch": 0.3078987451947487, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4245 }, { "epoch": 0.3079712772902009, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4246 }, { "epoch": 0.3080438093856532, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4247 }, { "epoch": 0.3081163414811054, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4248 }, { "epoch": 0.3081888735765576, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.4466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4249 }, { "epoch": 0.3082614056720099, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4250 }, { "epoch": 0.3083339377674621, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.7934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4251 }, { "epoch": 0.3084064698629143, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4252 }, { "epoch": 0.3084790019583666, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4253 }, { "epoch": 0.3085515340538188, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4254 }, { "epoch": 0.30862406614927107, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.0078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4255 }, { "epoch": 0.3086965982447233, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4256 }, { "epoch": 0.3087691303401755, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4257 }, { "epoch": 0.3088416624356278, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4258 }, { "epoch": 0.30891419453108, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.7848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4259 }, { "epoch": 0.30898672662653226, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.8341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4260 }, { "epoch": 0.3090592587219845, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4261 }, { "epoch": 0.3091317908174367, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4262 }, { "epoch": 0.30920432291288896, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.2742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4263 }, { "epoch": 0.3092768550083412, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.1013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4264 }, { "epoch": 0.30934938710379345, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4265 }, { "epoch": 0.30942191919924567, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4266 }, { "epoch": 0.3094944512946979, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.3639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4267 }, { "epoch": 0.30956698339015015, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4268 }, { "epoch": 0.30963951548560237, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4269 }, { "epoch": 0.30971204758105464, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4270 }, { "epoch": 0.30978457967650685, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.9918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4271 }, { "epoch": 0.30985711177195907, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4272 }, { "epoch": 0.30992964386741134, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.5215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4273 }, { "epoch": 0.31000217596286356, "grad_norm": 17.125, "learning_rate": 0.0003, "loss": 8.5848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4274 }, { "epoch": 0.31007470805831583, "grad_norm": 15.625, "learning_rate": 0.0003, "loss": 9.3365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4275 }, { "epoch": 0.31014724015376804, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.1076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4276 }, { "epoch": 0.31021977224922026, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.9175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4277 }, { "epoch": 0.31029230434467253, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4278 }, { "epoch": 0.31036483644012475, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.5314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4279 }, { "epoch": 0.310437368535577, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.9028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4280 }, { "epoch": 0.31050990063102923, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.0893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4281 }, { "epoch": 0.31058243272648145, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.0667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4282 }, { "epoch": 0.3106549648219337, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4283 }, { "epoch": 0.31072749691738594, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4284 }, { "epoch": 0.3108000290128382, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4285 }, { "epoch": 0.3108725611082904, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4286 }, { "epoch": 0.31094509320374264, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.9505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4287 }, { "epoch": 0.3110176252991949, "grad_norm": 1.453125, "learning_rate": 0.0003, "loss": 8.8073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4288 }, { "epoch": 0.3110901573946471, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4289 }, { "epoch": 0.31116268949009934, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4290 }, { "epoch": 0.3112352215855516, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4291 }, { "epoch": 0.31130775368100383, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4292 }, { "epoch": 0.3113802857764561, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4293 }, { "epoch": 0.3114528178719083, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4294 }, { "epoch": 0.31152534996736053, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 9.1959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4295 }, { "epoch": 0.3115978820628128, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.2853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4296 }, { "epoch": 0.311670414158265, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.1752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4297 }, { "epoch": 0.3117429462537173, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4298 }, { "epoch": 0.3118154783491695, "grad_norm": 11.4375, "learning_rate": 0.0003, "loss": 9.1453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4299 }, { "epoch": 0.3118880104446217, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4300 }, { "epoch": 0.311960542540074, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4301 }, { "epoch": 0.3120330746355262, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.6294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4302 }, { "epoch": 0.3121056067309785, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4303 }, { "epoch": 0.3121781388264307, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.2902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4304 }, { "epoch": 0.3122506709218829, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.9703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4305 }, { "epoch": 0.3123232030173352, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 9.3768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4306 }, { "epoch": 0.3123957351127874, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 8.9257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4307 }, { "epoch": 0.31246826720823967, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.9132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4308 }, { "epoch": 0.3125407993036919, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.3054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4309 }, { "epoch": 0.3126133313991441, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.4039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4310 }, { "epoch": 0.31268586349459637, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4311 }, { "epoch": 0.3127583955900486, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4312 }, { "epoch": 0.31283092768550086, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.9332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4313 }, { "epoch": 0.3129034597809531, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.6815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4314 }, { "epoch": 0.3129759918764053, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4315 }, { "epoch": 0.31304852397185756, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4316 }, { "epoch": 0.3131210560673098, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.1833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4317 }, { "epoch": 0.31319358816276205, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4318 }, { "epoch": 0.31326612025821426, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4319 }, { "epoch": 0.3133386523536665, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.2449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4320 }, { "epoch": 0.31341118444911875, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.8055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4321 }, { "epoch": 0.31348371654457097, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.8203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4322 }, { "epoch": 0.3135562486400232, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.3828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4323 }, { "epoch": 0.31362878073547545, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4324 }, { "epoch": 0.31370131283092767, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4325 }, { "epoch": 0.31377384492637994, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4326 }, { "epoch": 0.31384637702183216, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4327 }, { "epoch": 0.3139189091172844, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.2656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4328 }, { "epoch": 0.31399144121273664, "grad_norm": 17.375, "learning_rate": 0.0003, "loss": 8.6657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4329 }, { "epoch": 0.31406397330818886, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.1311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4330 }, { "epoch": 0.31413650540364113, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4331 }, { "epoch": 0.31420903749909335, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4332 }, { "epoch": 0.31428156959454556, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4333 }, { "epoch": 0.31435410168999783, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.5551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4334 }, { "epoch": 0.31442663378545005, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4335 }, { "epoch": 0.3144991658809023, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4336 }, { "epoch": 0.31457169797635454, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.4659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4337 }, { "epoch": 0.31464423007180675, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.5003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4338 }, { "epoch": 0.314716762167259, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4339 }, { "epoch": 0.31478929426271124, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.1413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4340 }, { "epoch": 0.3148618263581635, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4341 }, { "epoch": 0.3149343584536157, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.6751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4342 }, { "epoch": 0.31500689054906794, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.6238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4343 }, { "epoch": 0.3150794226445202, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4344 }, { "epoch": 0.31515195473997243, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.8541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4345 }, { "epoch": 0.3152244868354247, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.2939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4346 }, { "epoch": 0.3152970189308769, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.0353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4347 }, { "epoch": 0.31536955102632913, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.3085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4348 }, { "epoch": 0.3154420831217814, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4349 }, { "epoch": 0.3155146152172336, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.4921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4350 }, { "epoch": 0.3155871473126859, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 9.039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4351 }, { "epoch": 0.3156596794081381, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4352 }, { "epoch": 0.3157322115035903, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.6314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4353 }, { "epoch": 0.3158047435990426, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.3328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4354 }, { "epoch": 0.3158772756944948, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.0677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4355 }, { "epoch": 0.3159498077899471, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.7342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4356 }, { "epoch": 0.3160223398853993, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4357 }, { "epoch": 0.3160948719808515, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4358 }, { "epoch": 0.3161674040763038, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.2312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4359 }, { "epoch": 0.316239936171756, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 9.0459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4360 }, { "epoch": 0.3163124682672082, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4361 }, { "epoch": 0.3163850003626605, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4362 }, { "epoch": 0.3164575324581127, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.1792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4363 }, { "epoch": 0.31653006455356497, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4364 }, { "epoch": 0.3166025966490172, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.3201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4365 }, { "epoch": 0.3166751287444694, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4366 }, { "epoch": 0.3167476608399217, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.94, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4367 }, { "epoch": 0.3168201929353739, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4368 }, { "epoch": 0.31689272503082616, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4369 }, { "epoch": 0.3169652571262784, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4370 }, { "epoch": 0.3170377892217306, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.0402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4371 }, { "epoch": 0.31711032131718286, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4372 }, { "epoch": 0.3171828534126351, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4373 }, { "epoch": 0.31725538550808735, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4374 }, { "epoch": 0.31732791760353957, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.0828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4375 }, { "epoch": 0.3174004496989918, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.9284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4376 }, { "epoch": 0.31747298179444405, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4377 }, { "epoch": 0.31754551388989627, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.2455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4378 }, { "epoch": 0.31761804598534854, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4379 }, { "epoch": 0.31769057808080076, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.5779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4380 }, { "epoch": 0.31776311017625297, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4381 }, { "epoch": 0.31783564227170524, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4382 }, { "epoch": 0.31790817436715746, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4383 }, { "epoch": 0.31798070646260973, "grad_norm": 20.25, "learning_rate": 0.0003, "loss": 9.1822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4384 }, { "epoch": 0.31805323855806195, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.3234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4385 }, { "epoch": 0.31812577065351416, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4386 }, { "epoch": 0.31819830274896643, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4387 }, { "epoch": 0.31827083484441865, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.4016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4388 }, { "epoch": 0.3183433669398709, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4389 }, { "epoch": 0.31841589903532314, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4390 }, { "epoch": 0.31848843113077535, "grad_norm": 15.0, "learning_rate": 0.0003, "loss": 9.0752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4391 }, { "epoch": 0.3185609632262276, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.6208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4392 }, { "epoch": 0.31863349532167984, "grad_norm": 32.0, "learning_rate": 0.0003, "loss": 9.0533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4393 }, { "epoch": 0.31870602741713205, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.1952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4394 }, { "epoch": 0.3187785595125843, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.7845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4395 }, { "epoch": 0.31885109160803654, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.0779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4396 }, { "epoch": 0.3189236237034888, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.0502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4397 }, { "epoch": 0.31899615579894103, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4398 }, { "epoch": 0.31906868789439324, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4399 }, { "epoch": 0.3191412199898455, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.7186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4400 }, { "epoch": 0.31921375208529773, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.2157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4401 }, { "epoch": 0.31928628418075, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4402 }, { "epoch": 0.3193588162762022, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.7482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4403 }, { "epoch": 0.31943134837165443, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4404 }, { "epoch": 0.3195038804671067, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4405 }, { "epoch": 0.3195764125625589, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4406 }, { "epoch": 0.3196489446580112, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.1165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4407 }, { "epoch": 0.3197214767534634, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.1225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4408 }, { "epoch": 0.3197940088489156, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4409 }, { "epoch": 0.3198665409443679, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.8714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4410 }, { "epoch": 0.3199390730398201, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4411 }, { "epoch": 0.3200116051352724, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.3539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4412 }, { "epoch": 0.3200841372307246, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.4183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4413 }, { "epoch": 0.3201566693261768, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.7957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4414 }, { "epoch": 0.3202292014216291, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.6185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4415 }, { "epoch": 0.3203017335170813, "grad_norm": 1.5703125, "learning_rate": 0.0003, "loss": 8.8147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4416 }, { "epoch": 0.32037426561253357, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.0242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4417 }, { "epoch": 0.3204467977079858, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.4291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4418 }, { "epoch": 0.320519329803438, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4419 }, { "epoch": 0.3205918618988903, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.2251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4420 }, { "epoch": 0.3206643939943425, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4421 }, { "epoch": 0.32073692608979476, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.2832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4422 }, { "epoch": 0.320809458185247, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 9.4302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4423 }, { "epoch": 0.3208819902806992, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.5146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4424 }, { "epoch": 0.32095452237615146, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.8296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4425 }, { "epoch": 0.3210270544716037, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.1896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4426 }, { "epoch": 0.3210995865670559, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4427 }, { "epoch": 0.32117211866250817, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.5725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4428 }, { "epoch": 0.3212446507579604, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.5933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4429 }, { "epoch": 0.32131718285341265, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4430 }, { "epoch": 0.32138971494886487, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 8.933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4431 }, { "epoch": 0.3214622470443171, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.2204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4432 }, { "epoch": 0.32153477913976936, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.3367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4433 }, { "epoch": 0.32160731123522157, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 8.9013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4434 }, { "epoch": 0.32167984333067384, "grad_norm": 14.9375, "learning_rate": 0.0003, "loss": 8.6994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4435 }, { "epoch": 0.32175237542612606, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.2433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4436 }, { "epoch": 0.3218249075215783, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.0534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4437 }, { "epoch": 0.32189743961703055, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.9752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4438 }, { "epoch": 0.32196997171248276, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 8.8109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4439 }, { "epoch": 0.32204250380793503, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4440 }, { "epoch": 0.32211503590338725, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.8743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4441 }, { "epoch": 0.32218756799883946, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4442 }, { "epoch": 0.32226010009429173, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.7496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4443 }, { "epoch": 0.32233263218974395, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4444 }, { "epoch": 0.3224051642851962, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4445 }, { "epoch": 0.32247769638064844, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4446 }, { "epoch": 0.32255022847610065, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4447 }, { "epoch": 0.3226227605715529, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.3032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4448 }, { "epoch": 0.32269529266700514, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.6302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4449 }, { "epoch": 0.3227678247624574, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4450 }, { "epoch": 0.3228403568579096, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4451 }, { "epoch": 0.32291288895336184, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.2535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4452 }, { "epoch": 0.3229854210488141, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.2676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4453 }, { "epoch": 0.32305795314426633, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4454 }, { "epoch": 0.3231304852397186, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4455 }, { "epoch": 0.3232030173351708, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.7659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4456 }, { "epoch": 0.32327554943062303, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4457 }, { "epoch": 0.3233480815260753, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.8759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4458 }, { "epoch": 0.3234206136215275, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4459 }, { "epoch": 0.3234931457169798, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.3901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4460 }, { "epoch": 0.323565677812432, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4461 }, { "epoch": 0.3236382099078842, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4462 }, { "epoch": 0.3237107420033365, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4463 }, { "epoch": 0.3237832740987887, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4464 }, { "epoch": 0.3238558061942409, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.5085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4465 }, { "epoch": 0.3239283382896932, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.6265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4466 }, { "epoch": 0.3240008703851454, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.9927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4467 }, { "epoch": 0.3240734024805977, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.2353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4468 }, { "epoch": 0.3241459345760499, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.7004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4469 }, { "epoch": 0.3242184666715021, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4470 }, { "epoch": 0.3242909987669544, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4471 }, { "epoch": 0.3243635308624066, "grad_norm": 1.46875, "learning_rate": 0.0003, "loss": 9.0457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4472 }, { "epoch": 0.3244360629578589, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4473 }, { "epoch": 0.3245085950533111, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.7792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4474 }, { "epoch": 0.3245811271487633, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4475 }, { "epoch": 0.3246536592442156, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.8898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4476 }, { "epoch": 0.3247261913396678, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4477 }, { "epoch": 0.32479872343512006, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 8.9051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4478 }, { "epoch": 0.3248712555305723, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.8484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4479 }, { "epoch": 0.3249437876260245, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4480 }, { "epoch": 0.32501631972147677, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.5294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4481 }, { "epoch": 0.325088851816929, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 9.0919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4482 }, { "epoch": 0.32516138391238125, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.1179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4483 }, { "epoch": 0.32523391600783347, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4484 }, { "epoch": 0.3253064481032857, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.6696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4485 }, { "epoch": 0.32537898019873795, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.0178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4486 }, { "epoch": 0.32545151229419017, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4487 }, { "epoch": 0.32552404438964244, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 9.2413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4488 }, { "epoch": 0.32559657648509466, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4489 }, { "epoch": 0.3256691085805469, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4490 }, { "epoch": 0.32574164067599914, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4491 }, { "epoch": 0.32581417277145136, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4492 }, { "epoch": 0.32588670486690363, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.8445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4493 }, { "epoch": 0.32595923696235585, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.6969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4494 }, { "epoch": 0.32603176905780806, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4495 }, { "epoch": 0.32610430115326033, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.3499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4496 }, { "epoch": 0.32617683324871255, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4497 }, { "epoch": 0.32624936534416477, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.1952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4498 }, { "epoch": 0.32632189743961704, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.4305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4499 }, { "epoch": 0.32639442953506925, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.8767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4500 }, { "epoch": 0.3264669616305215, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4501 }, { "epoch": 0.32653949372597374, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.0296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4502 }, { "epoch": 0.32661202582142596, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4503 }, { "epoch": 0.3266845579168782, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4504 }, { "epoch": 0.32675709001233044, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4505 }, { "epoch": 0.3268296221077827, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.7815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4506 }, { "epoch": 0.32690215420323493, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4507 }, { "epoch": 0.32697468629868714, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.1218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4508 }, { "epoch": 0.3270472183941394, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.0171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4509 }, { "epoch": 0.32711975048959163, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4510 }, { "epoch": 0.3271922825850439, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4511 }, { "epoch": 0.3272648146804961, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.3513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4512 }, { "epoch": 0.32733734677594833, "grad_norm": 34.75, "learning_rate": 0.0003, "loss": 8.7876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4513 }, { "epoch": 0.3274098788714006, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 9.3043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4514 }, { "epoch": 0.3274824109668528, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4515 }, { "epoch": 0.3275549430623051, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4516 }, { "epoch": 0.3276274751577573, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 8.7509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4517 }, { "epoch": 0.3277000072532095, "grad_norm": 18.125, "learning_rate": 0.0003, "loss": 9.0615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4518 }, { "epoch": 0.3277725393486618, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4519 }, { "epoch": 0.327845071444114, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4520 }, { "epoch": 0.3279176035395663, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4521 }, { "epoch": 0.3279901356350185, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.7252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4522 }, { "epoch": 0.3280626677304707, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4523 }, { "epoch": 0.328135199825923, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4524 }, { "epoch": 0.3282077319213752, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4525 }, { "epoch": 0.32828026401682747, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4526 }, { "epoch": 0.3283527961122797, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4527 }, { "epoch": 0.3284253282077319, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4528 }, { "epoch": 0.3284978603031842, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.8803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4529 }, { "epoch": 0.3285703923986364, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4530 }, { "epoch": 0.32864292449408866, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4531 }, { "epoch": 0.3287154565895409, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4532 }, { "epoch": 0.3287879886849931, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.5805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4533 }, { "epoch": 0.32886052078044536, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4534 }, { "epoch": 0.3289330528758976, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4535 }, { "epoch": 0.3290055849713498, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4536 }, { "epoch": 0.32907811706680207, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4537 }, { "epoch": 0.3291506491622543, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.8317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4538 }, { "epoch": 0.32922318125770655, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.7456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4539 }, { "epoch": 0.32929571335315877, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.0501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4540 }, { "epoch": 0.329368245448611, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.8594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4541 }, { "epoch": 0.32944077754406326, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 8.9962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4542 }, { "epoch": 0.3295133096395155, "grad_norm": 14.5625, "learning_rate": 0.0003, "loss": 8.6142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4543 }, { "epoch": 0.32958584173496774, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.8194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4544 }, { "epoch": 0.32965837383041996, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.6889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4545 }, { "epoch": 0.3297309059258722, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4546 }, { "epoch": 0.32980343802132445, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4547 }, { "epoch": 0.32987597011677666, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.1198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4548 }, { "epoch": 0.32994850221222893, "grad_norm": 15.3125, "learning_rate": 0.0003, "loss": 8.7887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4549 }, { "epoch": 0.33002103430768115, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.3687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4550 }, { "epoch": 0.33009356640313336, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.5511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4551 }, { "epoch": 0.33016609849858564, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4552 }, { "epoch": 0.33023863059403785, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.8672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4553 }, { "epoch": 0.3303111626894901, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.9497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4554 }, { "epoch": 0.33038369478494234, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4555 }, { "epoch": 0.33045622688039455, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.5218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4556 }, { "epoch": 0.3305287589758468, "grad_norm": 28.25, "learning_rate": 0.0003, "loss": 8.6735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4557 }, { "epoch": 0.33060129107129904, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.7358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4558 }, { "epoch": 0.3306738231667513, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.5396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4559 }, { "epoch": 0.33074635526220353, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4560 }, { "epoch": 0.33081888735765574, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4561 }, { "epoch": 0.330891419453108, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.0904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4562 }, { "epoch": 0.33096395154856023, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.7561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4563 }, { "epoch": 0.3310364836440125, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4564 }, { "epoch": 0.3311090157394647, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4565 }, { "epoch": 0.33118154783491693, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.03, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4566 }, { "epoch": 0.3312540799303692, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.6326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4567 }, { "epoch": 0.3313266120258214, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4568 }, { "epoch": 0.33139914412127364, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.6615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4569 }, { "epoch": 0.3314716762167259, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.8943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4570 }, { "epoch": 0.3315442083121781, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.4809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4571 }, { "epoch": 0.3316167404076304, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4572 }, { "epoch": 0.3316892725030826, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4573 }, { "epoch": 0.3317618045985348, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.2495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4574 }, { "epoch": 0.3318343366939871, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4575 }, { "epoch": 0.3319068687894393, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4576 }, { "epoch": 0.3319794008848916, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.7213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4577 }, { "epoch": 0.3320519329803438, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4578 }, { "epoch": 0.332124465075796, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.8904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4579 }, { "epoch": 0.3321969971712483, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.1427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4580 }, { "epoch": 0.3322695292667005, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4581 }, { "epoch": 0.3323420613621528, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.6511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4582 }, { "epoch": 0.332414593457605, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.2825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4583 }, { "epoch": 0.3324871255530572, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.1068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4584 }, { "epoch": 0.3325596576485095, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4585 }, { "epoch": 0.3326321897439617, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4586 }, { "epoch": 0.33270472183941396, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4587 }, { "epoch": 0.3327772539348662, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4588 }, { "epoch": 0.3328497860303184, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.0233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4589 }, { "epoch": 0.33292231812577067, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 9.1548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4590 }, { "epoch": 0.3329948502212229, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.3328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4591 }, { "epoch": 0.33306738231667515, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4592 }, { "epoch": 0.33313991441212737, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.8297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4593 }, { "epoch": 0.3332124465075796, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4594 }, { "epoch": 0.33328497860303186, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4595 }, { "epoch": 0.33335751069848407, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4596 }, { "epoch": 0.33343004279393634, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.8212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4597 }, { "epoch": 0.33350257488938856, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.0013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4598 }, { "epoch": 0.3335751069848408, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4599 }, { "epoch": 0.33364763908029305, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.2248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4600 }, { "epoch": 0.33372017117574526, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4601 }, { "epoch": 0.3337927032711975, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.1263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4602 }, { "epoch": 0.33386523536664975, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.3662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4603 }, { "epoch": 0.33393776746210196, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4604 }, { "epoch": 0.33401029955755424, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.2096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4605 }, { "epoch": 0.33408283165300645, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.6879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4606 }, { "epoch": 0.33415536374845867, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4607 }, { "epoch": 0.33422789584391094, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4608 }, { "epoch": 0.33430042793936315, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.2334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4609 }, { "epoch": 0.3343729600348154, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.7892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4610 }, { "epoch": 0.33444549213026764, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4611 }, { "epoch": 0.33451802422571986, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4612 }, { "epoch": 0.33459055632117213, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4613 }, { "epoch": 0.33466308841662434, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.2926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4614 }, { "epoch": 0.3347356205120766, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4615 }, { "epoch": 0.33480815260752883, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.0042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4616 }, { "epoch": 0.33488068470298105, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4617 }, { "epoch": 0.3349532167984333, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4618 }, { "epoch": 0.33502574889388553, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4619 }, { "epoch": 0.3350982809893378, "grad_norm": 30.125, "learning_rate": 0.0003, "loss": 9.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4620 }, { "epoch": 0.33517081308479, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4621 }, { "epoch": 0.33524334518024224, "grad_norm": 24.375, "learning_rate": 0.0003, "loss": 8.7413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4622 }, { "epoch": 0.3353158772756945, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.9046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4623 }, { "epoch": 0.3353884093711467, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.3962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4624 }, { "epoch": 0.335460941466599, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 9.0581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4625 }, { "epoch": 0.3355334735620512, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4626 }, { "epoch": 0.3356060056575034, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4627 }, { "epoch": 0.3356785377529557, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 8.9685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4628 }, { "epoch": 0.3357510698484079, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.4224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4629 }, { "epoch": 0.3358236019438602, "grad_norm": 24.0, "learning_rate": 0.0003, "loss": 9.1247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4630 }, { "epoch": 0.3358961340393124, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.5383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4631 }, { "epoch": 0.3359686661347646, "grad_norm": 19.5, "learning_rate": 0.0003, "loss": 9.2901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4632 }, { "epoch": 0.3360411982302169, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.4685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4633 }, { "epoch": 0.3361137303256691, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.1052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4634 }, { "epoch": 0.3361862624211214, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.4703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4635 }, { "epoch": 0.3362587945165736, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4636 }, { "epoch": 0.3363313266120258, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.6888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4637 }, { "epoch": 0.3364038587074781, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 9.0886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4638 }, { "epoch": 0.3364763908029303, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.1415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4639 }, { "epoch": 0.3365489228983825, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4640 }, { "epoch": 0.3366214549938348, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 9.2022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4641 }, { "epoch": 0.336693987089287, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4642 }, { "epoch": 0.33676651918473927, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.4164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4643 }, { "epoch": 0.3368390512801915, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.9179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4644 }, { "epoch": 0.3369115833756437, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4645 }, { "epoch": 0.33698411547109597, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4646 }, { "epoch": 0.3370566475665482, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4647 }, { "epoch": 0.33712917966200046, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4648 }, { "epoch": 0.33720171175745267, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 9.2286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4649 }, { "epoch": 0.3372742438529049, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4650 }, { "epoch": 0.33734677594835716, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4651 }, { "epoch": 0.3374193080438094, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 9.3243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4652 }, { "epoch": 0.33749184013926165, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.8767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4653 }, { "epoch": 0.33756437223471386, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.1599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4654 }, { "epoch": 0.3376369043301661, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.0783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4655 }, { "epoch": 0.33770943642561835, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4656 }, { "epoch": 0.33778196852107056, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4657 }, { "epoch": 0.33785450061652283, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4658 }, { "epoch": 0.33792703271197505, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4659 }, { "epoch": 0.33799956480742727, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4660 }, { "epoch": 0.33807209690287954, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4661 }, { "epoch": 0.33814462899833175, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4662 }, { "epoch": 0.338217161093784, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.5903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4663 }, { "epoch": 0.33828969318923624, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 9.1859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4664 }, { "epoch": 0.33836222528468846, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4665 }, { "epoch": 0.3384347573801407, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4666 }, { "epoch": 0.33850728947559294, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4667 }, { "epoch": 0.3385798215710452, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.3317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4668 }, { "epoch": 0.33865235366649743, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.4132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4669 }, { "epoch": 0.33872488576194965, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.6202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4670 }, { "epoch": 0.3387974178574019, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4671 }, { "epoch": 0.33886994995285413, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.6868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4672 }, { "epoch": 0.33894248204830635, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4673 }, { "epoch": 0.3390150141437586, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4674 }, { "epoch": 0.33908754623921084, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.2407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4675 }, { "epoch": 0.3391600783346631, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.8279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4676 }, { "epoch": 0.3392326104301153, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.7783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4677 }, { "epoch": 0.33930514252556754, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 8.8832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4678 }, { "epoch": 0.3393776746210198, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.5938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4679 }, { "epoch": 0.339450206716472, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4680 }, { "epoch": 0.3395227388119243, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4681 }, { "epoch": 0.3395952709073765, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.4797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4682 }, { "epoch": 0.3396678030028287, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4683 }, { "epoch": 0.339740335098281, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4684 }, { "epoch": 0.3398128671937332, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4685 }, { "epoch": 0.3398853992891855, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4686 }, { "epoch": 0.3399579313846377, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.2775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4687 }, { "epoch": 0.3400304634800899, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4688 }, { "epoch": 0.3401029955755422, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 9.0416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4689 }, { "epoch": 0.3401755276709944, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4690 }, { "epoch": 0.3402480597664467, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.8267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4691 }, { "epoch": 0.3403205918618989, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4692 }, { "epoch": 0.3403931239573511, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.2303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4693 }, { "epoch": 0.3404656560528034, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.4093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4694 }, { "epoch": 0.3405381881482556, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4695 }, { "epoch": 0.34061072024370787, "grad_norm": 24.75, "learning_rate": 0.0003, "loss": 8.8528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4696 }, { "epoch": 0.3406832523391601, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4697 }, { "epoch": 0.3407557844346123, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4698 }, { "epoch": 0.34082831653006457, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4699 }, { "epoch": 0.3409008486255168, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4700 }, { "epoch": 0.34097338072096905, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.2838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4701 }, { "epoch": 0.34104591281642127, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.9346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4702 }, { "epoch": 0.3411184449118735, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.6527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4703 }, { "epoch": 0.34119097700732576, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.8291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4704 }, { "epoch": 0.341263509102778, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4705 }, { "epoch": 0.34133604119823024, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.7581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4706 }, { "epoch": 0.34140857329368246, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4707 }, { "epoch": 0.3414811053891347, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4708 }, { "epoch": 0.34155363748458695, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.1785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4709 }, { "epoch": 0.34162616958003916, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.6194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4710 }, { "epoch": 0.3416987016754914, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.19, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4711 }, { "epoch": 0.34177123377094365, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4712 }, { "epoch": 0.34184376586639587, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.5277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4713 }, { "epoch": 0.34191629796184814, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4714 }, { "epoch": 0.34198883005730035, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4715 }, { "epoch": 0.34206136215275257, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.7034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4716 }, { "epoch": 0.34213389424820484, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4717 }, { "epoch": 0.34220642634365706, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.8939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4718 }, { "epoch": 0.3422789584391093, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.4611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4719 }, { "epoch": 0.34235149053456154, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.2248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4720 }, { "epoch": 0.34242402263001376, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.3301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4721 }, { "epoch": 0.34249655472546603, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.6656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4722 }, { "epoch": 0.34256908682091824, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.9005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4723 }, { "epoch": 0.3426416189163705, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 8.9558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4724 }, { "epoch": 0.34271415101182273, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4725 }, { "epoch": 0.34278668310727495, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4726 }, { "epoch": 0.3428592152027272, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.3415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4727 }, { "epoch": 0.34293174729817943, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.7186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4728 }, { "epoch": 0.3430042793936317, "grad_norm": 22.875, "learning_rate": 0.0003, "loss": 9.0866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4729 }, { "epoch": 0.3430768114890839, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4730 }, { "epoch": 0.34314934358453614, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4731 }, { "epoch": 0.3432218756799884, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4732 }, { "epoch": 0.3432944077754406, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.2818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4733 }, { "epoch": 0.3433669398708929, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4734 }, { "epoch": 0.3434394719663451, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4735 }, { "epoch": 0.3435120040617973, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4736 }, { "epoch": 0.3435845361572496, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4737 }, { "epoch": 0.3436570682527018, "grad_norm": 14.6875, "learning_rate": 0.0003, "loss": 8.7641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4738 }, { "epoch": 0.3437296003481541, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4739 }, { "epoch": 0.3438021324436063, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.6901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4740 }, { "epoch": 0.3438746645390585, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.4141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4741 }, { "epoch": 0.3439471966345108, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4742 }, { "epoch": 0.344019728729963, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4743 }, { "epoch": 0.3440922608254152, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4744 }, { "epoch": 0.3441647929208675, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.2035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4745 }, { "epoch": 0.3442373250163197, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.6436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4746 }, { "epoch": 0.344309857111772, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4747 }, { "epoch": 0.3443823892072242, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 9.1096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4748 }, { "epoch": 0.3444549213026764, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4749 }, { "epoch": 0.3445274533981287, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4750 }, { "epoch": 0.3445999854935809, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.6511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4751 }, { "epoch": 0.34467251758903317, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.2792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4752 }, { "epoch": 0.3447450496844854, "grad_norm": 27.625, "learning_rate": 0.0003, "loss": 8.9584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4753 }, { "epoch": 0.3448175817799376, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4754 }, { "epoch": 0.34489011387538987, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4755 }, { "epoch": 0.3449626459708421, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4756 }, { "epoch": 0.34503517806629436, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.3197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4757 }, { "epoch": 0.3451077101617466, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.0243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4758 }, { "epoch": 0.3451802422571988, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4759 }, { "epoch": 0.34525277435265106, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.2159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4760 }, { "epoch": 0.3453253064481033, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4761 }, { "epoch": 0.34539783854355555, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.3341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4762 }, { "epoch": 0.34547037063900776, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4763 }, { "epoch": 0.34554290273446, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 9.0721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4764 }, { "epoch": 0.34561543482991225, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.2687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4765 }, { "epoch": 0.34568796692536446, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.9473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4766 }, { "epoch": 0.34576049902081674, "grad_norm": 12.6875, "learning_rate": 0.0003, "loss": 8.7648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4767 }, { "epoch": 0.34583303111626895, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.8485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4768 }, { "epoch": 0.34590556321172117, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4769 }, { "epoch": 0.34597809530717344, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4770 }, { "epoch": 0.34605062740262565, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.8127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4771 }, { "epoch": 0.3461231594980779, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4772 }, { "epoch": 0.34619569159353014, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4773 }, { "epoch": 0.34626822368898236, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4774 }, { "epoch": 0.34634075578443463, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4775 }, { "epoch": 0.34641328787988684, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.2989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4776 }, { "epoch": 0.34648581997533906, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.6293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4777 }, { "epoch": 0.34655835207079133, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4778 }, { "epoch": 0.34663088416624355, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.0292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4779 }, { "epoch": 0.3467034162616958, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.8415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4780 }, { "epoch": 0.34677594835714803, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.8195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4781 }, { "epoch": 0.34684848045260025, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4782 }, { "epoch": 0.3469210125480525, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 9.2771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4783 }, { "epoch": 0.34699354464350474, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.7592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4784 }, { "epoch": 0.347066076738957, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4785 }, { "epoch": 0.3471386088344092, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.4055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4786 }, { "epoch": 0.34721114092986144, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4787 }, { "epoch": 0.3472836730253137, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.0918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4788 }, { "epoch": 0.3473562051207659, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4789 }, { "epoch": 0.3474287372162182, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4790 }, { "epoch": 0.3475012693116704, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.27, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4791 }, { "epoch": 0.34757380140712263, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4792 }, { "epoch": 0.3476463335025749, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.3378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4793 }, { "epoch": 0.3477188655980271, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4794 }, { "epoch": 0.3477913976934794, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4795 }, { "epoch": 0.3478639297889316, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4796 }, { "epoch": 0.3479364618843838, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.6632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4797 }, { "epoch": 0.3480089939798361, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.0218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4798 }, { "epoch": 0.3480815260752883, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4799 }, { "epoch": 0.3481540581707406, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4800 }, { "epoch": 0.3482265902661928, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.1048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4801 }, { "epoch": 0.348299122361645, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.9421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4802 }, { "epoch": 0.3483716544570973, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.7618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4803 }, { "epoch": 0.3484441865525495, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.0953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4804 }, { "epoch": 0.34851671864800177, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.2176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4805 }, { "epoch": 0.348589250743454, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.0182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4806 }, { "epoch": 0.3486617828389062, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.0571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4807 }, { "epoch": 0.34873431493435847, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.3335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4808 }, { "epoch": 0.3488068470298107, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4809 }, { "epoch": 0.34887937912526296, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.2383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4810 }, { "epoch": 0.34895191122071517, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4811 }, { "epoch": 0.3490244433161674, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 9.3471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4812 }, { "epoch": 0.34909697541161966, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4813 }, { "epoch": 0.3491695075070719, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.4692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4814 }, { "epoch": 0.3492420396025241, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4815 }, { "epoch": 0.34931457169797636, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4816 }, { "epoch": 0.3493871037934286, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4817 }, { "epoch": 0.34945963588888085, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.4842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4818 }, { "epoch": 0.34953216798433306, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.5081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4819 }, { "epoch": 0.3496047000797853, "grad_norm": 22.125, "learning_rate": 0.0003, "loss": 8.9064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4820 }, { "epoch": 0.34967723217523755, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.5663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4821 }, { "epoch": 0.34974976427068977, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4822 }, { "epoch": 0.34982229636614204, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.1687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4823 }, { "epoch": 0.34989482846159425, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.8562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4824 }, { "epoch": 0.34996736055704647, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4825 }, { "epoch": 0.35003989265249874, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4826 }, { "epoch": 0.35011242474795096, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.0977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4827 }, { "epoch": 0.35018495684340323, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4828 }, { "epoch": 0.35025748893885544, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4829 }, { "epoch": 0.35033002103430766, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.8476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4830 }, { "epoch": 0.35040255312975993, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4831 }, { "epoch": 0.35047508522521215, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4832 }, { "epoch": 0.3505476173206644, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4833 }, { "epoch": 0.35062014941611663, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4834 }, { "epoch": 0.35069268151156885, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 9.1214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4835 }, { "epoch": 0.3507652136070211, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.6507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4836 }, { "epoch": 0.35083774570247334, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.0264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4837 }, { "epoch": 0.3509102777979256, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4838 }, { "epoch": 0.3509828098933778, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4839 }, { "epoch": 0.35105534198883004, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4840 }, { "epoch": 0.3511278740842823, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.3744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4841 }, { "epoch": 0.3512004061797345, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.0564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4842 }, { "epoch": 0.3512729382751868, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4843 }, { "epoch": 0.351345470370639, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4844 }, { "epoch": 0.35141800246609123, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.8173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4845 }, { "epoch": 0.3514905345615435, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4846 }, { "epoch": 0.3515630666569957, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.0194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4847 }, { "epoch": 0.35163559875244793, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4848 }, { "epoch": 0.3517081308479002, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4849 }, { "epoch": 0.3517806629433524, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4850 }, { "epoch": 0.3518531950388047, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.7793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4851 }, { "epoch": 0.3519257271342569, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.3947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4852 }, { "epoch": 0.3519982592297091, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.3959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4853 }, { "epoch": 0.3520707913251614, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4854 }, { "epoch": 0.3521433234206136, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.6981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4855 }, { "epoch": 0.3522158555160659, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4856 }, { "epoch": 0.3522883876115181, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4857 }, { "epoch": 0.3523609197069703, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.5133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4858 }, { "epoch": 0.3524334518024226, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.9277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4859 }, { "epoch": 0.3525059838978748, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4860 }, { "epoch": 0.35257851599332707, "grad_norm": 28.5, "learning_rate": 0.0003, "loss": 9.3028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4861 }, { "epoch": 0.3526510480887793, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.1627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4862 }, { "epoch": 0.3527235801842315, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.1899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4863 }, { "epoch": 0.35279611227968377, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.1067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4864 }, { "epoch": 0.352868644375136, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4865 }, { "epoch": 0.35294117647058826, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.5506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4866 }, { "epoch": 0.3530137085660405, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4867 }, { "epoch": 0.3530862406614927, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.7516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4868 }, { "epoch": 0.35315877275694496, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4869 }, { "epoch": 0.3532313048523972, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.0874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4870 }, { "epoch": 0.35330383694784945, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4871 }, { "epoch": 0.35337636904330166, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4872 }, { "epoch": 0.3534489011387539, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 9.3413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4873 }, { "epoch": 0.35352143323420615, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.4852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4874 }, { "epoch": 0.35359396532965837, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.3546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4875 }, { "epoch": 0.35366649742511064, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.1257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4876 }, { "epoch": 0.35373902952056285, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.2927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4877 }, { "epoch": 0.35381156161601507, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4878 }, { "epoch": 0.35388409371146734, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.9926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4879 }, { "epoch": 0.35395662580691956, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.7411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4880 }, { "epoch": 0.3540291579023718, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4881 }, { "epoch": 0.35410168999782404, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.2551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4882 }, { "epoch": 0.35417422209327626, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4883 }, { "epoch": 0.35424675418872853, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4884 }, { "epoch": 0.35431928628418075, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4885 }, { "epoch": 0.35439181837963296, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4886 }, { "epoch": 0.35446435047508523, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.5953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4887 }, { "epoch": 0.35453688257053745, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4888 }, { "epoch": 0.3546094146659897, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.7376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4889 }, { "epoch": 0.35468194676144194, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4890 }, { "epoch": 0.35475447885689415, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.0446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4891 }, { "epoch": 0.3548270109523464, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4892 }, { "epoch": 0.35489954304779864, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4893 }, { "epoch": 0.3549720751432509, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.7659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4894 }, { "epoch": 0.3550446072387031, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.5792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4895 }, { "epoch": 0.35511713933415534, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 8.6204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4896 }, { "epoch": 0.3551896714296076, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.0796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4897 }, { "epoch": 0.3552622035250598, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.4247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4898 }, { "epoch": 0.3553347356205121, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 9.2552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4899 }, { "epoch": 0.3554072677159643, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.9831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4900 }, { "epoch": 0.35547979981141653, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4901 }, { "epoch": 0.3555523319068688, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.4853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4902 }, { "epoch": 0.355624864002321, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4903 }, { "epoch": 0.3556973960977733, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4904 }, { "epoch": 0.3557699281932255, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.2659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4905 }, { "epoch": 0.3558424602886777, "grad_norm": 67.5, "learning_rate": 0.0003, "loss": 8.8479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4906 }, { "epoch": 0.35591499238413, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.9753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4907 }, { "epoch": 0.3559875244795822, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.9575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4908 }, { "epoch": 0.3560600565750345, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4909 }, { "epoch": 0.3561325886704867, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.2079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4910 }, { "epoch": 0.3562051207659389, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4911 }, { "epoch": 0.3562776528613912, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.9023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4912 }, { "epoch": 0.3563501849568434, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4913 }, { "epoch": 0.35642271705229567, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4914 }, { "epoch": 0.3564952491477479, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4915 }, { "epoch": 0.3565677812432001, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4916 }, { "epoch": 0.35664031333865237, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.7138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4917 }, { "epoch": 0.3567128454341046, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.8119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4918 }, { "epoch": 0.3567853775295568, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4919 }, { "epoch": 0.3568579096250091, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4920 }, { "epoch": 0.3569304417204613, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4921 }, { "epoch": 0.35700297381591356, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.9489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4922 }, { "epoch": 0.3570755059113658, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.5543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4923 }, { "epoch": 0.357148038006818, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.3519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4924 }, { "epoch": 0.35722057010227026, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4925 }, { "epoch": 0.3572931021977225, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 8.9161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4926 }, { "epoch": 0.35736563429317475, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.1599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4927 }, { "epoch": 0.35743816638862697, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4928 }, { "epoch": 0.3575106984840792, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4929 }, { "epoch": 0.35758323057953145, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.4121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4930 }, { "epoch": 0.35765576267498367, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.1444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4931 }, { "epoch": 0.35772829477043594, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4932 }, { "epoch": 0.35780082686588816, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.33, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4933 }, { "epoch": 0.35787335896134037, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4934 }, { "epoch": 0.35794589105679264, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4935 }, { "epoch": 0.35801842315224486, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4936 }, { "epoch": 0.35809095524769713, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.8532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4937 }, { "epoch": 0.35816348734314934, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4938 }, { "epoch": 0.35823601943860156, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.0547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4939 }, { "epoch": 0.35830855153405383, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.6962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4940 }, { "epoch": 0.35838108362950605, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4941 }, { "epoch": 0.3584536157249583, "grad_norm": 14.4375, "learning_rate": 0.0003, "loss": 9.5673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4942 }, { "epoch": 0.35852614782041053, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.8519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4943 }, { "epoch": 0.35859867991586275, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4944 }, { "epoch": 0.358671212011315, "grad_norm": 26.75, "learning_rate": 0.0003, "loss": 9.0571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4945 }, { "epoch": 0.35874374410676724, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.2183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4946 }, { "epoch": 0.3588162762022195, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.5004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4947 }, { "epoch": 0.3588888082976717, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4948 }, { "epoch": 0.35896134039312394, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4949 }, { "epoch": 0.3590338724885762, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.5308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4950 }, { "epoch": 0.3591064045840284, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4951 }, { "epoch": 0.35917893667948064, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.2839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4952 }, { "epoch": 0.3592514687749329, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.6385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4953 }, { "epoch": 0.35932400087038513, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4954 }, { "epoch": 0.3593965329658374, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4955 }, { "epoch": 0.3594690650612896, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.3014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4956 }, { "epoch": 0.35954159715674183, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4957 }, { "epoch": 0.3596141292521941, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.7753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4958 }, { "epoch": 0.3596866613476463, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4959 }, { "epoch": 0.3597591934430986, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.1237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4960 }, { "epoch": 0.3598317255385508, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 9.0756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4961 }, { "epoch": 0.359904257634003, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.8435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4962 }, { "epoch": 0.3599767897294553, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.1657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4963 }, { "epoch": 0.3600493218249075, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.1815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4964 }, { "epoch": 0.3601218539203598, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.3631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4965 }, { "epoch": 0.360194386015812, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.8776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4966 }, { "epoch": 0.3602669181112642, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.4549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4967 }, { "epoch": 0.3603394502067165, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4968 }, { "epoch": 0.3604119823021687, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4969 }, { "epoch": 0.36048451439762097, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.6895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4970 }, { "epoch": 0.3605570464930732, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.3156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4971 }, { "epoch": 0.3606295785885254, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.5379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4972 }, { "epoch": 0.3607021106839777, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4973 }, { "epoch": 0.3607746427794299, "grad_norm": 1.5390625, "learning_rate": 0.0003, "loss": 9.4434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4974 }, { "epoch": 0.36084717487488216, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.0997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4975 }, { "epoch": 0.3609197069703344, "grad_norm": 62.5, "learning_rate": 0.0003, "loss": 8.8151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4976 }, { "epoch": 0.3609922390657866, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4977 }, { "epoch": 0.36106477116123886, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.8819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4978 }, { "epoch": 0.3611373032566911, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.0617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4979 }, { "epoch": 0.36120983535214335, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4980 }, { "epoch": 0.36128236744759556, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4981 }, { "epoch": 0.3613548995430478, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4982 }, { "epoch": 0.36142743163850005, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.6809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4983 }, { "epoch": 0.36149996373395227, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.6727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4984 }, { "epoch": 0.36157249582940454, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.7506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4985 }, { "epoch": 0.36164502792485675, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.7069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4986 }, { "epoch": 0.36171756002030897, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 9.1219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4987 }, { "epoch": 0.36179009211576124, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4988 }, { "epoch": 0.36186262421121346, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.2064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4989 }, { "epoch": 0.3619351563066657, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.6298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4990 }, { "epoch": 0.36200768840211794, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.4427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4991 }, { "epoch": 0.36208022049757016, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4992 }, { "epoch": 0.36215275259302243, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4993 }, { "epoch": 0.36222528468847465, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.2166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4994 }, { "epoch": 0.36229781678392686, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4995 }, { "epoch": 0.36237034887937913, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.9632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4996 }, { "epoch": 0.36244288097483135, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.9054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4997 }, { "epoch": 0.3625154130702836, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4998 }, { "epoch": 0.36258794516573584, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 4999 }, { "epoch": 0.36266047726118805, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5000 }, { "epoch": 0.3627330093566403, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.9152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5001 }, { "epoch": 0.36280554145209254, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5002 }, { "epoch": 0.3628780735475448, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5003 }, { "epoch": 0.362950605642997, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5004 }, { "epoch": 0.36302313773844924, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5005 }, { "epoch": 0.3630956698339015, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5006 }, { "epoch": 0.36316820192935373, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5007 }, { "epoch": 0.363240734024806, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5008 }, { "epoch": 0.3633132661202582, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5009 }, { "epoch": 0.36338579821571043, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5010 }, { "epoch": 0.3634583303111627, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5011 }, { "epoch": 0.3635308624066149, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.9474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5012 }, { "epoch": 0.3636033945020672, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5013 }, { "epoch": 0.3636759265975194, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.1131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5014 }, { "epoch": 0.3637484586929716, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5015 }, { "epoch": 0.3638209907884239, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 8.7783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5016 }, { "epoch": 0.3638935228838761, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.1947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5017 }, { "epoch": 0.3639660549793284, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5018 }, { "epoch": 0.3640385870747806, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5019 }, { "epoch": 0.3641111191702328, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.7798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5020 }, { "epoch": 0.3641836512656851, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.6484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5021 }, { "epoch": 0.3642561833611373, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5022 }, { "epoch": 0.3643287154565895, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5023 }, { "epoch": 0.3644012475520418, "grad_norm": 1.3515625, "learning_rate": 0.0003, "loss": 8.6077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5024 }, { "epoch": 0.364473779647494, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5025 }, { "epoch": 0.36454631174294627, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5026 }, { "epoch": 0.3646188438383985, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5027 }, { "epoch": 0.3646913759338507, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.9665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5028 }, { "epoch": 0.364763908029303, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5029 }, { "epoch": 0.3648364401247552, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.4914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5030 }, { "epoch": 0.36490897222020746, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5031 }, { "epoch": 0.3649815043156597, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5032 }, { "epoch": 0.3650540364111119, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5033 }, { "epoch": 0.36512656850656416, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.3406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5034 }, { "epoch": 0.3651991006020164, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5035 }, { "epoch": 0.36527163269746865, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5036 }, { "epoch": 0.36534416479292087, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 8.6138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5037 }, { "epoch": 0.3654166968883731, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.6938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5038 }, { "epoch": 0.36548922898382535, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.2883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5039 }, { "epoch": 0.36556176107927757, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.7926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5040 }, { "epoch": 0.36563429317472984, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.7961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5041 }, { "epoch": 0.36570682527018206, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5042 }, { "epoch": 0.3657793573656343, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5043 }, { "epoch": 0.36585188946108654, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5044 }, { "epoch": 0.36592442155653876, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.0245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5045 }, { "epoch": 0.36599695365199103, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5046 }, { "epoch": 0.36606948574744325, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5047 }, { "epoch": 0.36614201784289546, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5048 }, { "epoch": 0.36621454993834773, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5049 }, { "epoch": 0.36628708203379995, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5050 }, { "epoch": 0.3663596141292522, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5051 }, { "epoch": 0.36643214622470444, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5052 }, { "epoch": 0.36650467832015665, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5053 }, { "epoch": 0.3665772104156089, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5054 }, { "epoch": 0.36664974251106114, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.8515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5055 }, { "epoch": 0.3667222746065134, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.3783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5056 }, { "epoch": 0.3667948067019656, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.1677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5057 }, { "epoch": 0.36686733879741784, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.6329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5058 }, { "epoch": 0.3669398708928701, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5059 }, { "epoch": 0.36701240298832233, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5060 }, { "epoch": 0.36708493508377454, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5061 }, { "epoch": 0.3671574671792268, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5062 }, { "epoch": 0.36722999927467903, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 9.0022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5063 }, { "epoch": 0.3673025313701313, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5064 }, { "epoch": 0.3673750634655835, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.7646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5065 }, { "epoch": 0.36744759556103573, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.2322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5066 }, { "epoch": 0.367520127656488, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.1772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5067 }, { "epoch": 0.3675926597519402, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5068 }, { "epoch": 0.3676651918473925, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.3734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5069 }, { "epoch": 0.3677377239428447, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5070 }, { "epoch": 0.3678102560382969, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.5987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5071 }, { "epoch": 0.3678827881337492, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.9442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5072 }, { "epoch": 0.3679553202292014, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.1195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5073 }, { "epoch": 0.3680278523246537, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5074 }, { "epoch": 0.3681003844201059, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.0084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5075 }, { "epoch": 0.3681729165155581, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5076 }, { "epoch": 0.3682454486110104, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.9632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5077 }, { "epoch": 0.3683179807064626, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.7616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5078 }, { "epoch": 0.36839051280191487, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.1629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5079 }, { "epoch": 0.3684630448973671, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5080 }, { "epoch": 0.3685355769928193, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5081 }, { "epoch": 0.3686081090882716, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.5558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5082 }, { "epoch": 0.3686806411837238, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5083 }, { "epoch": 0.36875317327917606, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5084 }, { "epoch": 0.3688257053746283, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 8.8952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5085 }, { "epoch": 0.3688982374700805, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.0542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5086 }, { "epoch": 0.36897076956553276, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5087 }, { "epoch": 0.369043301660985, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5088 }, { "epoch": 0.36911583375643725, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.4284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5089 }, { "epoch": 0.36918836585188947, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5090 }, { "epoch": 0.3692608979473417, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5091 }, { "epoch": 0.36933343004279395, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5092 }, { "epoch": 0.36940596213824617, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5093 }, { "epoch": 0.3694784942336984, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5094 }, { "epoch": 0.36955102632915066, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.1274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5095 }, { "epoch": 0.36962355842460287, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.5494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5096 }, { "epoch": 0.36969609052005514, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5097 }, { "epoch": 0.36976862261550736, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5098 }, { "epoch": 0.3698411547109596, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.0283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5099 }, { "epoch": 0.36991368680641185, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.2292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5100 }, { "epoch": 0.36998621890186406, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.6178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5101 }, { "epoch": 0.37005875099731633, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.8072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5102 }, { "epoch": 0.37013128309276855, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5103 }, { "epoch": 0.37020381518822076, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.7317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5104 }, { "epoch": 0.37027634728367304, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.7113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5105 }, { "epoch": 0.37034887937912525, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5106 }, { "epoch": 0.3704214114745775, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5107 }, { "epoch": 0.37049394357002974, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5108 }, { "epoch": 0.37056647566548195, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.3407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5109 }, { "epoch": 0.3706390077609342, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.7661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5110 }, { "epoch": 0.37071153985638644, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5111 }, { "epoch": 0.3707840719518387, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.4137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5112 }, { "epoch": 0.3708566040472909, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5113 }, { "epoch": 0.37092913614274314, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.0017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5114 }, { "epoch": 0.3710016682381954, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.1169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5115 }, { "epoch": 0.37107420033364763, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5116 }, { "epoch": 0.3711467324290999, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5117 }, { "epoch": 0.3712192645245521, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5118 }, { "epoch": 0.37129179662000433, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5119 }, { "epoch": 0.3713643287154566, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5120 }, { "epoch": 0.3714368608109088, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5121 }, { "epoch": 0.3715093929063611, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.6446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5122 }, { "epoch": 0.3715819250018133, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5123 }, { "epoch": 0.3716544570972655, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.6982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5124 }, { "epoch": 0.3717269891927178, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5125 }, { "epoch": 0.37179952128817, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5126 }, { "epoch": 0.3718720533836222, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.6934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5127 }, { "epoch": 0.3719445854790745, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5128 }, { "epoch": 0.3720171175745267, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.16, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5129 }, { "epoch": 0.372089649669979, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.7857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5130 }, { "epoch": 0.3721621817654312, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5131 }, { "epoch": 0.3722347138608834, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.5084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5132 }, { "epoch": 0.3723072459563357, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5133 }, { "epoch": 0.3723797780517879, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5134 }, { "epoch": 0.3724523101472402, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5135 }, { "epoch": 0.3725248422426924, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5136 }, { "epoch": 0.3725973743381446, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5137 }, { "epoch": 0.3726699064335969, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.1726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5138 }, { "epoch": 0.3727424385290491, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.0056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5139 }, { "epoch": 0.37281497062450136, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.2174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5140 }, { "epoch": 0.3728875027199536, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.4386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5141 }, { "epoch": 0.3729600348154058, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.8584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5142 }, { "epoch": 0.37303256691085807, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.3908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5143 }, { "epoch": 0.3731050990063103, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5144 }, { "epoch": 0.37317763110176255, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.2617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5145 }, { "epoch": 0.37325016319721477, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5146 }, { "epoch": 0.373322695292667, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.2442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5147 }, { "epoch": 0.37339522738811926, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.3307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5148 }, { "epoch": 0.37346775948357147, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5149 }, { "epoch": 0.37354029157902374, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 9.093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5150 }, { "epoch": 0.37361282367447596, "grad_norm": 1.5546875, "learning_rate": 0.0003, "loss": 9.0329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5151 }, { "epoch": 0.3736853557699282, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 8.8561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5152 }, { "epoch": 0.37375788786538044, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5153 }, { "epoch": 0.37383041996083266, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.0971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5154 }, { "epoch": 0.37390295205628493, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5155 }, { "epoch": 0.37397548415173715, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5156 }, { "epoch": 0.37404801624718936, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.0087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5157 }, { "epoch": 0.37412054834264163, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.9811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5158 }, { "epoch": 0.37419308043809385, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5159 }, { "epoch": 0.3742656125335461, "grad_norm": 15.4375, "learning_rate": 0.0003, "loss": 8.9686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5160 }, { "epoch": 0.37433814462899834, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.1641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5161 }, { "epoch": 0.37441067672445055, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 9.0023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5162 }, { "epoch": 0.3744832088199028, "grad_norm": 16.75, "learning_rate": 0.0003, "loss": 9.2039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5163 }, { "epoch": 0.37455574091535504, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5164 }, { "epoch": 0.37462827301080726, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5165 }, { "epoch": 0.3747008051062595, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.9577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5166 }, { "epoch": 0.37477333720171174, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5167 }, { "epoch": 0.374845869297164, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5168 }, { "epoch": 0.37491840139261623, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5169 }, { "epoch": 0.37499093348806845, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5170 }, { "epoch": 0.3750634655835207, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5171 }, { "epoch": 0.37513599767897293, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.8697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5172 }, { "epoch": 0.3752085297744252, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.3979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5173 }, { "epoch": 0.3752810618698774, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 8.9457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5174 }, { "epoch": 0.37535359396532963, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.3772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5175 }, { "epoch": 0.3754261260607819, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5176 }, { "epoch": 0.3754986581562341, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5177 }, { "epoch": 0.3755711902516864, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.7387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5178 }, { "epoch": 0.3756437223471386, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.0137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5179 }, { "epoch": 0.3757162544425908, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.3132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5180 }, { "epoch": 0.3757887865380431, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5181 }, { "epoch": 0.3758613186334953, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5182 }, { "epoch": 0.3759338507289476, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5183 }, { "epoch": 0.3760063828243998, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.0628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5184 }, { "epoch": 0.376078914919852, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5185 }, { "epoch": 0.3761514470153043, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.9419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5186 }, { "epoch": 0.3762239791107565, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.0926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5187 }, { "epoch": 0.3762965112062088, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.3519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5188 }, { "epoch": 0.376369043301661, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5189 }, { "epoch": 0.3764415753971132, "grad_norm": 1.46875, "learning_rate": 0.0003, "loss": 8.9842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5190 }, { "epoch": 0.3765141074925655, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.9308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5191 }, { "epoch": 0.3765866395880177, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.3533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5192 }, { "epoch": 0.37665917168346996, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.0439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5193 }, { "epoch": 0.3767317037789222, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.3477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5194 }, { "epoch": 0.3768042358743744, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5195 }, { "epoch": 0.37687676796982666, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.8842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5196 }, { "epoch": 0.3769493000652789, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5197 }, { "epoch": 0.3770218321607311, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5198 }, { "epoch": 0.37709436425618337, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.3062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5199 }, { "epoch": 0.3771668963516356, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5200 }, { "epoch": 0.37723942844708785, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.3154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5201 }, { "epoch": 0.37731196054254007, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.2232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5202 }, { "epoch": 0.3773844926379923, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5203 }, { "epoch": 0.37745702473344456, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.4324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5204 }, { "epoch": 0.3775295568288968, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.2682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5205 }, { "epoch": 0.37760208892434904, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.4076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5206 }, { "epoch": 0.37767462101980126, "grad_norm": 1.5546875, "learning_rate": 0.0003, "loss": 9.1589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5207 }, { "epoch": 0.3777471531152535, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.2523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5208 }, { "epoch": 0.37781968521070575, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.7514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5209 }, { "epoch": 0.37789221730615796, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5210 }, { "epoch": 0.37796474940161023, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5211 }, { "epoch": 0.37803728149706245, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5212 }, { "epoch": 0.37810981359251467, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.2972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5213 }, { "epoch": 0.37818234568796694, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 9.5339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5214 }, { "epoch": 0.37825487778341915, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5215 }, { "epoch": 0.3783274098788714, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5216 }, { "epoch": 0.37839994197432364, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.1621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5217 }, { "epoch": 0.37847247406977585, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.2186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5218 }, { "epoch": 0.3785450061652281, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.8395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5219 }, { "epoch": 0.37861753826068034, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5220 }, { "epoch": 0.3786900703561326, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5221 }, { "epoch": 0.37876260245158483, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5222 }, { "epoch": 0.37883513454703704, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5223 }, { "epoch": 0.3789076666424893, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5224 }, { "epoch": 0.37898019873794153, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5225 }, { "epoch": 0.3790527308333938, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5226 }, { "epoch": 0.379125262928846, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5227 }, { "epoch": 0.37919779502429823, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5228 }, { "epoch": 0.3792703271197505, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5229 }, { "epoch": 0.3793428592152027, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.9493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5230 }, { "epoch": 0.379415391310655, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.0893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5231 }, { "epoch": 0.3794879234061072, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.2869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5232 }, { "epoch": 0.3795604555015594, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.9545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5233 }, { "epoch": 0.3796329875970117, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5234 }, { "epoch": 0.3797055196924639, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5235 }, { "epoch": 0.3797780517879161, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5236 }, { "epoch": 0.3798505838833684, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.6654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5237 }, { "epoch": 0.3799231159788206, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.4835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5238 }, { "epoch": 0.3799956480742729, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.0378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5239 }, { "epoch": 0.3800681801697251, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.2128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5240 }, { "epoch": 0.3801407122651773, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.3156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5241 }, { "epoch": 0.3802132443606296, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5242 }, { "epoch": 0.3802857764560818, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.9644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5243 }, { "epoch": 0.3803583085515341, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.8323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5244 }, { "epoch": 0.3804308406469863, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5245 }, { "epoch": 0.3805033727424385, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.6517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5246 }, { "epoch": 0.3805759048378908, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5247 }, { "epoch": 0.380648436933343, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.2667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5248 }, { "epoch": 0.38072096902879526, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5249 }, { "epoch": 0.3807935011242475, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.7771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5250 }, { "epoch": 0.3808660332196997, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5251 }, { "epoch": 0.38093856531515197, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5252 }, { "epoch": 0.3810110974106042, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.8139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5253 }, { "epoch": 0.38108362950605645, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.5782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5254 }, { "epoch": 0.38115616160150867, "grad_norm": 22.5, "learning_rate": 0.0003, "loss": 9.0238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5255 }, { "epoch": 0.3812286936969609, "grad_norm": 11.875, "learning_rate": 0.0003, "loss": 8.9547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5256 }, { "epoch": 0.38130122579241316, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.6162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5257 }, { "epoch": 0.38137375788786537, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.6383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5258 }, { "epoch": 0.38144628998331764, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5259 }, { "epoch": 0.38151882207876986, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.5905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5260 }, { "epoch": 0.3815913541742221, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5261 }, { "epoch": 0.38166388626967435, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.3451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5262 }, { "epoch": 0.38173641836512656, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5263 }, { "epoch": 0.38180895046057883, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5264 }, { "epoch": 0.38188148255603105, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5265 }, { "epoch": 0.38195401465148326, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5266 }, { "epoch": 0.38202654674693554, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.0382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5267 }, { "epoch": 0.38209907884238775, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.7736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5268 }, { "epoch": 0.38217161093783997, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.0365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5269 }, { "epoch": 0.38224414303329224, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5270 }, { "epoch": 0.38231667512874445, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5271 }, { "epoch": 0.3823892072241967, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5272 }, { "epoch": 0.38246173931964894, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5273 }, { "epoch": 0.38253427141510116, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.4291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5274 }, { "epoch": 0.38260680351055343, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5275 }, { "epoch": 0.38267933560600564, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5276 }, { "epoch": 0.3827518677014579, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 9.2348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5277 }, { "epoch": 0.38282439979691013, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.5804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5278 }, { "epoch": 0.38289693189236235, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5279 }, { "epoch": 0.3829694639878146, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5280 }, { "epoch": 0.38304199608326683, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5281 }, { "epoch": 0.3831145281787191, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.2255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5282 }, { "epoch": 0.3831870602741713, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.8147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5283 }, { "epoch": 0.38325959236962354, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5284 }, { "epoch": 0.3833321244650758, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.1651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5285 }, { "epoch": 0.383404656560528, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.3762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5286 }, { "epoch": 0.3834771886559803, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5287 }, { "epoch": 0.3835497207514325, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5288 }, { "epoch": 0.3836222528468847, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5289 }, { "epoch": 0.383694784942337, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.2643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5290 }, { "epoch": 0.3837673170377892, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.0625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5291 }, { "epoch": 0.3838398491332415, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5292 }, { "epoch": 0.3839123812286937, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.3376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5293 }, { "epoch": 0.3839849133241459, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5294 }, { "epoch": 0.3840574454195982, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5295 }, { "epoch": 0.3841299775150504, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5296 }, { "epoch": 0.3842025096105027, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.9525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5297 }, { "epoch": 0.3842750417059549, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5298 }, { "epoch": 0.3843475738014071, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.3746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5299 }, { "epoch": 0.3844201058968594, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5300 }, { "epoch": 0.3844926379923116, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.3311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5301 }, { "epoch": 0.3845651700877638, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5302 }, { "epoch": 0.3846377021832161, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5303 }, { "epoch": 0.3847102342786683, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.1209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5304 }, { "epoch": 0.38478276637412057, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.7133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5305 }, { "epoch": 0.3848552984695728, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.1753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5306 }, { "epoch": 0.384927830565025, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.2817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5307 }, { "epoch": 0.38500036266047727, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.7979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5308 }, { "epoch": 0.3850728947559295, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.6459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5309 }, { "epoch": 0.38514542685138176, "grad_norm": 1.25, "learning_rate": 0.0003, "loss": 9.1798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5310 }, { "epoch": 0.38521795894683397, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5311 }, { "epoch": 0.3852904910422862, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.5811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5312 }, { "epoch": 0.38536302313773846, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.6348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5313 }, { "epoch": 0.3854355552331907, "grad_norm": 14.125, "learning_rate": 0.0003, "loss": 8.8015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5314 }, { "epoch": 0.38550808732864295, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.79, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5315 }, { "epoch": 0.38558061942409516, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.4293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5316 }, { "epoch": 0.3856531515195474, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.7939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5317 }, { "epoch": 0.38572568361499965, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5318 }, { "epoch": 0.38579821571045186, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.6995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5319 }, { "epoch": 0.38587074780590414, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5320 }, { "epoch": 0.38594327990135635, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.5244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5321 }, { "epoch": 0.38601581199680857, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5322 }, { "epoch": 0.38608834409226084, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.3619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5323 }, { "epoch": 0.38616087618771305, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5324 }, { "epoch": 0.3862334082831653, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5325 }, { "epoch": 0.38630594037861754, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5326 }, { "epoch": 0.38637847247406976, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.9583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5327 }, { "epoch": 0.386451004569522, "grad_norm": 63.5, "learning_rate": 0.0003, "loss": 8.9774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5328 }, { "epoch": 0.38652353666497424, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5329 }, { "epoch": 0.3865960687604265, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.1817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5330 }, { "epoch": 0.38666860085587873, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 8.6404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5331 }, { "epoch": 0.38674113295133095, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5332 }, { "epoch": 0.3868136650467832, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.8199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5333 }, { "epoch": 0.38688619714223543, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5334 }, { "epoch": 0.3869587292376877, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5335 }, { "epoch": 0.3870312613331399, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.3846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5336 }, { "epoch": 0.38710379342859214, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5337 }, { "epoch": 0.3871763255240444, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5338 }, { "epoch": 0.3872488576194966, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.2261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5339 }, { "epoch": 0.38732138971494884, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5340 }, { "epoch": 0.3873939218104011, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5341 }, { "epoch": 0.3874664539058533, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.6942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5342 }, { "epoch": 0.3875389860013056, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.5657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5343 }, { "epoch": 0.3876115180967578, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.1058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5344 }, { "epoch": 0.38768405019221003, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5345 }, { "epoch": 0.3877565822876623, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.9936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5346 }, { "epoch": 0.3878291143831145, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.4564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5347 }, { "epoch": 0.3879016464785668, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5348 }, { "epoch": 0.387974178574019, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.7241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5349 }, { "epoch": 0.3880467106694712, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.1282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5350 }, { "epoch": 0.3881192427649235, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5351 }, { "epoch": 0.3881917748603757, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.5854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5352 }, { "epoch": 0.388264306955828, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.9233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5353 }, { "epoch": 0.3883368390512802, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5354 }, { "epoch": 0.3884093711467324, "grad_norm": 26.0, "learning_rate": 0.0003, "loss": 9.0851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5355 }, { "epoch": 0.3884819032421847, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5356 }, { "epoch": 0.3885544353376369, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.0598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5357 }, { "epoch": 0.38862696743308917, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.6867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5358 }, { "epoch": 0.3886994995285414, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.0904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5359 }, { "epoch": 0.3887720316239936, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5360 }, { "epoch": 0.38884456371944587, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.4675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5361 }, { "epoch": 0.3889170958148981, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5362 }, { "epoch": 0.38898962791035036, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.5925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5363 }, { "epoch": 0.38906216000580257, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.5286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5364 }, { "epoch": 0.3891346921012548, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.5902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5365 }, { "epoch": 0.38920722419670706, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5366 }, { "epoch": 0.3892797562921593, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.1742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5367 }, { "epoch": 0.38935228838761154, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5368 }, { "epoch": 0.38942482048306376, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.8528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5369 }, { "epoch": 0.389497352578516, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5370 }, { "epoch": 0.38956988467396825, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.2939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5371 }, { "epoch": 0.38964241676942046, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5372 }, { "epoch": 0.3897149488648727, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 9.1442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5373 }, { "epoch": 0.38978748096032495, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.7321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5374 }, { "epoch": 0.38986001305577717, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.3729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5375 }, { "epoch": 0.38993254515122944, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5376 }, { "epoch": 0.39000507724668165, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5377 }, { "epoch": 0.39007760934213387, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.4902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5378 }, { "epoch": 0.39015014143758614, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.4287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5379 }, { "epoch": 0.39022267353303836, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.9159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5380 }, { "epoch": 0.3902952056284906, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.3067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5381 }, { "epoch": 0.39036773772394284, "grad_norm": 19.375, "learning_rate": 0.0003, "loss": 9.0327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5382 }, { "epoch": 0.39044026981939506, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.7209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5383 }, { "epoch": 0.39051280191484733, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5384 }, { "epoch": 0.39058533401029955, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.9552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5385 }, { "epoch": 0.3906578661057518, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5386 }, { "epoch": 0.39073039820120403, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.6944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5387 }, { "epoch": 0.39080293029665625, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5388 }, { "epoch": 0.3908754623921085, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5389 }, { "epoch": 0.39094799448756073, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5390 }, { "epoch": 0.391020526583013, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 9.0955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5391 }, { "epoch": 0.3910930586784652, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.9075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5392 }, { "epoch": 0.39116559077391744, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.3855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5393 }, { "epoch": 0.3912381228693697, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.3119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5394 }, { "epoch": 0.3913106549648219, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.2226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5395 }, { "epoch": 0.3913831870602742, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.4217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5396 }, { "epoch": 0.3914557191557264, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.2443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5397 }, { "epoch": 0.3915282512511786, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.0311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5398 }, { "epoch": 0.3916007833466309, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5399 }, { "epoch": 0.3916733154420831, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5400 }, { "epoch": 0.3917458475375354, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.2501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5401 }, { "epoch": 0.3918183796329876, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.2657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5402 }, { "epoch": 0.3918909117284398, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5403 }, { "epoch": 0.3919634438238921, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5404 }, { "epoch": 0.3920359759193443, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.6996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5405 }, { "epoch": 0.3921085080147966, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.27, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5406 }, { "epoch": 0.3921810401102488, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.8218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5407 }, { "epoch": 0.392253572205701, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.9476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5408 }, { "epoch": 0.3923261043011533, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.8628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5409 }, { "epoch": 0.3923986363966055, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.3004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5410 }, { "epoch": 0.3924711684920577, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.1706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5411 }, { "epoch": 0.39254370058751, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5412 }, { "epoch": 0.3926162326829622, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.9103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5413 }, { "epoch": 0.39268876477841447, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 9.1083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5414 }, { "epoch": 0.3927612968738667, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5415 }, { "epoch": 0.3928338289693189, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5416 }, { "epoch": 0.39290636106477117, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.0708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5417 }, { "epoch": 0.3929788931602234, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.6937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5418 }, { "epoch": 0.39305142525567566, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5419 }, { "epoch": 0.3931239573511279, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.1579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5420 }, { "epoch": 0.3931964894465801, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5421 }, { "epoch": 0.39326902154203236, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5422 }, { "epoch": 0.3933415536374846, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5423 }, { "epoch": 0.39341408573293685, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.6504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5424 }, { "epoch": 0.39348661782838906, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.0101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5425 }, { "epoch": 0.3935591499238413, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5426 }, { "epoch": 0.39363168201929355, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5427 }, { "epoch": 0.39370421411474577, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5428 }, { "epoch": 0.39377674621019804, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.1775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5429 }, { "epoch": 0.39384927830565025, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 8.7579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5430 }, { "epoch": 0.39392181040110247, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.0271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5431 }, { "epoch": 0.39399434249655474, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.2206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5432 }, { "epoch": 0.39406687459200695, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5433 }, { "epoch": 0.3941394066874592, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5434 }, { "epoch": 0.39421193878291144, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5435 }, { "epoch": 0.39428447087836366, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.7332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5436 }, { "epoch": 0.39435700297381593, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.9367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5437 }, { "epoch": 0.39442953506926814, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5438 }, { "epoch": 0.3945020671647204, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5439 }, { "epoch": 0.39457459926017263, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5440 }, { "epoch": 0.39464713135562485, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5441 }, { "epoch": 0.3947196634510771, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.4809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5442 }, { "epoch": 0.39479219554652933, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5443 }, { "epoch": 0.39486472764198155, "grad_norm": 14.75, "learning_rate": 0.0003, "loss": 8.9465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5444 }, { "epoch": 0.3949372597374338, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5445 }, { "epoch": 0.39500979183288604, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.1754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5446 }, { "epoch": 0.3950823239283383, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5447 }, { "epoch": 0.3951548560237905, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5448 }, { "epoch": 0.39522738811924274, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5449 }, { "epoch": 0.395299920214695, "grad_norm": 1.6796875, "learning_rate": 0.0003, "loss": 8.8096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5450 }, { "epoch": 0.3953724523101472, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5451 }, { "epoch": 0.3954449844055995, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5452 }, { "epoch": 0.3955175165010517, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.1121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5453 }, { "epoch": 0.39559004859650393, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.5758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5454 }, { "epoch": 0.3956625806919562, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.6582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5455 }, { "epoch": 0.3957351127874084, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.8011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5456 }, { "epoch": 0.3958076448828607, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.0434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5457 }, { "epoch": 0.3958801769783129, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.9317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5458 }, { "epoch": 0.3959527090737651, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.2792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5459 }, { "epoch": 0.3960252411692174, "grad_norm": 14.875, "learning_rate": 0.0003, "loss": 8.657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5460 }, { "epoch": 0.3960977732646696, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5461 }, { "epoch": 0.3961703053601219, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5462 }, { "epoch": 0.3962428374555741, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5463 }, { "epoch": 0.3963153695510263, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.1836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5464 }, { "epoch": 0.3963879016464786, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5465 }, { "epoch": 0.3964604337419308, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.9834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5466 }, { "epoch": 0.39653296583738307, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5467 }, { "epoch": 0.3966054979328353, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.6427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5468 }, { "epoch": 0.3966780300282875, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5469 }, { "epoch": 0.39675056212373977, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 8.782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5470 }, { "epoch": 0.396823094219192, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5471 }, { "epoch": 0.39689562631464426, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.6728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5472 }, { "epoch": 0.39696815841009647, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.8031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5473 }, { "epoch": 0.3970406905055487, "grad_norm": 14.8125, "learning_rate": 0.0003, "loss": 9.1658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5474 }, { "epoch": 0.39711322260100096, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.6939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5475 }, { "epoch": 0.3971857546964532, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.4884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5476 }, { "epoch": 0.3972582867919054, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.9128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5477 }, { "epoch": 0.39733081888735766, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 8.9062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5478 }, { "epoch": 0.3974033509828099, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5479 }, { "epoch": 0.39747588307826215, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.1721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5480 }, { "epoch": 0.39754841517371436, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 9.4537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5481 }, { "epoch": 0.3976209472691666, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.2047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5482 }, { "epoch": 0.39769347936461885, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5483 }, { "epoch": 0.39776601146007107, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.9372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5484 }, { "epoch": 0.39783854355552334, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5485 }, { "epoch": 0.39791107565097555, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.7525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5486 }, { "epoch": 0.39798360774642777, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.3675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5487 }, { "epoch": 0.39805613984188004, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5488 }, { "epoch": 0.39812867193733226, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.5252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5489 }, { "epoch": 0.39820120403278453, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5490 }, { "epoch": 0.39827373612823674, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5491 }, { "epoch": 0.39834626822368896, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 9.6972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5492 }, { "epoch": 0.39841880031914123, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.4049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5493 }, { "epoch": 0.39849133241459345, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5494 }, { "epoch": 0.3985638645100457, "grad_norm": 24.875, "learning_rate": 0.0003, "loss": 8.8742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5495 }, { "epoch": 0.39863639660549793, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5496 }, { "epoch": 0.39870892870095015, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5497 }, { "epoch": 0.3987814607964024, "grad_norm": 64.5, "learning_rate": 0.0003, "loss": 8.7886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5498 }, { "epoch": 0.39885399289185464, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.3715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5499 }, { "epoch": 0.3989265249873069, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.7348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5500 }, { "epoch": 0.3989990570827591, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.6918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5501 }, { "epoch": 0.39907158917821134, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5502 }, { "epoch": 0.3991441212736636, "grad_norm": 18.875, "learning_rate": 0.0003, "loss": 8.8512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5503 }, { "epoch": 0.3992166533691158, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5504 }, { "epoch": 0.3992891854645681, "grad_norm": 33.5, "learning_rate": 0.0003, "loss": 9.3402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5505 }, { "epoch": 0.3993617175600203, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.3409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5506 }, { "epoch": 0.39943424965547253, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5507 }, { "epoch": 0.3995067817509248, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.5559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5508 }, { "epoch": 0.399579313846377, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5509 }, { "epoch": 0.3996518459418293, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.0795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5510 }, { "epoch": 0.3997243780372815, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.9125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5511 }, { "epoch": 0.3997969101327337, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.0971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5512 }, { "epoch": 0.399869442228186, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5513 }, { "epoch": 0.3999419743236382, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.2287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5514 }, { "epoch": 0.4000145064190904, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.3237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5515 }, { "epoch": 0.4000870385145427, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.9992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5516 }, { "epoch": 0.4001595706099949, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5517 }, { "epoch": 0.4002321027054472, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5518 }, { "epoch": 0.4003046348008994, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.6527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5519 }, { "epoch": 0.4003771668963516, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.1504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5520 }, { "epoch": 0.4004496989918039, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.8711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5521 }, { "epoch": 0.4005222310872561, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5522 }, { "epoch": 0.40059476318270837, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.8332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5523 }, { "epoch": 0.4006672952781606, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5524 }, { "epoch": 0.4007398273736128, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.4568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5525 }, { "epoch": 0.40081235946906507, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.8901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5526 }, { "epoch": 0.4008848915645173, "grad_norm": 20.875, "learning_rate": 0.0003, "loss": 8.8963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5527 }, { "epoch": 0.40095742365996956, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5528 }, { "epoch": 0.4010299557554218, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5529 }, { "epoch": 0.401102487850874, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5530 }, { "epoch": 0.40117501994632626, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.2434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5531 }, { "epoch": 0.4012475520417785, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.4508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5532 }, { "epoch": 0.40132008413723075, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5533 }, { "epoch": 0.40139261623268296, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5534 }, { "epoch": 0.4014651483281352, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5535 }, { "epoch": 0.40153768042358745, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5536 }, { "epoch": 0.40161021251903967, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5537 }, { "epoch": 0.40168274461449194, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5538 }, { "epoch": 0.40175527670994415, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5539 }, { "epoch": 0.40182780880539637, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5540 }, { "epoch": 0.40190034090084864, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5541 }, { "epoch": 0.40197287299630086, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5542 }, { "epoch": 0.4020454050917531, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5543 }, { "epoch": 0.40211793718720534, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 8.9612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5544 }, { "epoch": 0.40219046928265756, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.3066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5545 }, { "epoch": 0.40226300137810983, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.2395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5546 }, { "epoch": 0.40233553347356205, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 9.0568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5547 }, { "epoch": 0.40240806556901426, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5548 }, { "epoch": 0.40248059766446653, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.8925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5549 }, { "epoch": 0.40255312975991875, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.2438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5550 }, { "epoch": 0.402625661855371, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.9733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5551 }, { "epoch": 0.40269819395082324, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.9124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5552 }, { "epoch": 0.40277072604627545, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.5857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5553 }, { "epoch": 0.4028432581417277, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5554 }, { "epoch": 0.40291579023717994, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.5292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5555 }, { "epoch": 0.4029883223326322, "grad_norm": 22.375, "learning_rate": 0.0003, "loss": 9.0792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5556 }, { "epoch": 0.4030608544280844, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.7958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5557 }, { "epoch": 0.40313338652353664, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 9.3704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5558 }, { "epoch": 0.4032059186189889, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.2959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5559 }, { "epoch": 0.40327845071444113, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.6594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5560 }, { "epoch": 0.4033509828098934, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.0333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5561 }, { "epoch": 0.4034235149053456, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.7151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5562 }, { "epoch": 0.40349604700079783, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.0703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5563 }, { "epoch": 0.4035685790962501, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.8419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5564 }, { "epoch": 0.4036411111917023, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.5003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5565 }, { "epoch": 0.4037136432871546, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5566 }, { "epoch": 0.4037861753826068, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 9.1512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5567 }, { "epoch": 0.403858707478059, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5568 }, { "epoch": 0.4039312395735113, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.8746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5569 }, { "epoch": 0.4040037716689635, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.1087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5570 }, { "epoch": 0.4040763037644158, "grad_norm": 11.6875, "learning_rate": 0.0003, "loss": 9.1536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5571 }, { "epoch": 0.404148835859868, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.8244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5572 }, { "epoch": 0.4042213679553202, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 9.4528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5573 }, { "epoch": 0.4042939000507725, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5574 }, { "epoch": 0.4043664321462247, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 9.014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5575 }, { "epoch": 0.40443896424167697, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5576 }, { "epoch": 0.4045114963371292, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.2948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5577 }, { "epoch": 0.4045840284325814, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5578 }, { "epoch": 0.40465656052803367, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5579 }, { "epoch": 0.4047290926234859, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5580 }, { "epoch": 0.40480162471893816, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5581 }, { "epoch": 0.4048741568143904, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5582 }, { "epoch": 0.4049466889098426, "grad_norm": 13.0625, "learning_rate": 0.0003, "loss": 8.5973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5583 }, { "epoch": 0.40501922100529486, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5584 }, { "epoch": 0.4050917531007471, "grad_norm": 19.625, "learning_rate": 0.0003, "loss": 8.6786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5585 }, { "epoch": 0.4051642851961993, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5586 }, { "epoch": 0.40523681729165156, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5587 }, { "epoch": 0.4053093493871038, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.5648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5588 }, { "epoch": 0.40538188148255605, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 8.4443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5589 }, { "epoch": 0.40545441357800827, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.7906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5590 }, { "epoch": 0.4055269456734605, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.6386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5591 }, { "epoch": 0.40559947776891275, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5592 }, { "epoch": 0.40567200986436497, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.0588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5593 }, { "epoch": 0.40574454195981724, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.2144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5594 }, { "epoch": 0.40581707405526946, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 9.0612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5595 }, { "epoch": 0.40588960615072167, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.7629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5596 }, { "epoch": 0.40596213824617394, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5597 }, { "epoch": 0.40603467034162616, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5598 }, { "epoch": 0.40610720243707843, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.1482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5599 }, { "epoch": 0.40617973453253065, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5600 }, { "epoch": 0.40625226662798286, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5601 }, { "epoch": 0.40632479872343513, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.9599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5602 }, { "epoch": 0.40639733081888735, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5603 }, { "epoch": 0.4064698629143396, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5604 }, { "epoch": 0.40654239500979183, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.4426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5605 }, { "epoch": 0.40661492710524405, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.9908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5606 }, { "epoch": 0.4066874592006963, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5607 }, { "epoch": 0.40675999129614854, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5608 }, { "epoch": 0.4068325233916008, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.2193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5609 }, { "epoch": 0.406905055487053, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.0552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5610 }, { "epoch": 0.40697758758250524, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.4787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5611 }, { "epoch": 0.4070501196779575, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.9088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5612 }, { "epoch": 0.4071226517734097, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.7392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5613 }, { "epoch": 0.407195183868862, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 9.3483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5614 }, { "epoch": 0.4072677159643142, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5615 }, { "epoch": 0.40734024805976643, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.3646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5616 }, { "epoch": 0.4074127801552187, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5617 }, { "epoch": 0.4074853122506709, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5618 }, { "epoch": 0.40755784434612313, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5619 }, { "epoch": 0.4076303764415754, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 9.0974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5620 }, { "epoch": 0.4077029085370276, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5621 }, { "epoch": 0.4077754406324799, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5622 }, { "epoch": 0.4078479727279321, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.0621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5623 }, { "epoch": 0.4079205048233843, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.0705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5624 }, { "epoch": 0.4079930369188366, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.5599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5625 }, { "epoch": 0.4080655690142888, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 8.7238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5626 }, { "epoch": 0.4081381011097411, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.1469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5627 }, { "epoch": 0.4082106332051933, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5628 }, { "epoch": 0.4082831653006455, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5629 }, { "epoch": 0.4083556973960978, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5630 }, { "epoch": 0.40842822949155, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5631 }, { "epoch": 0.40850076158700227, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.3649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5632 }, { "epoch": 0.4085732936824545, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5633 }, { "epoch": 0.4086458257779067, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5634 }, { "epoch": 0.408718357873359, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 9.2542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5635 }, { "epoch": 0.4087908899688112, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.7634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5636 }, { "epoch": 0.40886342206426346, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.1795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5637 }, { "epoch": 0.4089359541597157, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.9962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5638 }, { "epoch": 0.4090084862551679, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.6661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5639 }, { "epoch": 0.40908101835062016, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5640 }, { "epoch": 0.4091535504460724, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.7702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5641 }, { "epoch": 0.40922608254152465, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.8761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5642 }, { "epoch": 0.40929861463697687, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5643 }, { "epoch": 0.4093711467324291, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.6918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5644 }, { "epoch": 0.40944367882788135, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.8643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5645 }, { "epoch": 0.40951621092333357, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5646 }, { "epoch": 0.40958874301878584, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5647 }, { "epoch": 0.40966127511423805, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5648 }, { "epoch": 0.40973380720969027, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5649 }, { "epoch": 0.40980633930514254, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5650 }, { "epoch": 0.40987887140059476, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 9.048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5651 }, { "epoch": 0.409951403496047, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.8701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5652 }, { "epoch": 0.41002393559149924, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.7065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5653 }, { "epoch": 0.41009646768695146, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5654 }, { "epoch": 0.41016899978240373, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5655 }, { "epoch": 0.41024153187785595, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5656 }, { "epoch": 0.41031406397330816, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5657 }, { "epoch": 0.41038659606876043, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 8.9438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5658 }, { "epoch": 0.41045912816421265, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5659 }, { "epoch": 0.4105316602596649, "grad_norm": 1.5234375, "learning_rate": 0.0003, "loss": 8.5725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5660 }, { "epoch": 0.41060419235511714, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.7444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5661 }, { "epoch": 0.41067672445056935, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5662 }, { "epoch": 0.4107492565460216, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.7211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5663 }, { "epoch": 0.41082178864147384, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.6562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5664 }, { "epoch": 0.4108943207369261, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.5577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5665 }, { "epoch": 0.4109668528323783, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.6785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5666 }, { "epoch": 0.41103938492783054, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5667 }, { "epoch": 0.4111119170232828, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 9.2285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5668 }, { "epoch": 0.41118444911873503, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.6092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5669 }, { "epoch": 0.4112569812141873, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.7184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5670 }, { "epoch": 0.4113295133096395, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5671 }, { "epoch": 0.41140204540509173, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.3032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5672 }, { "epoch": 0.411474577500544, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.6232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5673 }, { "epoch": 0.4115471095959962, "grad_norm": 14.0625, "learning_rate": 0.0003, "loss": 9.2827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5674 }, { "epoch": 0.4116196416914485, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5675 }, { "epoch": 0.4116921737869007, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5676 }, { "epoch": 0.4117647058823529, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 9.0481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5677 }, { "epoch": 0.4118372379778052, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 8.2674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5678 }, { "epoch": 0.4119097700732574, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.4333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5679 }, { "epoch": 0.4119823021687097, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5680 }, { "epoch": 0.4120548342641619, "grad_norm": 19.25, "learning_rate": 0.0003, "loss": 8.8976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5681 }, { "epoch": 0.4121273663596141, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5682 }, { "epoch": 0.4121998984550664, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5683 }, { "epoch": 0.4122724305505186, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.7021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5684 }, { "epoch": 0.41234496264597087, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.5934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5685 }, { "epoch": 0.4124174947414231, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.5611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5686 }, { "epoch": 0.4124900268368753, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.5046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5687 }, { "epoch": 0.41256255893232757, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5688 }, { "epoch": 0.4126350910277798, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5689 }, { "epoch": 0.412707623123232, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5690 }, { "epoch": 0.4127801552186843, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5691 }, { "epoch": 0.4128526873141365, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.8863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5692 }, { "epoch": 0.41292521940958876, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.1028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5693 }, { "epoch": 0.412997751505041, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 9.2172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5694 }, { "epoch": 0.4130702836004932, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5695 }, { "epoch": 0.41314281569594546, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5696 }, { "epoch": 0.4132153477913977, "grad_norm": 32.75, "learning_rate": 0.0003, "loss": 8.5989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5697 }, { "epoch": 0.41328787988684995, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5698 }, { "epoch": 0.41336041198230217, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5699 }, { "epoch": 0.4134329440777544, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 8.9918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5700 }, { "epoch": 0.41350547617320665, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.4488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5701 }, { "epoch": 0.41357800826865887, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5702 }, { "epoch": 0.41365054036411114, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5703 }, { "epoch": 0.41372307245956336, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5704 }, { "epoch": 0.4137956045550156, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5705 }, { "epoch": 0.41386813665046784, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.6995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5706 }, { "epoch": 0.41394066874592006, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5707 }, { "epoch": 0.41401320084137233, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.9201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5708 }, { "epoch": 0.41408573293682455, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.2907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5709 }, { "epoch": 0.41415826503227676, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.7482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5710 }, { "epoch": 0.41423079712772903, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.88, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5711 }, { "epoch": 0.41430332922318125, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.6022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5712 }, { "epoch": 0.4143758613186335, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5713 }, { "epoch": 0.41444839341408574, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.8762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5714 }, { "epoch": 0.41452092550953795, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.7514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5715 }, { "epoch": 0.4145934576049902, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5716 }, { "epoch": 0.41466598970044244, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5717 }, { "epoch": 0.4147385217958947, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5718 }, { "epoch": 0.4148110538913469, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5719 }, { "epoch": 0.41488358598679914, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5720 }, { "epoch": 0.4149561180822514, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.3749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5721 }, { "epoch": 0.41502865017770363, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5722 }, { "epoch": 0.41510118227315584, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.6873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5723 }, { "epoch": 0.4151737143686081, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.6936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5724 }, { "epoch": 0.41524624646406033, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.1591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5725 }, { "epoch": 0.4153187785595126, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 9.2705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5726 }, { "epoch": 0.4153913106549648, "grad_norm": 43.25, "learning_rate": 0.0003, "loss": 8.9788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5727 }, { "epoch": 0.41546384275041703, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5728 }, { "epoch": 0.4155363748458693, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5729 }, { "epoch": 0.4156089069413215, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5730 }, { "epoch": 0.4156814390367738, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 9.2114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5731 }, { "epoch": 0.415753971132226, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.5998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5732 }, { "epoch": 0.4158265032276782, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.4727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5733 }, { "epoch": 0.4158990353231305, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5734 }, { "epoch": 0.4159715674185827, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.4592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5735 }, { "epoch": 0.416044099514035, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.9863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5736 }, { "epoch": 0.4161166316094872, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.2951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5737 }, { "epoch": 0.4161891637049394, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.5227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5738 }, { "epoch": 0.4162616958003917, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5739 }, { "epoch": 0.4163342278958439, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5740 }, { "epoch": 0.41640675999129617, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.0748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5741 }, { "epoch": 0.4164792920867484, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 9.2319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5742 }, { "epoch": 0.4165518241822006, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5743 }, { "epoch": 0.4166243562776529, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.9423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5744 }, { "epoch": 0.4166968883731051, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.6107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5745 }, { "epoch": 0.41676942046855736, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.5749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5746 }, { "epoch": 0.4168419525640096, "grad_norm": 13.0625, "learning_rate": 0.0003, "loss": 9.2994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5747 }, { "epoch": 0.4169144846594618, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 9.5168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5748 }, { "epoch": 0.41698701675491406, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5749 }, { "epoch": 0.4170595488503663, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5750 }, { "epoch": 0.41713208094581855, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5751 }, { "epoch": 0.41720461304127077, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5752 }, { "epoch": 0.417277145136723, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5753 }, { "epoch": 0.41734967723217525, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.2491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5754 }, { "epoch": 0.41742220932762747, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.1213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5755 }, { "epoch": 0.41749474142307974, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.8594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5756 }, { "epoch": 0.41756727351853196, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.7094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5757 }, { "epoch": 0.41763980561398417, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.2167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5758 }, { "epoch": 0.41771233770943644, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.4996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5759 }, { "epoch": 0.41778486980488866, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5760 }, { "epoch": 0.4178574019003409, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.4473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5761 }, { "epoch": 0.41792993399579315, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5762 }, { "epoch": 0.41800246609124536, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.3641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5763 }, { "epoch": 0.41807499818669763, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5764 }, { "epoch": 0.41814753028214985, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5765 }, { "epoch": 0.41822006237760206, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.5018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5766 }, { "epoch": 0.41829259447305434, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.0457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5767 }, { "epoch": 0.41836512656850655, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.0969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5768 }, { "epoch": 0.4184376586639588, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5769 }, { "epoch": 0.41851019075941104, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5770 }, { "epoch": 0.41858272285486325, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.6307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5771 }, { "epoch": 0.4186552549503155, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.7146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5772 }, { "epoch": 0.41872778704576774, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.6212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5773 }, { "epoch": 0.41880031914122, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5774 }, { "epoch": 0.41887285123667223, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.5303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5775 }, { "epoch": 0.41894538333212444, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5776 }, { "epoch": 0.4190179154275767, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5777 }, { "epoch": 0.41909044752302893, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 8.7033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5778 }, { "epoch": 0.4191629796184812, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.7115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5779 }, { "epoch": 0.4192355117139334, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.3045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5780 }, { "epoch": 0.41930804380938563, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5781 }, { "epoch": 0.4193805759048379, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5782 }, { "epoch": 0.4194531080002901, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5783 }, { "epoch": 0.4195256400957424, "grad_norm": 34.75, "learning_rate": 0.0003, "loss": 8.8003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5784 }, { "epoch": 0.4195981721911946, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.4378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5785 }, { "epoch": 0.4196707042866468, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5786 }, { "epoch": 0.4197432363820991, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 9.2304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5787 }, { "epoch": 0.4198157684775513, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.0139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5788 }, { "epoch": 0.4198883005730036, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5789 }, { "epoch": 0.4199608326684558, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5790 }, { "epoch": 0.420033364763908, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.4222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5791 }, { "epoch": 0.4201058968593603, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5792 }, { "epoch": 0.4201784289548125, "grad_norm": 15.6875, "learning_rate": 0.0003, "loss": 9.0474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5793 }, { "epoch": 0.4202509610502647, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5794 }, { "epoch": 0.420323493145717, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5795 }, { "epoch": 0.4203960252411692, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5796 }, { "epoch": 0.4204685573366215, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5797 }, { "epoch": 0.4205410894320737, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5798 }, { "epoch": 0.4206136215275259, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5799 }, { "epoch": 0.4206861536229782, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5800 }, { "epoch": 0.4207586857184304, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5801 }, { "epoch": 0.42083121781388266, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5802 }, { "epoch": 0.4209037499093349, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.2736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5803 }, { "epoch": 0.4209762820047871, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5804 }, { "epoch": 0.42104881410023937, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.1756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5805 }, { "epoch": 0.4211213461956916, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5806 }, { "epoch": 0.42119387829114385, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.1468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5807 }, { "epoch": 0.42126641038659607, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5808 }, { "epoch": 0.4213389424820483, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.2329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5809 }, { "epoch": 0.42141147457750056, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.1938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5810 }, { "epoch": 0.42148400667295277, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.9893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5811 }, { "epoch": 0.42155653876840504, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.7383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5812 }, { "epoch": 0.42162907086385726, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.9738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5813 }, { "epoch": 0.4217016029593095, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5814 }, { "epoch": 0.42177413505476175, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.4169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5815 }, { "epoch": 0.42184666715021396, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.1675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5816 }, { "epoch": 0.42191919924566623, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.2283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5817 }, { "epoch": 0.42199173134111845, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.7796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5818 }, { "epoch": 0.42206426343657066, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.4951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5819 }, { "epoch": 0.42213679553202293, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.7804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5820 }, { "epoch": 0.42220932762747515, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5821 }, { "epoch": 0.4222818597229274, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.8456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5822 }, { "epoch": 0.42235439181837964, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.6814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5823 }, { "epoch": 0.42242692391383185, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.4251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5824 }, { "epoch": 0.4224994560092841, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.7452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5825 }, { "epoch": 0.42257198810473634, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5826 }, { "epoch": 0.42264452020018856, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.8805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5827 }, { "epoch": 0.4227170522956408, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5828 }, { "epoch": 0.42278958439109304, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5829 }, { "epoch": 0.4228621164865453, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5830 }, { "epoch": 0.42293464858199753, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.0081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5831 }, { "epoch": 0.42300718067744975, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.2594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5832 }, { "epoch": 0.423079712772902, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.1464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5833 }, { "epoch": 0.42315224486835423, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5834 }, { "epoch": 0.4232247769638065, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.7846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5835 }, { "epoch": 0.4232973090592587, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.8203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5836 }, { "epoch": 0.42336984115471094, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 9.1779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5837 }, { "epoch": 0.4234423732501632, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.2269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5838 }, { "epoch": 0.4235149053456154, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.7932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5839 }, { "epoch": 0.4235874374410677, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5840 }, { "epoch": 0.4236599695365199, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.0881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5841 }, { "epoch": 0.4237325016319721, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5842 }, { "epoch": 0.4238050337274244, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.3266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5843 }, { "epoch": 0.4238775658228766, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.0035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5844 }, { "epoch": 0.4239500979183289, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5845 }, { "epoch": 0.4240226300137811, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.1084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5846 }, { "epoch": 0.4240951621092333, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.6581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5847 }, { "epoch": 0.4241676942046856, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5848 }, { "epoch": 0.4242402263001378, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.2902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5849 }, { "epoch": 0.4243127583955901, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5850 }, { "epoch": 0.4243852904910423, "grad_norm": 1.4375, "learning_rate": 0.0003, "loss": 8.7509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5851 }, { "epoch": 0.4244578225864945, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5852 }, { "epoch": 0.4245303546819468, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 9.0515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5853 }, { "epoch": 0.424602886777399, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.6539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5854 }, { "epoch": 0.42467541887285126, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5855 }, { "epoch": 0.4247479509683035, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5856 }, { "epoch": 0.4248204830637557, "grad_norm": 32.5, "learning_rate": 0.0003, "loss": 9.1518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5857 }, { "epoch": 0.42489301515920797, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.6868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5858 }, { "epoch": 0.4249655472546602, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5859 }, { "epoch": 0.42503807935011245, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.6868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5860 }, { "epoch": 0.42511061144556467, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.7033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5861 }, { "epoch": 0.4251831435410169, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.2601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5862 }, { "epoch": 0.42525567563646915, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5863 }, { "epoch": 0.42532820773192137, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5864 }, { "epoch": 0.4254007398273736, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.9959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5865 }, { "epoch": 0.42547327192282586, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.6454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5866 }, { "epoch": 0.4255458040182781, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.5523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5867 }, { "epoch": 0.42561833611373034, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.1118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5868 }, { "epoch": 0.42569086820918256, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5869 }, { "epoch": 0.4257634003046348, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.9583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5870 }, { "epoch": 0.42583593240008705, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5871 }, { "epoch": 0.42590846449553926, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.0426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5872 }, { "epoch": 0.42598099659099153, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5873 }, { "epoch": 0.42605352868644375, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 9.1848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5874 }, { "epoch": 0.42612606078189597, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5875 }, { "epoch": 0.42619859287734824, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5876 }, { "epoch": 0.42627112497280045, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.0431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5877 }, { "epoch": 0.4263436570682527, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 8.9173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5878 }, { "epoch": 0.42641618916370494, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 8.886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5879 }, { "epoch": 0.42648872125915716, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5880 }, { "epoch": 0.4265612533546094, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.2023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5881 }, { "epoch": 0.42663378545006164, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5882 }, { "epoch": 0.4267063175455139, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5883 }, { "epoch": 0.42677884964096613, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.1936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5884 }, { "epoch": 0.42685138173641834, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.1357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5885 }, { "epoch": 0.4269239138318706, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5886 }, { "epoch": 0.42699644592732283, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5887 }, { "epoch": 0.4270689780227751, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5888 }, { "epoch": 0.4271415101182273, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5889 }, { "epoch": 0.42721404221367953, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.1972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5890 }, { "epoch": 0.4272865743091318, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5891 }, { "epoch": 0.427359106404584, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5892 }, { "epoch": 0.4274316385000363, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5893 }, { "epoch": 0.4275041705954885, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5894 }, { "epoch": 0.4275767026909407, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.5976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5895 }, { "epoch": 0.427649234786393, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.6244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5896 }, { "epoch": 0.4277217668818452, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 8.4394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5897 }, { "epoch": 0.4277942989772974, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5898 }, { "epoch": 0.4278668310727497, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5899 }, { "epoch": 0.4279393631682019, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.9547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5900 }, { "epoch": 0.4280118952636542, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5901 }, { "epoch": 0.4280844273591064, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5902 }, { "epoch": 0.4281569594545586, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.2502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5903 }, { "epoch": 0.4282294915500109, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.5999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5904 }, { "epoch": 0.4283020236454631, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.7409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5905 }, { "epoch": 0.4283745557409154, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.9593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5906 }, { "epoch": 0.4284470878363676, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5907 }, { "epoch": 0.4285196199318198, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5908 }, { "epoch": 0.4285921520272721, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5909 }, { "epoch": 0.4286646841227243, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.9132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5910 }, { "epoch": 0.42873721621817656, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.9915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5911 }, { "epoch": 0.4288097483136288, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5912 }, { "epoch": 0.428882280409081, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.0937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5913 }, { "epoch": 0.42895481250453327, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.1971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5914 }, { "epoch": 0.4290273445999855, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5915 }, { "epoch": 0.42909987669543775, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5916 }, { "epoch": 0.42917240879088997, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.7951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5917 }, { "epoch": 0.4292449408863422, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5918 }, { "epoch": 0.42931747298179446, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.1894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5919 }, { "epoch": 0.4293900050772467, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5920 }, { "epoch": 0.42946253717269894, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5921 }, { "epoch": 0.42953506926815116, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5922 }, { "epoch": 0.4296076013636034, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.4003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5923 }, { "epoch": 0.42968013345905565, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.4273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5924 }, { "epoch": 0.42975266555450786, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.0523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5925 }, { "epoch": 0.42982519764996013, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.5302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5926 }, { "epoch": 0.42989772974541235, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.4434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5927 }, { "epoch": 0.42997026184086456, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.2491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5928 }, { "epoch": 0.43004279393631684, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5929 }, { "epoch": 0.43011532603176905, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5930 }, { "epoch": 0.4301878581272213, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5931 }, { "epoch": 0.43026039022267354, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5932 }, { "epoch": 0.43033292231812575, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5933 }, { "epoch": 0.430405454413578, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.4165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5934 }, { "epoch": 0.43047798650903024, "grad_norm": 21.375, "learning_rate": 0.0003, "loss": 9.162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5935 }, { "epoch": 0.43055051860448246, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5936 }, { "epoch": 0.43062305069993473, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.0818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5937 }, { "epoch": 0.43069558279538694, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5938 }, { "epoch": 0.4307681148908392, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.6466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5939 }, { "epoch": 0.43084064698629143, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.9448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5940 }, { "epoch": 0.43091317908174365, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5941 }, { "epoch": 0.4309857111771959, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.1613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5942 }, { "epoch": 0.43105824327264813, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5943 }, { "epoch": 0.4311307753681004, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.8783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5944 }, { "epoch": 0.4312033074635526, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.4944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5945 }, { "epoch": 0.43127583955900484, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.6745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5946 }, { "epoch": 0.4313483716544571, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5947 }, { "epoch": 0.4314209037499093, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5948 }, { "epoch": 0.4314934358453616, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.6314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5949 }, { "epoch": 0.4315659679408138, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5950 }, { "epoch": 0.431638500036266, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 8.8444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5951 }, { "epoch": 0.4317110321317183, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.6595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5952 }, { "epoch": 0.4317835642271705, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5953 }, { "epoch": 0.4318560963226228, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5954 }, { "epoch": 0.431928628418075, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5955 }, { "epoch": 0.4320011605135272, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5956 }, { "epoch": 0.4320736926089795, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.2157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5957 }, { "epoch": 0.4321462247044317, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.9083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5958 }, { "epoch": 0.432218756799884, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5959 }, { "epoch": 0.4322912888953362, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.2422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5960 }, { "epoch": 0.4323638209907884, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.4198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5961 }, { "epoch": 0.4324363530862407, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.4009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5962 }, { "epoch": 0.4325088851816929, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5963 }, { "epoch": 0.43258141727714516, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 9.3062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5964 }, { "epoch": 0.4326539493725974, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5965 }, { "epoch": 0.4327264814680496, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.1591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5966 }, { "epoch": 0.43279901356350187, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5967 }, { "epoch": 0.4328715456589541, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5968 }, { "epoch": 0.4329440777544063, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5969 }, { "epoch": 0.43301660984985857, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5970 }, { "epoch": 0.4330891419453108, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.9674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5971 }, { "epoch": 0.43316167404076306, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.2554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5972 }, { "epoch": 0.43323420613621527, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.8816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5973 }, { "epoch": 0.4333067382316675, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5974 }, { "epoch": 0.43337927032711976, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.2024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5975 }, { "epoch": 0.433451802422572, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5976 }, { "epoch": 0.43352433451802425, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5977 }, { "epoch": 0.43359686661347646, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.3112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5978 }, { "epoch": 0.4336693987089287, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.8367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5979 }, { "epoch": 0.43374193080438095, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5980 }, { "epoch": 0.43381446289983316, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5981 }, { "epoch": 0.43388699499528544, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.3191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5982 }, { "epoch": 0.43395952709073765, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.5419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5983 }, { "epoch": 0.43403205918618987, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5984 }, { "epoch": 0.43410459128164214, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.4392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5985 }, { "epoch": 0.43417712337709435, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5986 }, { "epoch": 0.4342496554725466, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.6418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5987 }, { "epoch": 0.43432218756799884, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.0568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5988 }, { "epoch": 0.43439471966345106, "grad_norm": 13.125, "learning_rate": 0.0003, "loss": 8.8321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5989 }, { "epoch": 0.43446725175890333, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.0168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5990 }, { "epoch": 0.43453978385435554, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.4273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5991 }, { "epoch": 0.4346123159498078, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5992 }, { "epoch": 0.43468484804526003, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5993 }, { "epoch": 0.43475738014071225, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.2621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5994 }, { "epoch": 0.4348299122361645, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5995 }, { "epoch": 0.43490244433161673, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5996 }, { "epoch": 0.434974976427069, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.0628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5997 }, { "epoch": 0.4350475085225212, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5998 }, { "epoch": 0.43512004061797344, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.53, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 5999 }, { "epoch": 0.4351925727134257, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6000 }, { "epoch": 0.4352651048088779, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6001 }, { "epoch": 0.43533763690433014, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.0068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6002 }, { "epoch": 0.4354101689997824, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.3556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6003 }, { "epoch": 0.4354827010952346, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.1753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6004 }, { "epoch": 0.4355552331906869, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6005 }, { "epoch": 0.4356277652861391, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6006 }, { "epoch": 0.43570029738159133, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6007 }, { "epoch": 0.4357728294770436, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6008 }, { "epoch": 0.4358453615724958, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6009 }, { "epoch": 0.4359178936679481, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6010 }, { "epoch": 0.4359904257634003, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.0185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6011 }, { "epoch": 0.4360629578588525, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 9.3323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6012 }, { "epoch": 0.4361354899543048, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 8.7501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6013 }, { "epoch": 0.436208022049757, "grad_norm": 12.1875, "learning_rate": 0.0003, "loss": 8.9647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6014 }, { "epoch": 0.4362805541452093, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.2339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6015 }, { "epoch": 0.4363530862406615, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6016 }, { "epoch": 0.4364256183361137, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6017 }, { "epoch": 0.436498150431566, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6018 }, { "epoch": 0.4365706825270182, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6019 }, { "epoch": 0.43664321462247047, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6020 }, { "epoch": 0.4367157467179227, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.6423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6021 }, { "epoch": 0.4367882788133749, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6022 }, { "epoch": 0.43686081090882717, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6023 }, { "epoch": 0.4369333430042794, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.8654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6024 }, { "epoch": 0.43700587509973166, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6025 }, { "epoch": 0.43707840719518387, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6026 }, { "epoch": 0.4371509392906361, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 9.09, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6027 }, { "epoch": 0.43722347138608836, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.2627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6028 }, { "epoch": 0.4372960034815406, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6029 }, { "epoch": 0.43736853557699285, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.7649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6030 }, { "epoch": 0.43744106767244506, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6031 }, { "epoch": 0.4375135997678973, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.6955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6032 }, { "epoch": 0.43758613186334955, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6033 }, { "epoch": 0.43765866395880176, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.4761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6034 }, { "epoch": 0.43773119605425403, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6035 }, { "epoch": 0.43780372814970625, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6036 }, { "epoch": 0.43787626024515847, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.4144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6037 }, { "epoch": 0.43794879234061074, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6038 }, { "epoch": 0.43802132443606295, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.84, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6039 }, { "epoch": 0.43809385653151517, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6040 }, { "epoch": 0.43816638862696744, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.0326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6041 }, { "epoch": 0.43823892072241966, "grad_norm": 21.5, "learning_rate": 0.0003, "loss": 9.053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6042 }, { "epoch": 0.4383114528178719, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.7562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6043 }, { "epoch": 0.43838398491332414, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.8379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6044 }, { "epoch": 0.43845651700877636, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.7767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6045 }, { "epoch": 0.43852904910422863, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.2936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6046 }, { "epoch": 0.43860158119968085, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.5958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6047 }, { "epoch": 0.4386741132951331, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6048 }, { "epoch": 0.43874664539058533, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.4726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6049 }, { "epoch": 0.43881917748603755, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.3594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6050 }, { "epoch": 0.4388917095814898, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.9171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6051 }, { "epoch": 0.43896424167694204, "grad_norm": 14.875, "learning_rate": 0.0003, "loss": 8.8549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6052 }, { "epoch": 0.4390367737723943, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.8007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6053 }, { "epoch": 0.4391093058678465, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6054 }, { "epoch": 0.43918183796329874, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.7631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6055 }, { "epoch": 0.439254370058751, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6056 }, { "epoch": 0.4393269021542032, "grad_norm": 20.375, "learning_rate": 0.0003, "loss": 8.5404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6057 }, { "epoch": 0.4393994342496555, "grad_norm": 91.0, "learning_rate": 0.0003, "loss": 8.8404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6058 }, { "epoch": 0.4394719663451077, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6059 }, { "epoch": 0.4395444984405599, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6060 }, { "epoch": 0.4396170305360122, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.07, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6061 }, { "epoch": 0.4396895626314644, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6062 }, { "epoch": 0.4397620947269167, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6063 }, { "epoch": 0.4398346268223689, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.4409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6064 }, { "epoch": 0.4399071589178211, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6065 }, { "epoch": 0.4399796910132734, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6066 }, { "epoch": 0.4400522231087256, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.0415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6067 }, { "epoch": 0.4401247552041779, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.7498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6068 }, { "epoch": 0.4401972872996301, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6069 }, { "epoch": 0.4402698193950823, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.8903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6070 }, { "epoch": 0.4403423514905346, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.0899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6071 }, { "epoch": 0.4404148835859868, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6072 }, { "epoch": 0.440487415681439, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6073 }, { "epoch": 0.4405599477768913, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.5446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6074 }, { "epoch": 0.4406324798723435, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.7638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6075 }, { "epoch": 0.44070501196779577, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.2174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6076 }, { "epoch": 0.440777544063248, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6077 }, { "epoch": 0.4408500761587002, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.8712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6078 }, { "epoch": 0.44092260825415247, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.9825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6079 }, { "epoch": 0.4409951403496047, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.6713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6080 }, { "epoch": 0.44106767244505696, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6081 }, { "epoch": 0.4411402045405092, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.0378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6082 }, { "epoch": 0.4412127366359614, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6083 }, { "epoch": 0.44128526873141366, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6084 }, { "epoch": 0.4413578008268659, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.0462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6085 }, { "epoch": 0.44143033292231815, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.9719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6086 }, { "epoch": 0.44150286501777036, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6087 }, { "epoch": 0.4415753971132226, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6088 }, { "epoch": 0.44164792920867485, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6089 }, { "epoch": 0.44172046130412707, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6090 }, { "epoch": 0.44179299339957934, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.3849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6091 }, { "epoch": 0.44186552549503155, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6092 }, { "epoch": 0.44193805759048377, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.2977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6093 }, { "epoch": 0.44201058968593604, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6094 }, { "epoch": 0.44208312178138826, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6095 }, { "epoch": 0.4421556538768405, "grad_norm": 1.5078125, "learning_rate": 0.0003, "loss": 9.1261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6096 }, { "epoch": 0.44222818597229274, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6097 }, { "epoch": 0.44230071806774496, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.5051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6098 }, { "epoch": 0.44237325016319723, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.6564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6099 }, { "epoch": 0.44244578225864944, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6100 }, { "epoch": 0.4425183143541017, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.1574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6101 }, { "epoch": 0.44259084644955393, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.6766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6102 }, { "epoch": 0.44266337854500615, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6103 }, { "epoch": 0.4427359106404584, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.2216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6104 }, { "epoch": 0.44280844273591063, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6105 }, { "epoch": 0.4428809748313629, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.9049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6106 }, { "epoch": 0.4429535069268151, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.8928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6107 }, { "epoch": 0.44302603902226734, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.8711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6108 }, { "epoch": 0.4430985711177196, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.3117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6109 }, { "epoch": 0.4431711032131718, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6110 }, { "epoch": 0.44324363530862404, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.6886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6111 }, { "epoch": 0.4433161674040763, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6112 }, { "epoch": 0.4433886994995285, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.6608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6113 }, { "epoch": 0.4434612315949808, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.1284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6114 }, { "epoch": 0.443533763690433, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.7035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6115 }, { "epoch": 0.44360629578588523, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6116 }, { "epoch": 0.4436788278813375, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.8277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6117 }, { "epoch": 0.4437513599767897, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6118 }, { "epoch": 0.443823892072242, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6119 }, { "epoch": 0.4438964241676942, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.1767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6120 }, { "epoch": 0.4439689562631464, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.0049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6121 }, { "epoch": 0.4440414883585987, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.0075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6122 }, { "epoch": 0.4441140204540509, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.6077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6123 }, { "epoch": 0.4441865525495032, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6124 }, { "epoch": 0.4442590846449554, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6125 }, { "epoch": 0.4443316167404076, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.8476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6126 }, { "epoch": 0.4444041488358599, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.1263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6127 }, { "epoch": 0.4444766809313121, "grad_norm": 1.5546875, "learning_rate": 0.0003, "loss": 9.4236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6128 }, { "epoch": 0.44454921302676437, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.9064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6129 }, { "epoch": 0.4446217451222166, "grad_norm": 27.5, "learning_rate": 0.0003, "loss": 9.2884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6130 }, { "epoch": 0.4446942772176688, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6131 }, { "epoch": 0.44476680931312107, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.3749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6132 }, { "epoch": 0.4448393414085733, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6133 }, { "epoch": 0.44491187350402556, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6134 }, { "epoch": 0.4449844055994778, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 8.8455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6135 }, { "epoch": 0.44505693769493, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6136 }, { "epoch": 0.44512946979038226, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.3404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6137 }, { "epoch": 0.4452020018858345, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.7247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6138 }, { "epoch": 0.44527453398128675, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6139 }, { "epoch": 0.44534706607673896, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.1869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6140 }, { "epoch": 0.4454195981721912, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6141 }, { "epoch": 0.44549213026764345, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6142 }, { "epoch": 0.44556466236309566, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.1502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6143 }, { "epoch": 0.4456371944585479, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.8454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6144 }, { "epoch": 0.44570972655400015, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.7712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6145 }, { "epoch": 0.44578225864945237, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.0546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6146 }, { "epoch": 0.44585479074490464, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6147 }, { "epoch": 0.44592732284035685, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6148 }, { "epoch": 0.44599985493580907, "grad_norm": 1.5703125, "learning_rate": 0.0003, "loss": 8.8077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6149 }, { "epoch": 0.44607238703126134, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6150 }, { "epoch": 0.44614491912671356, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6151 }, { "epoch": 0.44621745122216583, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.6749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6152 }, { "epoch": 0.44628998331761804, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6153 }, { "epoch": 0.44636251541307026, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.6709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6154 }, { "epoch": 0.44643504750852253, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6155 }, { "epoch": 0.44650757960397475, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6156 }, { "epoch": 0.446580111699427, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.7592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6157 }, { "epoch": 0.44665264379487923, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6158 }, { "epoch": 0.44672517589033145, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.6257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6159 }, { "epoch": 0.4467977079857837, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.2241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6160 }, { "epoch": 0.44687024008123594, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.3867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6161 }, { "epoch": 0.4469427721766882, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.7117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6162 }, { "epoch": 0.4470153042721404, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.6293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6163 }, { "epoch": 0.44708783636759264, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 8.894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6164 }, { "epoch": 0.4471603684630449, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6165 }, { "epoch": 0.4472329005584971, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6166 }, { "epoch": 0.4473054326539494, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.2221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6167 }, { "epoch": 0.4473779647494016, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 9.1909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6168 }, { "epoch": 0.44745049684485383, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6169 }, { "epoch": 0.4475230289403061, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6170 }, { "epoch": 0.4475955610357583, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6171 }, { "epoch": 0.4476680931312106, "grad_norm": 21.5, "learning_rate": 0.0003, "loss": 8.7301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6172 }, { "epoch": 0.4477406252266628, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.6535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6173 }, { "epoch": 0.447813157322115, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6174 }, { "epoch": 0.4478856894175673, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6175 }, { "epoch": 0.4479582215130195, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 8.9772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6176 }, { "epoch": 0.4480307536084717, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6177 }, { "epoch": 0.448103285703924, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.0235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6178 }, { "epoch": 0.4481758177993762, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.3129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6179 }, { "epoch": 0.4482483498948285, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.7339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6180 }, { "epoch": 0.4483208819902807, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.0181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6181 }, { "epoch": 0.4483934140857329, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6182 }, { "epoch": 0.4484659461811852, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.2546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6183 }, { "epoch": 0.4485384782766374, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.9215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6184 }, { "epoch": 0.44861101037208967, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.2522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6185 }, { "epoch": 0.4486835424675419, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6186 }, { "epoch": 0.4487560745629941, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6187 }, { "epoch": 0.44882860665844637, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6188 }, { "epoch": 0.4489011387538986, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.7336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6189 }, { "epoch": 0.44897367084935086, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6190 }, { "epoch": 0.4490462029448031, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6191 }, { "epoch": 0.4491187350402553, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.5333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6192 }, { "epoch": 0.44919126713570756, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6193 }, { "epoch": 0.4492637992311598, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.8399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6194 }, { "epoch": 0.44933633132661205, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6195 }, { "epoch": 0.44940886342206426, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.1458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6196 }, { "epoch": 0.4494813955175165, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.5984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6197 }, { "epoch": 0.44955392761296875, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.5176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6198 }, { "epoch": 0.44962645970842097, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6199 }, { "epoch": 0.44969899180387324, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6200 }, { "epoch": 0.44977152389932545, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.3714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6201 }, { "epoch": 0.44984405599477767, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.0692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6202 }, { "epoch": 0.44991658809022994, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6203 }, { "epoch": 0.44998912018568216, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.0396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6204 }, { "epoch": 0.45006165228113443, "grad_norm": 1.3984375, "learning_rate": 0.0003, "loss": 8.8317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6205 }, { "epoch": 0.45013418437658664, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.3095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6206 }, { "epoch": 0.45020671647203886, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6207 }, { "epoch": 0.45027924856749113, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.0813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6208 }, { "epoch": 0.45035178066294335, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6209 }, { "epoch": 0.4504243127583956, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6210 }, { "epoch": 0.45049684485384783, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6211 }, { "epoch": 0.45056937694930005, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6212 }, { "epoch": 0.4506419090447523, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.6848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6213 }, { "epoch": 0.45071444114020454, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6214 }, { "epoch": 0.45078697323565675, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6215 }, { "epoch": 0.450859505331109, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.7757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6216 }, { "epoch": 0.45093203742656124, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6217 }, { "epoch": 0.4510045695220135, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.0103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6218 }, { "epoch": 0.4510771016174657, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.4453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6219 }, { "epoch": 0.45114963371291794, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6220 }, { "epoch": 0.4512221658083702, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.2034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6221 }, { "epoch": 0.45129469790382243, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.5434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6222 }, { "epoch": 0.4513672299992747, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6223 }, { "epoch": 0.4514397620947269, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.89, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6224 }, { "epoch": 0.45151229419017913, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6225 }, { "epoch": 0.4515848262856314, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6226 }, { "epoch": 0.4516573583810836, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6227 }, { "epoch": 0.4517298904765359, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6228 }, { "epoch": 0.4518024225719881, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6229 }, { "epoch": 0.4518749546674403, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6230 }, { "epoch": 0.4519474867628926, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 8.5433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6231 }, { "epoch": 0.4520200188583448, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6232 }, { "epoch": 0.4520925509537971, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6233 }, { "epoch": 0.4521650830492493, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.6053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6234 }, { "epoch": 0.4522376151447015, "grad_norm": 36.5, "learning_rate": 0.0003, "loss": 9.2451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6235 }, { "epoch": 0.4523101472401538, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.2225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6236 }, { "epoch": 0.452382679335606, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.4121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6237 }, { "epoch": 0.45245521143105827, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.8276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6238 }, { "epoch": 0.4525277435265105, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.8063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6239 }, { "epoch": 0.4526002756219627, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.7473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6240 }, { "epoch": 0.45267280771741497, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6241 }, { "epoch": 0.4527453398128672, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6242 }, { "epoch": 0.45281787190831946, "grad_norm": 29.5, "learning_rate": 0.0003, "loss": 9.3582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6243 }, { "epoch": 0.4528904040037717, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.0108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6244 }, { "epoch": 0.4529629360992239, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.4918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6245 }, { "epoch": 0.45303546819467616, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6246 }, { "epoch": 0.4531080002901284, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6247 }, { "epoch": 0.4531805323855806, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6248 }, { "epoch": 0.45325306448103286, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.9659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6249 }, { "epoch": 0.4533255965764851, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.8117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6250 }, { "epoch": 0.45339812867193735, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.8136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6251 }, { "epoch": 0.45347066076738957, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.4826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6252 }, { "epoch": 0.4535431928628418, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6253 }, { "epoch": 0.45361572495829405, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.7462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6254 }, { "epoch": 0.45368825705374627, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6255 }, { "epoch": 0.45376078914919854, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.2678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6256 }, { "epoch": 0.45383332124465076, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6257 }, { "epoch": 0.45390585334010297, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6258 }, { "epoch": 0.45397838543555524, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.8277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6259 }, { "epoch": 0.45405091753100746, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 9.2179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6260 }, { "epoch": 0.45412344962645973, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.1772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6261 }, { "epoch": 0.45419598172191195, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6262 }, { "epoch": 0.45426851381736416, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.6427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6263 }, { "epoch": 0.45434104591281643, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 8.5891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6264 }, { "epoch": 0.45441357800826865, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.4446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6265 }, { "epoch": 0.4544861101037209, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6266 }, { "epoch": 0.45455864219917314, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6267 }, { "epoch": 0.45463117429462535, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6268 }, { "epoch": 0.4547037063900776, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.6491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6269 }, { "epoch": 0.45477623848552984, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6270 }, { "epoch": 0.4548487705809821, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.5647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6271 }, { "epoch": 0.4549213026764343, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6272 }, { "epoch": 0.45499383477188654, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 8.7952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6273 }, { "epoch": 0.4550663668673388, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6274 }, { "epoch": 0.455138898962791, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.1702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6275 }, { "epoch": 0.4552114310582433, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 9.0628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6276 }, { "epoch": 0.4552839631536955, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.1518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6277 }, { "epoch": 0.45535649524914773, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.0513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6278 }, { "epoch": 0.4554290273446, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.77, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6279 }, { "epoch": 0.4555015594400522, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.0935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6280 }, { "epoch": 0.4555740915355045, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.7034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6281 }, { "epoch": 0.4556466236309567, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6282 }, { "epoch": 0.4557191557264089, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6283 }, { "epoch": 0.4557916878218612, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.3413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6284 }, { "epoch": 0.4558642199173134, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.2533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6285 }, { "epoch": 0.4559367520127656, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.0429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6286 }, { "epoch": 0.4560092841082179, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6287 }, { "epoch": 0.4560818162036701, "grad_norm": 14.6875, "learning_rate": 0.0003, "loss": 8.9582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6288 }, { "epoch": 0.4561543482991224, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6289 }, { "epoch": 0.4562268803945746, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6290 }, { "epoch": 0.4562994124900268, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6291 }, { "epoch": 0.4563719445854791, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.7373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6292 }, { "epoch": 0.4564444766809313, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6293 }, { "epoch": 0.45651700877638357, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.4376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6294 }, { "epoch": 0.4565895408718358, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.3849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6295 }, { "epoch": 0.456662072967288, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.3324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6296 }, { "epoch": 0.4567346050627403, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 9.2574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6297 }, { "epoch": 0.4568071371581925, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6298 }, { "epoch": 0.45687966925364476, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6299 }, { "epoch": 0.456952201349097, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.6966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6300 }, { "epoch": 0.4570247334445492, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.0347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6301 }, { "epoch": 0.45709726554000146, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.6978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6302 }, { "epoch": 0.4571697976354537, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.4037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6303 }, { "epoch": 0.45724232973090595, "grad_norm": 1.6796875, "learning_rate": 0.0003, "loss": 9.0759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6304 }, { "epoch": 0.45731486182635817, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6305 }, { "epoch": 0.4573873939218104, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6306 }, { "epoch": 0.45745992601726265, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6307 }, { "epoch": 0.45753245811271487, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.1464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6308 }, { "epoch": 0.45760499020816714, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.0423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6309 }, { "epoch": 0.45767752230361936, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6310 }, { "epoch": 0.45775005439907157, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.1567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6311 }, { "epoch": 0.45782258649452384, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.1069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6312 }, { "epoch": 0.45789511858997606, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.8125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6313 }, { "epoch": 0.45796765068542833, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 9.1356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6314 }, { "epoch": 0.45804018278088054, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.5762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6315 }, { "epoch": 0.45811271487633276, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.4142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6316 }, { "epoch": 0.45818524697178503, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.1638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6317 }, { "epoch": 0.45825777906723725, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6318 }, { "epoch": 0.45833031116268946, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.0188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6319 }, { "epoch": 0.45840284325814173, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6320 }, { "epoch": 0.45847537535359395, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 9.2008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6321 }, { "epoch": 0.4585479074490462, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.7981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6322 }, { "epoch": 0.45862043954449844, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.2824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6323 }, { "epoch": 0.45869297163995065, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.9787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6324 }, { "epoch": 0.4587655037354029, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.4402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6325 }, { "epoch": 0.45883803583085514, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.3711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6326 }, { "epoch": 0.4589105679263074, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.8677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6327 }, { "epoch": 0.4589831000217596, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.6856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6328 }, { "epoch": 0.45905563211721184, "grad_norm": 14.1875, "learning_rate": 0.0003, "loss": 8.4743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6329 }, { "epoch": 0.4591281642126641, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6330 }, { "epoch": 0.45920069630811633, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.0828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6331 }, { "epoch": 0.4592732284035686, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.3662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6332 }, { "epoch": 0.4593457604990208, "grad_norm": 86.0, "learning_rate": 0.0003, "loss": 9.3207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6333 }, { "epoch": 0.45941829259447303, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.9574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6334 }, { "epoch": 0.4594908246899253, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.5156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6335 }, { "epoch": 0.4595633567853775, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.3312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6336 }, { "epoch": 0.4596358888808298, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.9299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6337 }, { "epoch": 0.459708420976282, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 9.3875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6338 }, { "epoch": 0.4597809530717342, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.4808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6339 }, { "epoch": 0.4598534851671865, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 9.0442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6340 }, { "epoch": 0.4599260172626387, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.5771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6341 }, { "epoch": 0.459998549358091, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.3513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6342 }, { "epoch": 0.4600710814535432, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6343 }, { "epoch": 0.4601436135489954, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6344 }, { "epoch": 0.4602161456444477, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 8.963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6345 }, { "epoch": 0.4602886777398999, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6346 }, { "epoch": 0.46036120983535217, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6347 }, { "epoch": 0.4604337419308044, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.1605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6348 }, { "epoch": 0.4605062740262566, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.3184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6349 }, { "epoch": 0.4605788061217089, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.0758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6350 }, { "epoch": 0.4606513382171611, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.1551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6351 }, { "epoch": 0.4607238703126133, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.5303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6352 }, { "epoch": 0.4607964024080656, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.7284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6353 }, { "epoch": 0.4608689345035178, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.1871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6354 }, { "epoch": 0.46094146659897006, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6355 }, { "epoch": 0.4610139986944223, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.3253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6356 }, { "epoch": 0.4610865307898745, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.0888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6357 }, { "epoch": 0.46115906288532676, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.3372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6358 }, { "epoch": 0.461231594980779, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6359 }, { "epoch": 0.46130412707623125, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6360 }, { "epoch": 0.46137665917168347, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6361 }, { "epoch": 0.4614491912671357, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.2188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6362 }, { "epoch": 0.46152172336258795, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.5329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6363 }, { "epoch": 0.46159425545804017, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.3767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6364 }, { "epoch": 0.46166678755349244, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6365 }, { "epoch": 0.46173931964894466, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.2682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6366 }, { "epoch": 0.4618118517443969, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.7144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6367 }, { "epoch": 0.46188438383984914, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6368 }, { "epoch": 0.46195691593530136, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.3125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6369 }, { "epoch": 0.46202944803075363, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.2738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6370 }, { "epoch": 0.46210198012620585, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.4966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6371 }, { "epoch": 0.46217451222165806, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.8857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6372 }, { "epoch": 0.46224704431711033, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.2581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6373 }, { "epoch": 0.46231957641256255, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.8606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6374 }, { "epoch": 0.4623921085080148, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 8.7009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6375 }, { "epoch": 0.46246464060346704, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.6913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6376 }, { "epoch": 0.46253717269891925, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6377 }, { "epoch": 0.4626097047943715, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 9.6245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6378 }, { "epoch": 0.46268223688982374, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6379 }, { "epoch": 0.462754768985276, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.6662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6380 }, { "epoch": 0.4628273010807282, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6381 }, { "epoch": 0.46289983317618044, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.6069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6382 }, { "epoch": 0.4629723652716327, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6383 }, { "epoch": 0.46304489736708493, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6384 }, { "epoch": 0.4631174294625372, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.9953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6385 }, { "epoch": 0.4631899615579894, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 9.2806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6386 }, { "epoch": 0.46326249365344163, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.5579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6387 }, { "epoch": 0.4633350257488939, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6388 }, { "epoch": 0.4634075578443461, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.9769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6389 }, { "epoch": 0.46348008993979833, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.8165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6390 }, { "epoch": 0.4635526220352506, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.4604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6391 }, { "epoch": 0.4636251541307028, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.7034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6392 }, { "epoch": 0.4636976862261551, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6393 }, { "epoch": 0.4637702183216073, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6394 }, { "epoch": 0.4638427504170595, "grad_norm": 14.4375, "learning_rate": 0.0003, "loss": 8.6985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6395 }, { "epoch": 0.4639152825125118, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6396 }, { "epoch": 0.463987814607964, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.8952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6397 }, { "epoch": 0.4640603467034163, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.6414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6398 }, { "epoch": 0.4641328787988685, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 9.1562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6399 }, { "epoch": 0.4642054108943207, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.6779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6400 }, { "epoch": 0.464277942989773, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.3873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6401 }, { "epoch": 0.4643504750852252, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.5944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6402 }, { "epoch": 0.46442300718067747, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6403 }, { "epoch": 0.4644955392761297, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6404 }, { "epoch": 0.4645680713715819, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6405 }, { "epoch": 0.4646406034670342, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6406 }, { "epoch": 0.4647131355624864, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.7873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6407 }, { "epoch": 0.46478566765793866, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.4667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6408 }, { "epoch": 0.4648581997533909, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.3744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6409 }, { "epoch": 0.4649307318488431, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.1163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6410 }, { "epoch": 0.46500326394429536, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6411 }, { "epoch": 0.4650757960397476, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.5489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6412 }, { "epoch": 0.46514832813519985, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6413 }, { "epoch": 0.46522086023065207, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.8559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6414 }, { "epoch": 0.4652933923261043, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6415 }, { "epoch": 0.46536592442155655, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.7764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6416 }, { "epoch": 0.46543845651700877, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.3027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6417 }, { "epoch": 0.46551098861246104, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6418 }, { "epoch": 0.46558352070791326, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6419 }, { "epoch": 0.46565605280336547, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 8.7111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6420 }, { "epoch": 0.46572858489881774, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6421 }, { "epoch": 0.46580111699426996, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.6846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6422 }, { "epoch": 0.4658736490897222, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.5716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6423 }, { "epoch": 0.46594618118517445, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.3378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6424 }, { "epoch": 0.46601871328062666, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6425 }, { "epoch": 0.46609124537607893, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.6904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6426 }, { "epoch": 0.46616377747153115, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6427 }, { "epoch": 0.46623630956698336, "grad_norm": 25.875, "learning_rate": 0.0003, "loss": 8.0813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6428 }, { "epoch": 0.46630884166243564, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.9241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6429 }, { "epoch": 0.46638137375788785, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.3895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6430 }, { "epoch": 0.4664539058533401, "grad_norm": 108.0, "learning_rate": 0.0003, "loss": 8.8246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6431 }, { "epoch": 0.46652643794879234, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6432 }, { "epoch": 0.46659897004424455, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.6743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6433 }, { "epoch": 0.4666715021396968, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6434 }, { "epoch": 0.46674403423514904, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.3711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6435 }, { "epoch": 0.4668165663306013, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.7836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6436 }, { "epoch": 0.46688909842605353, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.6877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6437 }, { "epoch": 0.46696163052150574, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.8926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6438 }, { "epoch": 0.467034162616958, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.3141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6439 }, { "epoch": 0.46710669471241023, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 9.116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6440 }, { "epoch": 0.4671792268078625, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.8746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6441 }, { "epoch": 0.4672517589033147, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.9068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6442 }, { "epoch": 0.46732429099876693, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 9.3913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6443 }, { "epoch": 0.4673968230942192, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6444 }, { "epoch": 0.4674693551896714, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.6108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6445 }, { "epoch": 0.4675418872851237, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6446 }, { "epoch": 0.4676144193805759, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 7.8969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6447 }, { "epoch": 0.4676869514760281, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.0018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6448 }, { "epoch": 0.4677594835714804, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.6996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6449 }, { "epoch": 0.4678320156669326, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6450 }, { "epoch": 0.4679045477623849, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.2186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6451 }, { "epoch": 0.4679770798578371, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6452 }, { "epoch": 0.4680496119532893, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6453 }, { "epoch": 0.4681221440487416, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6454 }, { "epoch": 0.4681946761441938, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6455 }, { "epoch": 0.46826720823964607, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.1669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6456 }, { "epoch": 0.4683397403350983, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6457 }, { "epoch": 0.4684122724305505, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6458 }, { "epoch": 0.4684848045260028, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6459 }, { "epoch": 0.468557336621455, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6460 }, { "epoch": 0.4686298687169072, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6461 }, { "epoch": 0.4687024008123595, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6462 }, { "epoch": 0.4687749329078117, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.9719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6463 }, { "epoch": 0.46884746500326396, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.9163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6464 }, { "epoch": 0.4689199970987162, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.6591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6465 }, { "epoch": 0.4689925291941684, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.5079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6466 }, { "epoch": 0.46906506128962067, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.7377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6467 }, { "epoch": 0.4691375933850729, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.9919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6468 }, { "epoch": 0.46921012548052515, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.5915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6469 }, { "epoch": 0.46928265757597737, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6470 }, { "epoch": 0.4693551896714296, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6471 }, { "epoch": 0.46942772176688186, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6472 }, { "epoch": 0.46950025386233407, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6473 }, { "epoch": 0.46957278595778634, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.8804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6474 }, { "epoch": 0.46964531805323856, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.1639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6475 }, { "epoch": 0.4697178501486908, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.1282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6476 }, { "epoch": 0.46979038224414305, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 9.1748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6477 }, { "epoch": 0.46986291433959526, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.9502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6478 }, { "epoch": 0.46993544643504753, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.1487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6479 }, { "epoch": 0.47000797853049975, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.9427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6480 }, { "epoch": 0.47008051062595196, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.5499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6481 }, { "epoch": 0.47015304272140424, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6482 }, { "epoch": 0.47022557481685645, "grad_norm": 1.4453125, "learning_rate": 0.0003, "loss": 8.4259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6483 }, { "epoch": 0.4702981069123087, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6484 }, { "epoch": 0.47037063900776094, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6485 }, { "epoch": 0.47044317110321315, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6486 }, { "epoch": 0.4705157031986654, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6487 }, { "epoch": 0.47058823529411764, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6488 }, { "epoch": 0.4706607673895699, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 9.3822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6489 }, { "epoch": 0.4707332994850221, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.7257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6490 }, { "epoch": 0.47080583158047434, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6491 }, { "epoch": 0.4708783636759266, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 9.3639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6492 }, { "epoch": 0.47095089577137883, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.5961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6493 }, { "epoch": 0.47102342786683105, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6494 }, { "epoch": 0.4710959599622833, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6495 }, { "epoch": 0.47116849205773553, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.7406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6496 }, { "epoch": 0.4712410241531878, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.5824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6497 }, { "epoch": 0.47131355624864, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6498 }, { "epoch": 0.47138608834409224, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.4571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6499 }, { "epoch": 0.4714586204395445, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6500 }, { "epoch": 0.4715311525349967, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6501 }, { "epoch": 0.471603684630449, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6502 }, { "epoch": 0.4716762167259012, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6503 }, { "epoch": 0.4717487488213534, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.6184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6504 }, { "epoch": 0.4718212809168057, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.3353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6505 }, { "epoch": 0.4718938130122579, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.0789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6506 }, { "epoch": 0.4719663451077102, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.2177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6507 }, { "epoch": 0.4720388772031624, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6508 }, { "epoch": 0.4721114092986146, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.9957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6509 }, { "epoch": 0.4721839413940669, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6510 }, { "epoch": 0.4722564734895191, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6511 }, { "epoch": 0.4723290055849714, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.3263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6512 }, { "epoch": 0.4724015376804236, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6513 }, { "epoch": 0.4724740697758758, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6514 }, { "epoch": 0.4725466018713281, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6515 }, { "epoch": 0.4726191339667803, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.8056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6516 }, { "epoch": 0.47269166606223256, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6517 }, { "epoch": 0.4727641981576848, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6518 }, { "epoch": 0.472836730253137, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6519 }, { "epoch": 0.47290926234858927, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6520 }, { "epoch": 0.4729817944440415, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6521 }, { "epoch": 0.47305432653949375, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.1479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6522 }, { "epoch": 0.47312685863494597, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6523 }, { "epoch": 0.4731993907303982, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6524 }, { "epoch": 0.47327192282585046, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.6571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6525 }, { "epoch": 0.47334445492130267, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.6267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6526 }, { "epoch": 0.4734169870167549, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6527 }, { "epoch": 0.47348951911220716, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 8.9514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6528 }, { "epoch": 0.4735620512076594, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6529 }, { "epoch": 0.47363458330311164, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 8.7147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6530 }, { "epoch": 0.47370711539856386, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6531 }, { "epoch": 0.4737796474940161, "grad_norm": 20.25, "learning_rate": 0.0003, "loss": 8.3636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6532 }, { "epoch": 0.47385217958946835, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6533 }, { "epoch": 0.47392471168492056, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6534 }, { "epoch": 0.47399724378037283, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.5903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6535 }, { "epoch": 0.47406977587582505, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6536 }, { "epoch": 0.47414230797127727, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.8858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6537 }, { "epoch": 0.47421484006672954, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6538 }, { "epoch": 0.47428737216218175, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.6797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6539 }, { "epoch": 0.474359904257634, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.88, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6540 }, { "epoch": 0.47443243635308624, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6541 }, { "epoch": 0.47450496844853846, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.7639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6542 }, { "epoch": 0.4745775005439907, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6543 }, { "epoch": 0.47465003263944294, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6544 }, { "epoch": 0.4747225647348952, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6545 }, { "epoch": 0.47479509683034743, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.3251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6546 }, { "epoch": 0.47486762892579965, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6547 }, { "epoch": 0.4749401610212519, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.4203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6548 }, { "epoch": 0.47501269311670413, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.7753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6549 }, { "epoch": 0.4750852252121564, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6550 }, { "epoch": 0.4751577573076086, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6551 }, { "epoch": 0.47523028940306083, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6552 }, { "epoch": 0.4753028214985131, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.7268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6553 }, { "epoch": 0.4753753535939653, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6554 }, { "epoch": 0.4754478856894176, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 8.9355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6555 }, { "epoch": 0.4755204177848698, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.8343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6556 }, { "epoch": 0.475592949880322, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6557 }, { "epoch": 0.4756654819757743, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.7717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6558 }, { "epoch": 0.4757380140712265, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6559 }, { "epoch": 0.4758105461666788, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.8142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6560 }, { "epoch": 0.475883078262131, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6561 }, { "epoch": 0.4759556103575832, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.4147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6562 }, { "epoch": 0.4760281424530355, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.6035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6563 }, { "epoch": 0.4761006745484877, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.2825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6564 }, { "epoch": 0.4761732066439399, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6565 }, { "epoch": 0.4762457387393922, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6566 }, { "epoch": 0.4763182708348444, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.8635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6567 }, { "epoch": 0.4763908029302967, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 8.8078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6568 }, { "epoch": 0.4764633350257489, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 9.2321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6569 }, { "epoch": 0.4765358671212011, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.1579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6570 }, { "epoch": 0.4766083992166534, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6571 }, { "epoch": 0.4766809313121056, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.6434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6572 }, { "epoch": 0.47675346340755786, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.0845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6573 }, { "epoch": 0.4768259955030101, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6574 }, { "epoch": 0.4768985275984623, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.8716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6575 }, { "epoch": 0.47697105969391457, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6576 }, { "epoch": 0.4770435917893668, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.3015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6577 }, { "epoch": 0.47711612388481905, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6578 }, { "epoch": 0.47718865598027127, "grad_norm": 23.125, "learning_rate": 0.0003, "loss": 9.1111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6579 }, { "epoch": 0.4772611880757235, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.0536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6580 }, { "epoch": 0.47733372017117576, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.4263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6581 }, { "epoch": 0.477406252266628, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6582 }, { "epoch": 0.47747878436208024, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.1596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6583 }, { "epoch": 0.47755131645753246, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.0423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6584 }, { "epoch": 0.4776238485529847, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6585 }, { "epoch": 0.47769638064843695, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6586 }, { "epoch": 0.47776891274388916, "grad_norm": 23.375, "learning_rate": 0.0003, "loss": 9.135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6587 }, { "epoch": 0.47784144483934143, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.8779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6588 }, { "epoch": 0.47791397693479365, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6589 }, { "epoch": 0.47798650903024587, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.7697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6590 }, { "epoch": 0.47805904112569814, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6591 }, { "epoch": 0.47813157322115035, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6592 }, { "epoch": 0.4782041053166026, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.8526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6593 }, { "epoch": 0.47827663741205484, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.9078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6594 }, { "epoch": 0.47834916950750705, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6595 }, { "epoch": 0.4784217016029593, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.17, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6596 }, { "epoch": 0.47849423369841154, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6597 }, { "epoch": 0.47856676579386376, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6598 }, { "epoch": 0.47863929788931603, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6599 }, { "epoch": 0.47871182998476824, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6600 }, { "epoch": 0.4787843620802205, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.8381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6601 }, { "epoch": 0.47885689417567273, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6602 }, { "epoch": 0.47892942627112495, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.3358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6603 }, { "epoch": 0.4790019583665772, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6604 }, { "epoch": 0.47907449046202943, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6605 }, { "epoch": 0.4791470225574817, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6606 }, { "epoch": 0.4792195546529339, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.0674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6607 }, { "epoch": 0.47929208674838614, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6608 }, { "epoch": 0.4793646188438384, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6609 }, { "epoch": 0.4794371509392906, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.8234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6610 }, { "epoch": 0.4795096830347429, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6611 }, { "epoch": 0.4795822151301951, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.1378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6612 }, { "epoch": 0.4796547472256473, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6613 }, { "epoch": 0.4797272793210996, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6614 }, { "epoch": 0.4797998114165518, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6615 }, { "epoch": 0.4798723435120041, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.8541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6616 }, { "epoch": 0.4799448756074563, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.4548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6617 }, { "epoch": 0.4800174077029085, "grad_norm": 26.75, "learning_rate": 0.0003, "loss": 9.2281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6618 }, { "epoch": 0.4800899397983608, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.1424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6619 }, { "epoch": 0.480162471893813, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.3149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6620 }, { "epoch": 0.4802350039892653, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6621 }, { "epoch": 0.4803075360847175, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6622 }, { "epoch": 0.4803800681801697, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.5918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6623 }, { "epoch": 0.480452600275622, "grad_norm": 20.75, "learning_rate": 0.0003, "loss": 9.0237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6624 }, { "epoch": 0.4805251323710742, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.9829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6625 }, { "epoch": 0.48059766446652646, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6626 }, { "epoch": 0.4806701965619787, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.1505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6627 }, { "epoch": 0.4807427286574309, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6628 }, { "epoch": 0.48081526075288317, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.9989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6629 }, { "epoch": 0.4808877928483354, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.4286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6630 }, { "epoch": 0.48096032494378765, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.5593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6631 }, { "epoch": 0.48103285703923987, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6632 }, { "epoch": 0.4811053891346921, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.8008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6633 }, { "epoch": 0.48117792123014436, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6634 }, { "epoch": 0.48125045332559657, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.0094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6635 }, { "epoch": 0.4813229854210488, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.5518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6636 }, { "epoch": 0.48139551751650106, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.8184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6637 }, { "epoch": 0.4814680496119533, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6638 }, { "epoch": 0.48154058170740555, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6639 }, { "epoch": 0.48161311380285776, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.6541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6640 }, { "epoch": 0.48168564589831, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6375, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6641 }, { "epoch": 0.48175817799376225, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6642 }, { "epoch": 0.48183071008921446, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.4997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6643 }, { "epoch": 0.48190324218466674, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6644 }, { "epoch": 0.48197577428011895, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.9218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6645 }, { "epoch": 0.48204830637557117, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.0646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6646 }, { "epoch": 0.48212083847102344, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.5407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6647 }, { "epoch": 0.48219337056647565, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6648 }, { "epoch": 0.4822659026619279, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.9453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6649 }, { "epoch": 0.48233843475738014, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.5893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6650 }, { "epoch": 0.48241096685283236, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.2858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6651 }, { "epoch": 0.48248349894828463, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.68, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6652 }, { "epoch": 0.48255603104373684, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.8165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6653 }, { "epoch": 0.4826285631391891, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.19, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6654 }, { "epoch": 0.48270109523464133, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.8675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6655 }, { "epoch": 0.48277362733009355, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6656 }, { "epoch": 0.4828461594255458, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.7529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6657 }, { "epoch": 0.48291869152099803, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6658 }, { "epoch": 0.4829912236164503, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 8.5836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6659 }, { "epoch": 0.4830637557119025, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 9.0858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6660 }, { "epoch": 0.48313628780735474, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.0252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6661 }, { "epoch": 0.483208819902807, "grad_norm": 87.0, "learning_rate": 0.0003, "loss": 9.1221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6662 }, { "epoch": 0.4832813519982592, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.4986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6663 }, { "epoch": 0.4833538840937115, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6664 }, { "epoch": 0.4834264161891637, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6665 }, { "epoch": 0.4834989482846159, "grad_norm": 16.375, "learning_rate": 0.0003, "loss": 8.9769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6666 }, { "epoch": 0.4835714803800682, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6667 }, { "epoch": 0.4836440124755204, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6668 }, { "epoch": 0.48371654457097263, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.3596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6669 }, { "epoch": 0.4837890766664249, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6670 }, { "epoch": 0.4838616087618771, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6671 }, { "epoch": 0.4839341408573294, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6672 }, { "epoch": 0.4840066729527816, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6673 }, { "epoch": 0.4840792050482338, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6674 }, { "epoch": 0.4841517371436861, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6675 }, { "epoch": 0.4842242692391383, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.4454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6676 }, { "epoch": 0.4842968013345906, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.7505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6677 }, { "epoch": 0.4843693334300428, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.4988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6678 }, { "epoch": 0.484441865525495, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.1881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6679 }, { "epoch": 0.4845143976209473, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.1618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6680 }, { "epoch": 0.4845869297163995, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6681 }, { "epoch": 0.48465946181185177, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6682 }, { "epoch": 0.484731993907304, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6683 }, { "epoch": 0.4848045260027562, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.3213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6684 }, { "epoch": 0.48487705809820847, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6685 }, { "epoch": 0.4849495901936607, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.8565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6686 }, { "epoch": 0.48502212228911296, "grad_norm": 22.0, "learning_rate": 0.0003, "loss": 8.5042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6687 }, { "epoch": 0.48509465438456517, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.9595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6688 }, { "epoch": 0.4851671864800174, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 9.0266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6689 }, { "epoch": 0.48523971857546966, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.4923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6690 }, { "epoch": 0.4853122506709219, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6691 }, { "epoch": 0.48538478276637415, "grad_norm": 23.875, "learning_rate": 0.0003, "loss": 8.7104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6692 }, { "epoch": 0.48545731486182636, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6693 }, { "epoch": 0.4855298469572786, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.1909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6694 }, { "epoch": 0.48560237905273085, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6695 }, { "epoch": 0.48567491114818306, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 8.6453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6696 }, { "epoch": 0.48574744324363534, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6697 }, { "epoch": 0.48581997533908755, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.1683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6698 }, { "epoch": 0.48589250743453977, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6699 }, { "epoch": 0.48596503952999204, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.4932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6700 }, { "epoch": 0.48603757162544425, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.2532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6701 }, { "epoch": 0.48611010372089647, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.3416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6702 }, { "epoch": 0.48618263581634874, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6703 }, { "epoch": 0.48625516791180096, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6704 }, { "epoch": 0.4863277000072532, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6705 }, { "epoch": 0.48640023210270544, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.1682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6706 }, { "epoch": 0.48647276419815766, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.7361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6707 }, { "epoch": 0.48654529629360993, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.6767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6708 }, { "epoch": 0.48661782838906215, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6709 }, { "epoch": 0.4866903604845144, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6710 }, { "epoch": 0.48676289257996663, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.8124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6711 }, { "epoch": 0.48683542467541885, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.8149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6712 }, { "epoch": 0.4869079567708711, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.7864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6713 }, { "epoch": 0.48698048886632334, "grad_norm": 18.25, "learning_rate": 0.0003, "loss": 9.0016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6714 }, { "epoch": 0.4870530209617756, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.7375, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6715 }, { "epoch": 0.4871255530572278, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.0811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6716 }, { "epoch": 0.48719808515268004, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6717 }, { "epoch": 0.4872706172481323, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6718 }, { "epoch": 0.4873431493435845, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.1065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6719 }, { "epoch": 0.4874156814390368, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.6957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6720 }, { "epoch": 0.487488213534489, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.1816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6721 }, { "epoch": 0.48756074562994123, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.5007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6722 }, { "epoch": 0.4876332777253935, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.2693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6723 }, { "epoch": 0.4877058098208457, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 8.7742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6724 }, { "epoch": 0.487778341916298, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.4384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6725 }, { "epoch": 0.4878508740117502, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.0236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6726 }, { "epoch": 0.4879234061072024, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6727 }, { "epoch": 0.4879959382026547, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6728 }, { "epoch": 0.4880684702981069, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.4918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6729 }, { "epoch": 0.4881410023935592, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.1994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6730 }, { "epoch": 0.4882135344890114, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6731 }, { "epoch": 0.4882860665844636, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.9905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6732 }, { "epoch": 0.4883585986799159, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.4515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6733 }, { "epoch": 0.4884311307753681, "grad_norm": 16.75, "learning_rate": 0.0003, "loss": 9.2628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6734 }, { "epoch": 0.48850366287082037, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6735 }, { "epoch": 0.4885761949662726, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 8.7677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6736 }, { "epoch": 0.4886487270617248, "grad_norm": 96.0, "learning_rate": 0.0003, "loss": 8.8897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6737 }, { "epoch": 0.48872125915717707, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.8807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6738 }, { "epoch": 0.4887937912526293, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.8329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6739 }, { "epoch": 0.4888663233480815, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6740 }, { "epoch": 0.48893885544353377, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 9.0692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6741 }, { "epoch": 0.489011387538986, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6742 }, { "epoch": 0.48908391963443826, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.8709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6743 }, { "epoch": 0.4891564517298905, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.7746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6744 }, { "epoch": 0.4892289838253427, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6745 }, { "epoch": 0.48930151592079496, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6746 }, { "epoch": 0.4893740480162472, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6747 }, { "epoch": 0.48944658011169945, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.3789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6748 }, { "epoch": 0.48951911220715166, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6749 }, { "epoch": 0.4895916443026039, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6750 }, { "epoch": 0.48966417639805615, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6751 }, { "epoch": 0.48973670849350837, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.0338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6752 }, { "epoch": 0.48980924058896064, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.4861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6753 }, { "epoch": 0.48988177268441285, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.8388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6754 }, { "epoch": 0.48995430477986507, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6755 }, { "epoch": 0.49002683687531734, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.0573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6756 }, { "epoch": 0.49009936897076956, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6757 }, { "epoch": 0.4901719010662218, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.3855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6758 }, { "epoch": 0.49024443316167404, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6759 }, { "epoch": 0.49031696525712626, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.9307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6760 }, { "epoch": 0.49038949735257853, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.8268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6761 }, { "epoch": 0.49046202944803075, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.7791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6762 }, { "epoch": 0.490534561543483, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 8.7122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6763 }, { "epoch": 0.49060709363893523, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 8.8081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6764 }, { "epoch": 0.49067962573438745, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6765 }, { "epoch": 0.4907521578298397, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.6933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6766 }, { "epoch": 0.49082468992529193, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.5746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6767 }, { "epoch": 0.4908972220207442, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6768 }, { "epoch": 0.4909697541161964, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.3824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6769 }, { "epoch": 0.49104228621164864, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.8872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6770 }, { "epoch": 0.4911148183071009, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6771 }, { "epoch": 0.4911873504025531, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6772 }, { "epoch": 0.49125988249800534, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6773 }, { "epoch": 0.4913324145934576, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6774 }, { "epoch": 0.4914049466889098, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6775 }, { "epoch": 0.4914774787843621, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.2682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6776 }, { "epoch": 0.4915500108798143, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 8.701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6777 }, { "epoch": 0.49162254297526653, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 8.6266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6778 }, { "epoch": 0.4916950750707188, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.91, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6779 }, { "epoch": 0.491767607166171, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.7694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6780 }, { "epoch": 0.4918401392616233, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6781 }, { "epoch": 0.4919126713570755, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6782 }, { "epoch": 0.4919852034525277, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 9.1867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6783 }, { "epoch": 0.49205773554798, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.3273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6784 }, { "epoch": 0.4921302676434322, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.4578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6785 }, { "epoch": 0.4922027997388845, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6786 }, { "epoch": 0.4922753318343367, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6787 }, { "epoch": 0.4923478639297889, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.2618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6788 }, { "epoch": 0.4924203960252412, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.1428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6789 }, { "epoch": 0.4924929281206934, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.3316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6790 }, { "epoch": 0.49256546021614567, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6791 }, { "epoch": 0.4926379923115979, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6792 }, { "epoch": 0.4927105244070501, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.4341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6793 }, { "epoch": 0.49278305650250237, "grad_norm": 13.375, "learning_rate": 0.0003, "loss": 8.7733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6794 }, { "epoch": 0.4928555885979546, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6795 }, { "epoch": 0.49292812069340686, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6796 }, { "epoch": 0.4930006527888591, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6797 }, { "epoch": 0.4930731848843113, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6798 }, { "epoch": 0.49314571697976356, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.1358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6799 }, { "epoch": 0.4932182490752158, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6800 }, { "epoch": 0.49329078117066805, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.6584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6801 }, { "epoch": 0.49336331326612026, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.4488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6802 }, { "epoch": 0.4934358453615725, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.5773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6803 }, { "epoch": 0.49350837745702475, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6804 }, { "epoch": 0.49358090955247697, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6805 }, { "epoch": 0.49365344164792924, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.99, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6806 }, { "epoch": 0.49372597374338145, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.7901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6807 }, { "epoch": 0.49379850583883367, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6808 }, { "epoch": 0.49387103793428594, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6809 }, { "epoch": 0.49394357002973815, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.7812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6810 }, { "epoch": 0.49401610212519037, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6811 }, { "epoch": 0.49408863422064264, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6812 }, { "epoch": 0.49416116631609486, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6813 }, { "epoch": 0.49423369841154713, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6814 }, { "epoch": 0.49430623050699934, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.3847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6815 }, { "epoch": 0.49437876260245156, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.8905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6816 }, { "epoch": 0.49445129469790383, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.7709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6817 }, { "epoch": 0.49452382679335605, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.8773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6818 }, { "epoch": 0.4945963588888083, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6819 }, { "epoch": 0.49466889098426053, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6820 }, { "epoch": 0.49474142307971275, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.3642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6821 }, { "epoch": 0.494813955175165, "grad_norm": 1.53125, "learning_rate": 0.0003, "loss": 8.4217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6822 }, { "epoch": 0.49488648727061724, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6823 }, { "epoch": 0.4949590193660695, "grad_norm": 14.25, "learning_rate": 0.0003, "loss": 8.9812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6824 }, { "epoch": 0.4950315514615217, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.1858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6825 }, { "epoch": 0.49510408355697394, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6826 }, { "epoch": 0.4951766156524262, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6827 }, { "epoch": 0.4952491477478784, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6828 }, { "epoch": 0.4953216798433307, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6829 }, { "epoch": 0.4953942119387829, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.4856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6830 }, { "epoch": 0.49546674403423513, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.6361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6831 }, { "epoch": 0.4955392761296874, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 8.8438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6832 }, { "epoch": 0.4956118082251396, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6833 }, { "epoch": 0.4956843403205919, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6834 }, { "epoch": 0.4957568724160441, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.0426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6835 }, { "epoch": 0.4958294045114963, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.8306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6836 }, { "epoch": 0.4959019366069486, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6837 }, { "epoch": 0.4959744687024008, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.6034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6838 }, { "epoch": 0.4960470007978531, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6839 }, { "epoch": 0.4961195328933053, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.5961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6840 }, { "epoch": 0.4961920649887575, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.3265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6841 }, { "epoch": 0.4962645970842098, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.9873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6842 }, { "epoch": 0.496337129179662, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6843 }, { "epoch": 0.4964096612751142, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.8629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6844 }, { "epoch": 0.4964821933705665, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.6511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6845 }, { "epoch": 0.4965547254660187, "grad_norm": 23.75, "learning_rate": 0.0003, "loss": 8.9974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6846 }, { "epoch": 0.49662725756147097, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.0197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6847 }, { "epoch": 0.4966997896569232, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6848 }, { "epoch": 0.4967723217523754, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6849 }, { "epoch": 0.49684485384782767, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.0285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6850 }, { "epoch": 0.4969173859432799, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.9859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6851 }, { "epoch": 0.49698991803873216, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6852 }, { "epoch": 0.4970624501341844, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.08, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6853 }, { "epoch": 0.4971349822296366, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6854 }, { "epoch": 0.49720751432508886, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6855 }, { "epoch": 0.4972800464205411, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6856 }, { "epoch": 0.49735257851599335, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.1885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6857 }, { "epoch": 0.49742511061144556, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6858 }, { "epoch": 0.4974976427068978, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.3109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6859 }, { "epoch": 0.49757017480235005, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6860 }, { "epoch": 0.49764270689780227, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 8.8907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6861 }, { "epoch": 0.49771523899325454, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.3015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6862 }, { "epoch": 0.49778777108870675, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.0665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6863 }, { "epoch": 0.49786030318415897, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6864 }, { "epoch": 0.49793283527961124, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6865 }, { "epoch": 0.49800536737506346, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6866 }, { "epoch": 0.49807789947051573, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 8.962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6867 }, { "epoch": 0.49815043156596794, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6868 }, { "epoch": 0.49822296366142016, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.7804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6869 }, { "epoch": 0.49829549575687243, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.3669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6870 }, { "epoch": 0.49836802785232465, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6871 }, { "epoch": 0.4984405599477769, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6872 }, { "epoch": 0.49851309204322913, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.7339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6873 }, { "epoch": 0.49858562413868135, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6874 }, { "epoch": 0.4986581562341336, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.6743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6875 }, { "epoch": 0.49873068832958584, "grad_norm": 25.125, "learning_rate": 0.0003, "loss": 8.7355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6876 }, { "epoch": 0.49880322042503805, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6877 }, { "epoch": 0.4988757525204903, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.9425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6878 }, { "epoch": 0.49894828461594254, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.1869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6879 }, { "epoch": 0.4990208167113948, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6880 }, { "epoch": 0.499093348806847, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.3623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6881 }, { "epoch": 0.49916588090229924, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.4464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6882 }, { "epoch": 0.4992384129977515, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.1656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6883 }, { "epoch": 0.49931094509320373, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.9159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6884 }, { "epoch": 0.499383477188656, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6885 }, { "epoch": 0.4994560092841082, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.2455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6886 }, { "epoch": 0.49952854137956043, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.9436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6887 }, { "epoch": 0.4996010734750127, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.5703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6888 }, { "epoch": 0.4996736055704649, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.0169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6889 }, { "epoch": 0.4997461376659172, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6890 }, { "epoch": 0.4998186697613694, "grad_norm": 1.453125, "learning_rate": 0.0003, "loss": 8.9739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6891 }, { "epoch": 0.4998912018568216, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6892 }, { "epoch": 0.4999637339522739, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.4589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6893 }, { "epoch": 0.5000362660477261, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.4493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6894 }, { "epoch": 0.5001087981431783, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.4522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6895 }, { "epoch": 0.5001813302386305, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.4553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6896 }, { "epoch": 0.5002538623340829, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.4994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6897 }, { "epoch": 0.5003263944295351, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.7329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6898 }, { "epoch": 0.5003989265249873, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6899 }, { "epoch": 0.5004714586204395, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.6114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6900 }, { "epoch": 0.5005439907158917, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6901 }, { "epoch": 0.5006165228113441, "grad_norm": 13.6875, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6902 }, { "epoch": 0.5006890549067963, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.8061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6903 }, { "epoch": 0.5007615870022485, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6904 }, { "epoch": 0.5008341190977007, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.7289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6905 }, { "epoch": 0.5009066511931529, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6906 }, { "epoch": 0.5009791832886052, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.5432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6907 }, { "epoch": 0.5010517153840575, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.5879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6908 }, { "epoch": 0.5011242474795097, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.5971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6909 }, { "epoch": 0.5011967795749619, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6910 }, { "epoch": 0.5012693116704141, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.6354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6911 }, { "epoch": 0.5013418437658664, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.5395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6912 }, { "epoch": 0.5014143758613187, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6913 }, { "epoch": 0.5014869079567709, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.2876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6914 }, { "epoch": 0.5015594400522231, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.4341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6915 }, { "epoch": 0.5016319721476753, "grad_norm": 24.5, "learning_rate": 0.0003, "loss": 8.9881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6916 }, { "epoch": 0.5017045042431276, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6917 }, { "epoch": 0.5017770363385798, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6918 }, { "epoch": 0.5018495684340321, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6919 }, { "epoch": 0.5019221005294843, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.1782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6920 }, { "epoch": 0.5019946326249365, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6921 }, { "epoch": 0.5020671647203888, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.0617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6922 }, { "epoch": 0.502139696815841, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.1196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6923 }, { "epoch": 0.5022122289112932, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.8582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6924 }, { "epoch": 0.5022847610067455, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6925 }, { "epoch": 0.5023572931021977, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.5669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6926 }, { "epoch": 0.50242982519765, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.9587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6927 }, { "epoch": 0.5025023572931022, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.7995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6928 }, { "epoch": 0.5025748893885544, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.0048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6929 }, { "epoch": 0.5026474214840067, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6930 }, { "epoch": 0.5027199535794589, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.31, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6931 }, { "epoch": 0.5027924856749112, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6932 }, { "epoch": 0.5028650177703634, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6933 }, { "epoch": 0.5029375498658156, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6934 }, { "epoch": 0.5030100819612678, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.9658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6935 }, { "epoch": 0.5030826140567201, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.0713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6936 }, { "epoch": 0.5031551461521724, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.4024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6937 }, { "epoch": 0.5032276782476246, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6938 }, { "epoch": 0.5033002103430768, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.8908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6939 }, { "epoch": 0.503372742438529, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6940 }, { "epoch": 0.5034452745339812, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.6795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6941 }, { "epoch": 0.5035178066294336, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6942 }, { "epoch": 0.5035903387248858, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.1056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6943 }, { "epoch": 0.503662870820338, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6944 }, { "epoch": 0.5037354029157902, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6945 }, { "epoch": 0.5038079350112424, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.3383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6946 }, { "epoch": 0.5038804671066948, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6947 }, { "epoch": 0.503952999202147, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6948 }, { "epoch": 0.5040255312975992, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6949 }, { "epoch": 0.5040980633930514, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6950 }, { "epoch": 0.5041705954885036, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6951 }, { "epoch": 0.504243127583956, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6952 }, { "epoch": 0.5043156596794082, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.0635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6953 }, { "epoch": 0.5043881917748604, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.86, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6954 }, { "epoch": 0.5044607238703126, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6955 }, { "epoch": 0.5045332559657648, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6956 }, { "epoch": 0.5046057880612171, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.4938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6957 }, { "epoch": 0.5046783201566694, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6958 }, { "epoch": 0.5047508522521216, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.3266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6959 }, { "epoch": 0.5048233843475738, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.6172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6960 }, { "epoch": 0.504895916443026, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6961 }, { "epoch": 0.5049684485384783, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.6321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6962 }, { "epoch": 0.5050409806339305, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6963 }, { "epoch": 0.5051135127293828, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6964 }, { "epoch": 0.505186044824835, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6965 }, { "epoch": 0.5052585769202872, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6966 }, { "epoch": 0.5053311090157394, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6967 }, { "epoch": 0.5054036411111917, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.6767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6968 }, { "epoch": 0.505476173206644, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6969 }, { "epoch": 0.5055487053020962, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.1488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6970 }, { "epoch": 0.5056212373975484, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6971 }, { "epoch": 0.5056937694930006, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6972 }, { "epoch": 0.5057663015884529, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6973 }, { "epoch": 0.5058388336839051, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.1252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6974 }, { "epoch": 0.5059113657793574, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.4973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6975 }, { "epoch": 0.5059838978748096, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6976 }, { "epoch": 0.5060564299702618, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6977 }, { "epoch": 0.5061289620657141, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.5724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6978 }, { "epoch": 0.5062014941611663, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.6879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6979 }, { "epoch": 0.5062740262566185, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.0052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6980 }, { "epoch": 0.5063465583520708, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6981 }, { "epoch": 0.506419090447523, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.9371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6982 }, { "epoch": 0.5064916225429753, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 9.1133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6983 }, { "epoch": 0.5065641546384275, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.1327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6984 }, { "epoch": 0.5066366867338797, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.2213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6985 }, { "epoch": 0.506709218829332, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6986 }, { "epoch": 0.5067817509247842, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.4581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6987 }, { "epoch": 0.5068542830202365, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6988 }, { "epoch": 0.5069268151156887, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6989 }, { "epoch": 0.5069993472111409, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6990 }, { "epoch": 0.5070718793065931, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6991 }, { "epoch": 0.5071444114020454, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6992 }, { "epoch": 0.5072169434974977, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.2312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6993 }, { "epoch": 0.5072894755929499, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.2579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6994 }, { "epoch": 0.5073620076884021, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.2268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6995 }, { "epoch": 0.5074345397838543, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6996 }, { "epoch": 0.5075070718793065, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6997 }, { "epoch": 0.5075796039747589, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.8901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6998 }, { "epoch": 0.5076521360702111, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.6585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 6999 }, { "epoch": 0.5077246681656633, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7000 }, { "epoch": 0.5077972002611155, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.0863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7001 }, { "epoch": 0.5078697323565677, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.7176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7002 }, { "epoch": 0.5079422644520201, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.6544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7003 }, { "epoch": 0.5080147965474723, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.8888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7004 }, { "epoch": 0.5080873286429245, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.7086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7005 }, { "epoch": 0.5081598607383767, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.5658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7006 }, { "epoch": 0.5082323928338289, "grad_norm": 20.25, "learning_rate": 0.0003, "loss": 9.217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7007 }, { "epoch": 0.5083049249292813, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.3125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7008 }, { "epoch": 0.5083774570247335, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7009 }, { "epoch": 0.5084499891201857, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7010 }, { "epoch": 0.5085225212156379, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7011 }, { "epoch": 0.5085950533110901, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.3658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7012 }, { "epoch": 0.5086675854065424, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.7467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7013 }, { "epoch": 0.5087401175019947, "grad_norm": 11.4375, "learning_rate": 0.0003, "loss": 8.8854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7014 }, { "epoch": 0.5088126495974469, "grad_norm": 35.5, "learning_rate": 0.0003, "loss": 8.8296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7015 }, { "epoch": 0.5088851816928991, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.4518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7016 }, { "epoch": 0.5089577137883513, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7017 }, { "epoch": 0.5090302458838036, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7018 }, { "epoch": 0.5091027779792558, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 8.6858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7019 }, { "epoch": 0.5091753100747081, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.8174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7020 }, { "epoch": 0.5092478421701603, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7021 }, { "epoch": 0.5093203742656125, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.1967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7022 }, { "epoch": 0.5093929063610648, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7023 }, { "epoch": 0.509465438456517, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.7934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7024 }, { "epoch": 0.5095379705519693, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7025 }, { "epoch": 0.5096105026474215, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.6888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7026 }, { "epoch": 0.5096830347428737, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.7485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7027 }, { "epoch": 0.509755566838326, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7028 }, { "epoch": 0.5098280989337782, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7029 }, { "epoch": 0.5099006310292304, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7030 }, { "epoch": 0.5099731631246827, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.5262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7031 }, { "epoch": 0.5100456952201349, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.5114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7032 }, { "epoch": 0.5101182273155872, "grad_norm": 14.75, "learning_rate": 0.0003, "loss": 9.0489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7033 }, { "epoch": 0.5101907594110394, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.1295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7034 }, { "epoch": 0.5102632915064916, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.8967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7035 }, { "epoch": 0.5103358236019438, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 8.9454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7036 }, { "epoch": 0.5104083556973961, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7037 }, { "epoch": 0.5104808877928483, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7038 }, { "epoch": 0.5105534198883006, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.7393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7039 }, { "epoch": 0.5106259519837528, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.87, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7040 }, { "epoch": 0.510698484079205, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.8522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7041 }, { "epoch": 0.5107710161746573, "grad_norm": 122.0, "learning_rate": 0.0003, "loss": 8.726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7042 }, { "epoch": 0.5108435482701095, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7043 }, { "epoch": 0.5109160803655618, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7044 }, { "epoch": 0.510988612461014, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 9.0522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7045 }, { "epoch": 0.5110611445564662, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.4625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7046 }, { "epoch": 0.5111336766519184, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7047 }, { "epoch": 0.5112062087473707, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7048 }, { "epoch": 0.511278740842823, "grad_norm": 12.9375, "learning_rate": 0.0003, "loss": 9.253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7049 }, { "epoch": 0.5113512729382752, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7050 }, { "epoch": 0.5114238050337274, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.3329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7051 }, { "epoch": 0.5114963371291796, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 9.2229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7052 }, { "epoch": 0.5115688692246319, "grad_norm": 13.6875, "learning_rate": 0.0003, "loss": 9.0242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7053 }, { "epoch": 0.5116414013200842, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7054 }, { "epoch": 0.5117139334155364, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7055 }, { "epoch": 0.5117864655109886, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.7912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7056 }, { "epoch": 0.5118589976064408, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.7336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7057 }, { "epoch": 0.511931529701893, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7058 }, { "epoch": 0.5120040617973454, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7059 }, { "epoch": 0.5120765938927976, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.7564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7060 }, { "epoch": 0.5121491259882498, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.9869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7061 }, { "epoch": 0.512221658083702, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7062 }, { "epoch": 0.5122941901791542, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.6136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7063 }, { "epoch": 0.5123667222746066, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7064 }, { "epoch": 0.5124392543700588, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.2929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7065 }, { "epoch": 0.512511786465511, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7066 }, { "epoch": 0.5125843185609632, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7067 }, { "epoch": 0.5126568506564154, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.8369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7068 }, { "epoch": 0.5127293827518677, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7069 }, { "epoch": 0.51280191484732, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.3521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7070 }, { "epoch": 0.5128744469427722, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.3089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7071 }, { "epoch": 0.5129469790382244, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.0363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7072 }, { "epoch": 0.5130195111336766, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7073 }, { "epoch": 0.5130920432291289, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.0089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7074 }, { "epoch": 0.5131645753245812, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.1732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7075 }, { "epoch": 0.5132371074200334, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7076 }, { "epoch": 0.5133096395154856, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.1021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7077 }, { "epoch": 0.5133821716109378, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 8.8694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7078 }, { "epoch": 0.5134547037063901, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.4324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7079 }, { "epoch": 0.5135272358018423, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.1083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7080 }, { "epoch": 0.5135997678972946, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.5923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7081 }, { "epoch": 0.5136722999927468, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.2743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7082 }, { "epoch": 0.513744832088199, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.1002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7083 }, { "epoch": 0.5138173641836513, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7084 }, { "epoch": 0.5138898962791035, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7085 }, { "epoch": 0.5139624283745557, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.6157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7086 }, { "epoch": 0.514034960470008, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7087 }, { "epoch": 0.5141074925654602, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7088 }, { "epoch": 0.5141800246609125, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.3032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7089 }, { "epoch": 0.5142525567563647, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.2447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7090 }, { "epoch": 0.5143250888518169, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7091 }, { "epoch": 0.5143976209472692, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.2114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7092 }, { "epoch": 0.5144701530427214, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.2146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7093 }, { "epoch": 0.5145426851381737, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.1024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7094 }, { "epoch": 0.5146152172336259, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.4308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7095 }, { "epoch": 0.5146877493290781, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.0122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7096 }, { "epoch": 0.5147602814245303, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 9.649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7097 }, { "epoch": 0.5148328135199826, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.4248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7098 }, { "epoch": 0.5149053456154349, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7099 }, { "epoch": 0.5149778777108871, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.0348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7100 }, { "epoch": 0.5150504098063393, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.7838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7101 }, { "epoch": 0.5151229419017915, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.5065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7102 }, { "epoch": 0.5151954739972437, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.6302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7103 }, { "epoch": 0.515268006092696, "grad_norm": 11.8125, "learning_rate": 0.0003, "loss": 8.9321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7104 }, { "epoch": 0.5153405381881483, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7105 }, { "epoch": 0.5154130702836005, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.9581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7106 }, { "epoch": 0.5154856023790527, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7107 }, { "epoch": 0.5155581344745049, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.9208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7108 }, { "epoch": 0.5156306665699572, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.0322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7109 }, { "epoch": 0.5157031986654095, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7110 }, { "epoch": 0.5157757307608617, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.4243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7111 }, { "epoch": 0.5158482628563139, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7112 }, { "epoch": 0.5159207949517661, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7113 }, { "epoch": 0.5159933270472183, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7114 }, { "epoch": 0.5160658591426707, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7115 }, { "epoch": 0.5161383912381229, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.0092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7116 }, { "epoch": 0.5162109233335751, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.8165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7117 }, { "epoch": 0.5162834554290273, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.9257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7118 }, { "epoch": 0.5163559875244795, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7119 }, { "epoch": 0.5164285196199319, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7120 }, { "epoch": 0.5165010517153841, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7121 }, { "epoch": 0.5165735838108363, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.3265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7122 }, { "epoch": 0.5166461159062885, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.4277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7123 }, { "epoch": 0.5167186480017407, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.3633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7124 }, { "epoch": 0.516791180097193, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.8372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7125 }, { "epoch": 0.5168637121926453, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.5892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7126 }, { "epoch": 0.5169362442880975, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7127 }, { "epoch": 0.5170087763835497, "grad_norm": 54.75, "learning_rate": 0.0003, "loss": 8.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7128 }, { "epoch": 0.5170813084790019, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7129 }, { "epoch": 0.5171538405744542, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7130 }, { "epoch": 0.5172263726699065, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.0381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7131 }, { "epoch": 0.5172989047653587, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7132 }, { "epoch": 0.5173714368608109, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.1057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7133 }, { "epoch": 0.5174439689562631, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.7944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7134 }, { "epoch": 0.5175165010517154, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7135 }, { "epoch": 0.5175890331471676, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7136 }, { "epoch": 0.5176615652426199, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.7477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7137 }, { "epoch": 0.5177340973380721, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7138 }, { "epoch": 0.5178066294335243, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.5739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7139 }, { "epoch": 0.5178791615289766, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7140 }, { "epoch": 0.5179516936244288, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.6895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7141 }, { "epoch": 0.518024225719881, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 9.1382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7142 }, { "epoch": 0.5180967578153333, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7143 }, { "epoch": 0.5181692899107855, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.5073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7144 }, { "epoch": 0.5182418220062378, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7145 }, { "epoch": 0.51831435410169, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.7436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7146 }, { "epoch": 0.5183868861971422, "grad_norm": 15.25, "learning_rate": 0.0003, "loss": 8.5429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7147 }, { "epoch": 0.5184594182925945, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.9419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7148 }, { "epoch": 0.5185319503880467, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.2544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7149 }, { "epoch": 0.518604482483499, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.4176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7150 }, { "epoch": 0.5186770145789512, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.6756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7151 }, { "epoch": 0.5187495466744034, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.9357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7152 }, { "epoch": 0.5188220787698556, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7153 }, { "epoch": 0.5188946108653079, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.9337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7154 }, { "epoch": 0.5189671429607602, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.9558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7155 }, { "epoch": 0.5190396750562124, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7156 }, { "epoch": 0.5191122071516646, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7157 }, { "epoch": 0.5191847392471168, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.8016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7158 }, { "epoch": 0.519257271342569, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.1868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7159 }, { "epoch": 0.5193298034380214, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.0855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7160 }, { "epoch": 0.5194023355334736, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.7722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7161 }, { "epoch": 0.5194748676289258, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7162 }, { "epoch": 0.519547399724378, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7163 }, { "epoch": 0.5196199318198302, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.8021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7164 }, { "epoch": 0.5196924639152826, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.7852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7165 }, { "epoch": 0.5197649960107348, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7166 }, { "epoch": 0.519837528106187, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.9418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7167 }, { "epoch": 0.5199100602016392, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7168 }, { "epoch": 0.5199825922970914, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.7066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7169 }, { "epoch": 0.5200551243925438, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7170 }, { "epoch": 0.520127656487996, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7171 }, { "epoch": 0.5202001885834482, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7172 }, { "epoch": 0.5202727206789004, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.4955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7173 }, { "epoch": 0.5203452527743526, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.3938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7174 }, { "epoch": 0.5204177848698048, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.1185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7175 }, { "epoch": 0.5204903169652572, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7176 }, { "epoch": 0.5205628490607094, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7177 }, { "epoch": 0.5206353811561616, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7178 }, { "epoch": 0.5207079132516138, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.9645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7179 }, { "epoch": 0.520780445347066, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7180 }, { "epoch": 0.5208529774425184, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.5174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7181 }, { "epoch": 0.5209255095379706, "grad_norm": 15.6875, "learning_rate": 0.0003, "loss": 8.871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7182 }, { "epoch": 0.5209980416334228, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.2675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7183 }, { "epoch": 0.521070573728875, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.5721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7184 }, { "epoch": 0.5211431058243272, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7185 }, { "epoch": 0.5212156379197795, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.8003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7186 }, { "epoch": 0.5212881700152318, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7187 }, { "epoch": 0.521360702110684, "grad_norm": 14.0625, "learning_rate": 0.0003, "loss": 9.0799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7188 }, { "epoch": 0.5214332342061362, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.8606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7189 }, { "epoch": 0.5215057663015884, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.1612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7190 }, { "epoch": 0.5215782983970407, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7191 }, { "epoch": 0.521650830492493, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7192 }, { "epoch": 0.5217233625879452, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.2694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7193 }, { "epoch": 0.5217958946833974, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.5665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7194 }, { "epoch": 0.5218684267788496, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7195 }, { "epoch": 0.5219409588743019, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7196 }, { "epoch": 0.5220134909697541, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7197 }, { "epoch": 0.5220860230652064, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.0143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7198 }, { "epoch": 0.5221585551606586, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 8.9389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7199 }, { "epoch": 0.5222310872561108, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7200 }, { "epoch": 0.5223036193515631, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7201 }, { "epoch": 0.5223761514470153, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.4699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7202 }, { "epoch": 0.5224486835424675, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7203 }, { "epoch": 0.5225212156379198, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.6603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7204 }, { "epoch": 0.522593747733372, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7205 }, { "epoch": 0.5226662798288243, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.9449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7206 }, { "epoch": 0.5227388119242765, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.6175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7207 }, { "epoch": 0.5228113440197287, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.8701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7208 }, { "epoch": 0.522883876115181, "grad_norm": 25.25, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7209 }, { "epoch": 0.5229564082106332, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.3304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7210 }, { "epoch": 0.5230289403060855, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.4504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7211 }, { "epoch": 0.5231014724015377, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 9.074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7212 }, { "epoch": 0.5231740044969899, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 8.829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7213 }, { "epoch": 0.5232465365924421, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.8357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7214 }, { "epoch": 0.5233190686878944, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7215 }, { "epoch": 0.5233916007833467, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7216 }, { "epoch": 0.5234641328787989, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7217 }, { "epoch": 0.5235366649742511, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 8.7071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7218 }, { "epoch": 0.5236091970697033, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.0232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7219 }, { "epoch": 0.5236817291651555, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.5023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7220 }, { "epoch": 0.5237542612606079, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.7939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7221 }, { "epoch": 0.5238267933560601, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7222 }, { "epoch": 0.5238993254515123, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.2183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7223 }, { "epoch": 0.5239718575469645, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.0337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7224 }, { "epoch": 0.5240443896424167, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.8335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7225 }, { "epoch": 0.5241169217378691, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7226 }, { "epoch": 0.5241894538333213, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7227 }, { "epoch": 0.5242619859287735, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.4429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7228 }, { "epoch": 0.5243345180242257, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.1745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7229 }, { "epoch": 0.5244070501196779, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.3691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7230 }, { "epoch": 0.5244795822151302, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7231 }, { "epoch": 0.5245521143105825, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.7048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7232 }, { "epoch": 0.5246246464060347, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7233 }, { "epoch": 0.5246971785014869, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.4707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7234 }, { "epoch": 0.5247697105969391, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7235 }, { "epoch": 0.5248422426923914, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7236 }, { "epoch": 0.5249147747878437, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.8185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7237 }, { "epoch": 0.5249873068832959, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.8138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7238 }, { "epoch": 0.5250598389787481, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7239 }, { "epoch": 0.5251323710742003, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 9.5431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7240 }, { "epoch": 0.5252049031696526, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.0524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7241 }, { "epoch": 0.5252774352651048, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7242 }, { "epoch": 0.5253499673605571, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.2521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7243 }, { "epoch": 0.5254224994560093, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.5943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7244 }, { "epoch": 0.5254950315514615, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.7732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7245 }, { "epoch": 0.5255675636469137, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.6716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7246 }, { "epoch": 0.525640095742366, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.0522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7247 }, { "epoch": 0.5257126278378182, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.8543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7248 }, { "epoch": 0.5257851599332705, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7249 }, { "epoch": 0.5258576920287227, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7250 }, { "epoch": 0.5259302241241749, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.0646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7251 }, { "epoch": 0.5260027562196272, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7252 }, { "epoch": 0.5260752883150794, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.5837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7253 }, { "epoch": 0.5261478204105317, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.1692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7254 }, { "epoch": 0.5262203525059839, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7255 }, { "epoch": 0.5262928846014361, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.6879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7256 }, { "epoch": 0.5263654166968884, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.7674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7257 }, { "epoch": 0.5264379487923406, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7258 }, { "epoch": 0.5265104808877928, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7259 }, { "epoch": 0.5265830129832451, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.7837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7260 }, { "epoch": 0.5266555450786973, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7261 }, { "epoch": 0.5267280771741496, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7262 }, { "epoch": 0.5268006092696018, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7263 }, { "epoch": 0.526873141365054, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7264 }, { "epoch": 0.5269456734605062, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7265 }, { "epoch": 0.5270182055559585, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.3063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7266 }, { "epoch": 0.5270907376514108, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7267 }, { "epoch": 0.527163269746863, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7268 }, { "epoch": 0.5272358018423152, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.8431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7269 }, { "epoch": 0.5273083339377674, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.8194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7270 }, { "epoch": 0.5273808660332197, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 8.4631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7271 }, { "epoch": 0.527453398128672, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7272 }, { "epoch": 0.5275259302241242, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.0153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7273 }, { "epoch": 0.5275984623195764, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.5383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7274 }, { "epoch": 0.5276709944150286, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 9.2927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7275 }, { "epoch": 0.5277435265104808, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 8.6506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7276 }, { "epoch": 0.5278160586059332, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7277 }, { "epoch": 0.5278885907013854, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.3616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7278 }, { "epoch": 0.5279611227968376, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7279 }, { "epoch": 0.5280336548922898, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7280 }, { "epoch": 0.528106186987742, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.2129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7281 }, { "epoch": 0.5281787190831944, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7282 }, { "epoch": 0.5282512511786466, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.4288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7283 }, { "epoch": 0.5283237832740988, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.0136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7284 }, { "epoch": 0.528396315369551, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.0378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7285 }, { "epoch": 0.5284688474650032, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7286 }, { "epoch": 0.5285413795604555, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7287 }, { "epoch": 0.5286139116559078, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7288 }, { "epoch": 0.52868644375136, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 8.8077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7289 }, { "epoch": 0.5287589758468122, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.5772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7290 }, { "epoch": 0.5288315079422644, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7291 }, { "epoch": 0.5289040400377167, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7292 }, { "epoch": 0.528976572133169, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 9.4564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7293 }, { "epoch": 0.5290491042286212, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7294 }, { "epoch": 0.5291216363240734, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.4352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7295 }, { "epoch": 0.5291941684195256, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.1194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7296 }, { "epoch": 0.5292667005149779, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.6204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7297 }, { "epoch": 0.5293392326104301, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.4104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7298 }, { "epoch": 0.5294117647058824, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7299 }, { "epoch": 0.5294842968013346, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.4327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7300 }, { "epoch": 0.5295568288967868, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7301 }, { "epoch": 0.5296293609922391, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.0569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7302 }, { "epoch": 0.5297018930876913, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7303 }, { "epoch": 0.5297744251831435, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7304 }, { "epoch": 0.5298469572785958, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.6151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7305 }, { "epoch": 0.529919489374048, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 8.9351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7306 }, { "epoch": 0.5299920214695003, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7307 }, { "epoch": 0.5300645535649525, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.5967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7308 }, { "epoch": 0.5301370856604047, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.8261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7309 }, { "epoch": 0.530209617755857, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7310 }, { "epoch": 0.5302821498513092, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.9057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7311 }, { "epoch": 0.5303546819467615, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.4852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7312 }, { "epoch": 0.5304272140422137, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7313 }, { "epoch": 0.5304997461376659, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7314 }, { "epoch": 0.5305722782331181, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.9888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7315 }, { "epoch": 0.5306448103285704, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.1974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7316 }, { "epoch": 0.5307173424240226, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7317 }, { "epoch": 0.5307898745194749, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7318 }, { "epoch": 0.5308624066149271, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.7291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7319 }, { "epoch": 0.5309349387103793, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.8046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7320 }, { "epoch": 0.5310074708058315, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7321 }, { "epoch": 0.5310800029012838, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.5747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7322 }, { "epoch": 0.5311525349967361, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7323 }, { "epoch": 0.5312250670921883, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7324 }, { "epoch": 0.5312975991876405, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.2678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7325 }, { "epoch": 0.5313701312830927, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.7145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7326 }, { "epoch": 0.531442663378545, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.3421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7327 }, { "epoch": 0.5315151954739973, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.9052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7328 }, { "epoch": 0.5315877275694495, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7329 }, { "epoch": 0.5316602596649017, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.9014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7330 }, { "epoch": 0.5317327917603539, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7331 }, { "epoch": 0.5318053238558061, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7332 }, { "epoch": 0.5318778559512585, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.0041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7333 }, { "epoch": 0.5319503880467107, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.5691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7334 }, { "epoch": 0.5320229201421629, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.2152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7335 }, { "epoch": 0.5320954522376151, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7336 }, { "epoch": 0.5321679843330673, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.8811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7337 }, { "epoch": 0.5322405164285197, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7338 }, { "epoch": 0.5323130485239719, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.5398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7339 }, { "epoch": 0.5323855806194241, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.5861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7340 }, { "epoch": 0.5324581127148763, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7341 }, { "epoch": 0.5325306448103285, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.8953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7342 }, { "epoch": 0.5326031769057809, "grad_norm": 35.75, "learning_rate": 0.0003, "loss": 9.1217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7343 }, { "epoch": 0.5326757090012331, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.6383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7344 }, { "epoch": 0.5327482410966853, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7345 }, { "epoch": 0.5328207731921375, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.5809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7346 }, { "epoch": 0.5328933052875897, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7347 }, { "epoch": 0.532965837383042, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7348 }, { "epoch": 0.5330383694784943, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7349 }, { "epoch": 0.5331109015739465, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7350 }, { "epoch": 0.5331834336693987, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7351 }, { "epoch": 0.5332559657648509, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7352 }, { "epoch": 0.5333284978603032, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.4563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7353 }, { "epoch": 0.5334010299557554, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7354 }, { "epoch": 0.5334735620512077, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.1989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7355 }, { "epoch": 0.5335460941466599, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.2839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7356 }, { "epoch": 0.5336186262421121, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 8.2356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7357 }, { "epoch": 0.5336911583375644, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.06, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7358 }, { "epoch": 0.5337636904330166, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7359 }, { "epoch": 0.5338362225284689, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7360 }, { "epoch": 0.5339087546239211, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.0385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7361 }, { "epoch": 0.5339812867193733, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.2824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7362 }, { "epoch": 0.5340538188148256, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7363 }, { "epoch": 0.5341263509102778, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7364 }, { "epoch": 0.53419888300573, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7365 }, { "epoch": 0.5342714151011823, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7366 }, { "epoch": 0.5343439471966345, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.3397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7367 }, { "epoch": 0.5344164792920868, "grad_norm": 17.5, "learning_rate": 0.0003, "loss": 8.7769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7368 }, { "epoch": 0.534489011387539, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.8809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7369 }, { "epoch": 0.5345615434829912, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.5446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7370 }, { "epoch": 0.5346340755784434, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.6663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7371 }, { "epoch": 0.5347066076738957, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 9.1983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7372 }, { "epoch": 0.534779139769348, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.1504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7373 }, { "epoch": 0.5348516718648002, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7374 }, { "epoch": 0.5349242039602524, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.7018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7375 }, { "epoch": 0.5349967360557046, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7376 }, { "epoch": 0.5350692681511569, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7377 }, { "epoch": 0.5351418002466092, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.7711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7378 }, { "epoch": 0.5352143323420614, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7379 }, { "epoch": 0.5352868644375136, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7380 }, { "epoch": 0.5353593965329658, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7381 }, { "epoch": 0.535431928628418, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.51, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7382 }, { "epoch": 0.5355044607238704, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7383 }, { "epoch": 0.5355769928193226, "grad_norm": 12.9375, "learning_rate": 0.0003, "loss": 8.3338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7384 }, { "epoch": 0.5356495249147748, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.8879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7385 }, { "epoch": 0.535722057010227, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.6949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7386 }, { "epoch": 0.5357945891056792, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 9.1999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7387 }, { "epoch": 0.5358671212011314, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.5502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7388 }, { "epoch": 0.5359396532965838, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.7604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7389 }, { "epoch": 0.536012185392036, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.8722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7390 }, { "epoch": 0.5360847174874882, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7391 }, { "epoch": 0.5361572495829404, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.4927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7392 }, { "epoch": 0.5362297816783926, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7393 }, { "epoch": 0.536302313773845, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.5789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7394 }, { "epoch": 0.5363748458692972, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.0875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7395 }, { "epoch": 0.5364473779647494, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7396 }, { "epoch": 0.5365199100602016, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.4822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7397 }, { "epoch": 0.5365924421556538, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7398 }, { "epoch": 0.5366649742511062, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.4029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7399 }, { "epoch": 0.5367375063465584, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.5079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7400 }, { "epoch": 0.5368100384420106, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.2363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7401 }, { "epoch": 0.5368825705374628, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.7315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7402 }, { "epoch": 0.536955102632915, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.6567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7403 }, { "epoch": 0.5370276347283673, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7404 }, { "epoch": 0.5371001668238196, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7405 }, { "epoch": 0.5371726989192718, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.8571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7406 }, { "epoch": 0.537245231014724, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7407 }, { "epoch": 0.5373177631101762, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7408 }, { "epoch": 0.5373902952056285, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7409 }, { "epoch": 0.5374628273010807, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7410 }, { "epoch": 0.537535359396533, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.3291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7411 }, { "epoch": 0.5376078914919852, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.4585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7412 }, { "epoch": 0.5376804235874374, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.5331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7413 }, { "epoch": 0.5377529556828897, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7414 }, { "epoch": 0.5378254877783419, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.3963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7415 }, { "epoch": 0.5378980198737942, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7416 }, { "epoch": 0.5379705519692464, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7417 }, { "epoch": 0.5380430840646986, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.7612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7418 }, { "epoch": 0.5381156161601509, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7419 }, { "epoch": 0.5381881482556031, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7420 }, { "epoch": 0.5382606803510553, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.7718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7421 }, { "epoch": 0.5383332124465076, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.7348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7422 }, { "epoch": 0.5384057445419598, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 8.8896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7423 }, { "epoch": 0.5384782766374121, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.7907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7424 }, { "epoch": 0.5385508087328643, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.1492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7425 }, { "epoch": 0.5386233408283165, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7426 }, { "epoch": 0.5386958729237687, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7427 }, { "epoch": 0.538768405019221, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.1117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7428 }, { "epoch": 0.5388409371146733, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.5462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7429 }, { "epoch": 0.5389134692101255, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7430 }, { "epoch": 0.5389860013055777, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7431 }, { "epoch": 0.5390585334010299, "grad_norm": 16.5, "learning_rate": 0.0003, "loss": 8.5655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7432 }, { "epoch": 0.5391310654964822, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7433 }, { "epoch": 0.5392035975919345, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.7675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7434 }, { "epoch": 0.5392761296873867, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.9851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7435 }, { "epoch": 0.5393486617828389, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7436 }, { "epoch": 0.5394211938782911, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 9.03, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7437 }, { "epoch": 0.5394937259737433, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7438 }, { "epoch": 0.5395662580691957, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7439 }, { "epoch": 0.5396387901646479, "grad_norm": 44.5, "learning_rate": 0.0003, "loss": 9.0242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7440 }, { "epoch": 0.5397113222601001, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7441 }, { "epoch": 0.5397838543555523, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7442 }, { "epoch": 0.5398563864510045, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.5083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7443 }, { "epoch": 0.5399289185464569, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 8.4044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7444 }, { "epoch": 0.5400014506419091, "grad_norm": 12.6875, "learning_rate": 0.0003, "loss": 8.5667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7445 }, { "epoch": 0.5400739827373613, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 8.303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7446 }, { "epoch": 0.5401465148328135, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.3598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7447 }, { "epoch": 0.5402190469282657, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.7675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7448 }, { "epoch": 0.540291579023718, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.0639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7449 }, { "epoch": 0.5403641111191703, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7450 }, { "epoch": 0.5404366432146225, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.5592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7451 }, { "epoch": 0.5405091753100747, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.9516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7452 }, { "epoch": 0.5405817074055269, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7453 }, { "epoch": 0.5406542395009791, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7454 }, { "epoch": 0.5407267715964315, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.5749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7455 }, { "epoch": 0.5407993036918837, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7456 }, { "epoch": 0.5408718357873359, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7457 }, { "epoch": 0.5409443678827881, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.0036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7458 }, { "epoch": 0.5410168999782403, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7459 }, { "epoch": 0.5410894320736926, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7460 }, { "epoch": 0.5411619641691449, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.2487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7461 }, { "epoch": 0.5412344962645971, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.5335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7462 }, { "epoch": 0.5413070283600493, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7463 }, { "epoch": 0.5413795604555015, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.3172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7464 }, { "epoch": 0.5414520925509538, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.2986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7465 }, { "epoch": 0.541524624646406, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.3488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7466 }, { "epoch": 0.5415971567418583, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.7194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7467 }, { "epoch": 0.5416696888373105, "grad_norm": 32.5, "learning_rate": 0.0003, "loss": 8.9178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7468 }, { "epoch": 0.5417422209327627, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7469 }, { "epoch": 0.541814753028215, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7470 }, { "epoch": 0.5418872851236672, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.4033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7471 }, { "epoch": 0.5419598172191195, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7472 }, { "epoch": 0.5420323493145717, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.2887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7473 }, { "epoch": 0.5421048814100239, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7474 }, { "epoch": 0.5421774135054762, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.0195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7475 }, { "epoch": 0.5422499456009284, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.7332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7476 }, { "epoch": 0.5423224776963806, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.8437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7477 }, { "epoch": 0.5423950097918329, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.9779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7478 }, { "epoch": 0.5424675418872851, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.1159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7479 }, { "epoch": 0.5425400739827374, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.8221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7480 }, { "epoch": 0.5426126060781896, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 9.1235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7481 }, { "epoch": 0.5426851381736418, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.0333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7482 }, { "epoch": 0.542757670269094, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.5307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7483 }, { "epoch": 0.5428302023645463, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7484 }, { "epoch": 0.5429027344599986, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.8309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7485 }, { "epoch": 0.5429752665554508, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.0004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7486 }, { "epoch": 0.543047798650903, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.5053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7487 }, { "epoch": 0.5431203307463552, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.2157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7488 }, { "epoch": 0.5431928628418075, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 9.3353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7489 }, { "epoch": 0.5432653949372598, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.6009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7490 }, { "epoch": 0.543337927032712, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.2022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7491 }, { "epoch": 0.5434104591281642, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 9.0442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7492 }, { "epoch": 0.5434829912236164, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.2225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7493 }, { "epoch": 0.5435555233190686, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7494 }, { "epoch": 0.543628055414521, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7495 }, { "epoch": 0.5437005875099732, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7496 }, { "epoch": 0.5437731196054254, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.0595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7497 }, { "epoch": 0.5438456517008776, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7498 }, { "epoch": 0.5439181837963298, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.2332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7499 }, { "epoch": 0.5439907158917822, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.3445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7500 }, { "epoch": 0.5440632479872344, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7501 }, { "epoch": 0.5441357800826866, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.3712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7502 }, { "epoch": 0.5442083121781388, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7503 }, { "epoch": 0.544280844273591, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.4961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7504 }, { "epoch": 0.5443533763690434, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.4565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7505 }, { "epoch": 0.5444259084644956, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.5623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7506 }, { "epoch": 0.5444984405599478, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.6579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7507 }, { "epoch": 0.5445709726554, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.5518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7508 }, { "epoch": 0.5446435047508522, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7509 }, { "epoch": 0.5447160368463045, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.0126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7510 }, { "epoch": 0.5447885689417568, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.1201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7511 }, { "epoch": 0.544861101037209, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7512 }, { "epoch": 0.5449336331326612, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7513 }, { "epoch": 0.5450061652281134, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7514 }, { "epoch": 0.5450786973235657, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.3977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7515 }, { "epoch": 0.545151229419018, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.7572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7516 }, { "epoch": 0.5452237615144702, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7517 }, { "epoch": 0.5452962936099224, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.9586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7518 }, { "epoch": 0.5453688257053746, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.7502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7519 }, { "epoch": 0.5454413578008269, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.9805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7520 }, { "epoch": 0.5455138898962791, "grad_norm": 1.53125, "learning_rate": 0.0003, "loss": 9.0172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7521 }, { "epoch": 0.5455864219917314, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 9.406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7522 }, { "epoch": 0.5456589540871836, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.2918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7523 }, { "epoch": 0.5457314861826358, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7524 }, { "epoch": 0.545804018278088, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.7806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7525 }, { "epoch": 0.5458765503735403, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 8.1438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7526 }, { "epoch": 0.5459490824689925, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7527 }, { "epoch": 0.5460216145644448, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7528 }, { "epoch": 0.546094146659897, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7529 }, { "epoch": 0.5461666787553492, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7530 }, { "epoch": 0.5462392108508015, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7531 }, { "epoch": 0.5463117429462537, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7532 }, { "epoch": 0.546384275041706, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.8943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7533 }, { "epoch": 0.5464568071371582, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7534 }, { "epoch": 0.5465293392326104, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7535 }, { "epoch": 0.5466018713280627, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7536 }, { "epoch": 0.5466744034235149, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.7283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7537 }, { "epoch": 0.5467469355189671, "grad_norm": 37.25, "learning_rate": 0.0003, "loss": 8.6982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7538 }, { "epoch": 0.5468194676144194, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.8434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7539 }, { "epoch": 0.5468919997098716, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.4018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7540 }, { "epoch": 0.5469645318053239, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.5515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7541 }, { "epoch": 0.5470370639007761, "grad_norm": 29.25, "learning_rate": 0.0003, "loss": 9.4566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7542 }, { "epoch": 0.5471095959962283, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7543 }, { "epoch": 0.5471821280916805, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7544 }, { "epoch": 0.5472546601871328, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.4706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7545 }, { "epoch": 0.5473271922825851, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.7331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7546 }, { "epoch": 0.5473997243780373, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7547 }, { "epoch": 0.5474722564734895, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.9454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7548 }, { "epoch": 0.5475447885689417, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.7974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7549 }, { "epoch": 0.547617320664394, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7550 }, { "epoch": 0.5476898527598463, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.1997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7551 }, { "epoch": 0.5477623848552985, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7552 }, { "epoch": 0.5478349169507507, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7553 }, { "epoch": 0.5479074490462029, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 9.1772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7554 }, { "epoch": 0.5479799811416551, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.9369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7555 }, { "epoch": 0.5480525132371075, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7556 }, { "epoch": 0.5481250453325597, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7557 }, { "epoch": 0.5481975774280119, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.1486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7558 }, { "epoch": 0.5482701095234641, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.4752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7559 }, { "epoch": 0.5483426416189163, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7560 }, { "epoch": 0.5484151737143687, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 8.8782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7561 }, { "epoch": 0.5484877058098209, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7562 }, { "epoch": 0.5485602379052731, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7563 }, { "epoch": 0.5486327700007253, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7564 }, { "epoch": 0.5487053020961775, "grad_norm": 1.6328125, "learning_rate": 0.0003, "loss": 8.7359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7565 }, { "epoch": 0.5487778341916298, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7566 }, { "epoch": 0.5488503662870821, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7567 }, { "epoch": 0.5489228983825343, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.6275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7568 }, { "epoch": 0.5489954304779865, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7569 }, { "epoch": 0.5490679625734387, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7570 }, { "epoch": 0.549140494668891, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.5918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7571 }, { "epoch": 0.5492130267643432, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.5177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7572 }, { "epoch": 0.5492855588597955, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.7272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7573 }, { "epoch": 0.5493580909552477, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.6314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7574 }, { "epoch": 0.5494306230506999, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.7997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7575 }, { "epoch": 0.5495031551461522, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.4741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7576 }, { "epoch": 0.5495756872416044, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7577 }, { "epoch": 0.5496482193370567, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7578 }, { "epoch": 0.5497207514325089, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 9.0573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7579 }, { "epoch": 0.5497932835279611, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.2417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7580 }, { "epoch": 0.5498658156234134, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.6493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7581 }, { "epoch": 0.5499383477188656, "grad_norm": 16.875, "learning_rate": 0.0003, "loss": 8.891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7582 }, { "epoch": 0.5500108798143178, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 8.6598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7583 }, { "epoch": 0.5500834119097701, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7584 }, { "epoch": 0.5501559440052223, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.5417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7585 }, { "epoch": 0.5502284761006746, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.6342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7586 }, { "epoch": 0.5503010081961268, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.5853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7587 }, { "epoch": 0.550373540291579, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7588 }, { "epoch": 0.5504460723870312, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7589 }, { "epoch": 0.5505186044824835, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7590 }, { "epoch": 0.5505911365779358, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7591 }, { "epoch": 0.550663668673388, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.0577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7592 }, { "epoch": 0.5507362007688402, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7593 }, { "epoch": 0.5508087328642924, "grad_norm": 17.75, "learning_rate": 0.0003, "loss": 8.9903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7594 }, { "epoch": 0.5508812649597447, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7595 }, { "epoch": 0.5509537970551969, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7596 }, { "epoch": 0.5510263291506492, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7597 }, { "epoch": 0.5510988612461014, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7598 }, { "epoch": 0.5511713933415536, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.4624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7599 }, { "epoch": 0.5512439254370058, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.1987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7600 }, { "epoch": 0.5513164575324581, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7601 }, { "epoch": 0.5513889896279104, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.4851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7602 }, { "epoch": 0.5514615217233626, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7603 }, { "epoch": 0.5515340538188148, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.7856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7604 }, { "epoch": 0.551606585914267, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.3095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7605 }, { "epoch": 0.5516791180097192, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.9637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7606 }, { "epoch": 0.5517516501051716, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.8415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7607 }, { "epoch": 0.5518241822006238, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 9.297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7608 }, { "epoch": 0.551896714296076, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.4404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7609 }, { "epoch": 0.5519692463915282, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.6118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7610 }, { "epoch": 0.5520417784869804, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7611 }, { "epoch": 0.5521143105824328, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7612 }, { "epoch": 0.552186842677885, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7613 }, { "epoch": 0.5522593747733372, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.6813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7614 }, { "epoch": 0.5523319068687894, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7615 }, { "epoch": 0.5524044389642416, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.2244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7616 }, { "epoch": 0.552476971059694, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7617 }, { "epoch": 0.5525495031551462, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.3051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7618 }, { "epoch": 0.5526220352505984, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7619 }, { "epoch": 0.5526945673460506, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.9261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7620 }, { "epoch": 0.5527670994415028, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.3881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7621 }, { "epoch": 0.5528396315369551, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.1061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7622 }, { "epoch": 0.5529121636324074, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7623 }, { "epoch": 0.5529846957278596, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.7972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7624 }, { "epoch": 0.5530572278233118, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 9.0804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7625 }, { "epoch": 0.553129759918764, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7626 }, { "epoch": 0.5532022920142163, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7627 }, { "epoch": 0.5532748241096686, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 8.8715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7628 }, { "epoch": 0.5533473562051208, "grad_norm": 24.875, "learning_rate": 0.0003, "loss": 8.7377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7629 }, { "epoch": 0.553419888300573, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7630 }, { "epoch": 0.5534924203960252, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7631 }, { "epoch": 0.5535649524914775, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7632 }, { "epoch": 0.5536374845869297, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7633 }, { "epoch": 0.553710016682382, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7634 }, { "epoch": 0.5537825487778342, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.9651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7635 }, { "epoch": 0.5538550808732864, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.0442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7636 }, { "epoch": 0.5539276129687387, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7637 }, { "epoch": 0.5540001450641909, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7638 }, { "epoch": 0.5540726771596431, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7639 }, { "epoch": 0.5541452092550954, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.0699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7640 }, { "epoch": 0.5542177413505476, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7641 }, { "epoch": 0.5542902734459999, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.4844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7642 }, { "epoch": 0.5543628055414521, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.6591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7643 }, { "epoch": 0.5544353376369043, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.9265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7644 }, { "epoch": 0.5545078697323566, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7645 }, { "epoch": 0.5545804018278088, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7646 }, { "epoch": 0.5546529339232611, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7647 }, { "epoch": 0.5547254660187133, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7648 }, { "epoch": 0.5547979981141655, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7649 }, { "epoch": 0.5548705302096177, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7650 }, { "epoch": 0.55494306230507, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 9.0139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7651 }, { "epoch": 0.5550155944005223, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7652 }, { "epoch": 0.5550881264959745, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.1003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7653 }, { "epoch": 0.5551606585914267, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7654 }, { "epoch": 0.5552331906868789, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.1686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7655 }, { "epoch": 0.5553057227823311, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7656 }, { "epoch": 0.5553782548777835, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.6196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7657 }, { "epoch": 0.5554507869732357, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.8713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7658 }, { "epoch": 0.5555233190686879, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 9.3918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7659 }, { "epoch": 0.5555958511641401, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.4253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7660 }, { "epoch": 0.5556683832595923, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.2441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7661 }, { "epoch": 0.5557409153550447, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.4167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7662 }, { "epoch": 0.5558134474504969, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7663 }, { "epoch": 0.5558859795459491, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.9769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7664 }, { "epoch": 0.5559585116414013, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.7712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7665 }, { "epoch": 0.5560310437368535, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.7736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7666 }, { "epoch": 0.5561035758323057, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 8.8769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7667 }, { "epoch": 0.5561761079277581, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 8.9518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7668 }, { "epoch": 0.5562486400232103, "grad_norm": 33.25, "learning_rate": 0.0003, "loss": 9.5459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7669 }, { "epoch": 0.5563211721186625, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.61, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7670 }, { "epoch": 0.5563937042141147, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7671 }, { "epoch": 0.5564662363095669, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7672 }, { "epoch": 0.5565387684050193, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.0875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7673 }, { "epoch": 0.5566113005004715, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7674 }, { "epoch": 0.5566838325959237, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.7764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7675 }, { "epoch": 0.5567563646913759, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.7495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7676 }, { "epoch": 0.5568288967868281, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7677 }, { "epoch": 0.5569014288822804, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7678 }, { "epoch": 0.5569739609777327, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.7671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7679 }, { "epoch": 0.5570464930731849, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 8.9852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7680 }, { "epoch": 0.5571190251686371, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7681 }, { "epoch": 0.5571915572640893, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7682 }, { "epoch": 0.5572640893595416, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7683 }, { "epoch": 0.5573366214549939, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.7057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7684 }, { "epoch": 0.5574091535504461, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.5387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7685 }, { "epoch": 0.5574816856458983, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.3165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7686 }, { "epoch": 0.5575542177413505, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7687 }, { "epoch": 0.5576267498368028, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.7667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7688 }, { "epoch": 0.557699281932255, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7689 }, { "epoch": 0.5577718140277073, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.8816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7690 }, { "epoch": 0.5578443461231595, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7691 }, { "epoch": 0.5579168782186117, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 8.8622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7692 }, { "epoch": 0.557989410314064, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.1123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7693 }, { "epoch": 0.5580619424095162, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.8362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7694 }, { "epoch": 0.5581344745049684, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.1931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7695 }, { "epoch": 0.5582070066004207, "grad_norm": 15.8125, "learning_rate": 0.0003, "loss": 9.4293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7696 }, { "epoch": 0.5582795386958729, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.2895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7697 }, { "epoch": 0.5583520707913252, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7698 }, { "epoch": 0.5584246028867774, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.4538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7699 }, { "epoch": 0.5584971349822296, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7700 }, { "epoch": 0.5585696670776819, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.1111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7701 }, { "epoch": 0.5586421991731341, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.6039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7702 }, { "epoch": 0.5587147312685864, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.4135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7703 }, { "epoch": 0.5587872633640386, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.0702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7704 }, { "epoch": 0.5588597954594908, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.5067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7705 }, { "epoch": 0.558932327554943, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7706 }, { "epoch": 0.5590048596503953, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.9092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7707 }, { "epoch": 0.5590773917458476, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7708 }, { "epoch": 0.5591499238412998, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7709 }, { "epoch": 0.559222455936752, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 9.1585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7710 }, { "epoch": 0.5592949880322042, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.9366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7711 }, { "epoch": 0.5593675201276564, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.3453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7712 }, { "epoch": 0.5594400522231088, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7713 }, { "epoch": 0.559512584318561, "grad_norm": 20.625, "learning_rate": 0.0003, "loss": 9.0872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7714 }, { "epoch": 0.5595851164140132, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7715 }, { "epoch": 0.5596576485094654, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 9.3466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7716 }, { "epoch": 0.5597301806049176, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7717 }, { "epoch": 0.55980271270037, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7718 }, { "epoch": 0.5598752447958222, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7719 }, { "epoch": 0.5599477768912744, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7720 }, { "epoch": 0.5600203089867266, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7721 }, { "epoch": 0.5600928410821788, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.0537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7722 }, { "epoch": 0.5601653731776312, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7723 }, { "epoch": 0.5602379052730834, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7724 }, { "epoch": 0.5603104373685356, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7725 }, { "epoch": 0.5603829694639878, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 7.994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7726 }, { "epoch": 0.56045550155944, "grad_norm": 58.25, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7727 }, { "epoch": 0.5605280336548923, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7728 }, { "epoch": 0.5606005657503446, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7729 }, { "epoch": 0.5606730978457968, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.9866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7730 }, { "epoch": 0.560745629941249, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.0063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7731 }, { "epoch": 0.5608181620367012, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.4524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7732 }, { "epoch": 0.5608906941321535, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.4294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7733 }, { "epoch": 0.5609632262276057, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7734 }, { "epoch": 0.561035758323058, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7735 }, { "epoch": 0.5611082904185102, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.8051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7736 }, { "epoch": 0.5611808225139624, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.6217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7737 }, { "epoch": 0.5612533546094146, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.0338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7738 }, { "epoch": 0.5613258867048669, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.7894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7739 }, { "epoch": 0.5613984188003192, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7740 }, { "epoch": 0.5614709508957714, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7741 }, { "epoch": 0.5615434829912236, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.6926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7742 }, { "epoch": 0.5616160150866758, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7743 }, { "epoch": 0.5616885471821281, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.4625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7744 }, { "epoch": 0.5617610792775803, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7745 }, { "epoch": 0.5618336113730326, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.3164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7746 }, { "epoch": 0.5619061434684848, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.9374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7747 }, { "epoch": 0.561978675563937, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.3639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7748 }, { "epoch": 0.5620512076593893, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7749 }, { "epoch": 0.5621237397548415, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7750 }, { "epoch": 0.5621962718502937, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.0607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7751 }, { "epoch": 0.562268803945746, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7752 }, { "epoch": 0.5623413360411982, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.8774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7753 }, { "epoch": 0.5624138681366505, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.5143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7754 }, { "epoch": 0.5624864002321027, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7755 }, { "epoch": 0.5625589323275549, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.0177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7756 }, { "epoch": 0.5626314644230072, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.1905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7757 }, { "epoch": 0.5627039965184594, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7758 }, { "epoch": 0.5627765286139117, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.2148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7759 }, { "epoch": 0.5628490607093639, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 8.584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7760 }, { "epoch": 0.5629215928048161, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7761 }, { "epoch": 0.5629941249002683, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7762 }, { "epoch": 0.5630666569957206, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.5408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7763 }, { "epoch": 0.5631391890911729, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.7365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7764 }, { "epoch": 0.5632117211866251, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.5786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7765 }, { "epoch": 0.5632842532820773, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.89, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7766 }, { "epoch": 0.5633567853775295, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.0804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7767 }, { "epoch": 0.5634293174729817, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.3859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7768 }, { "epoch": 0.5635018495684341, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 8.8595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7769 }, { "epoch": 0.5635743816638863, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.9916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7770 }, { "epoch": 0.5636469137593385, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.2086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7771 }, { "epoch": 0.5637194458547907, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7772 }, { "epoch": 0.5637919779502429, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.6625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7773 }, { "epoch": 0.5638645100456953, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7774 }, { "epoch": 0.5639370421411475, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7775 }, { "epoch": 0.5640095742365997, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7776 }, { "epoch": 0.5640821063320519, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.2197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7777 }, { "epoch": 0.5641546384275041, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.8781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7778 }, { "epoch": 0.5642271705229565, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.4136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7779 }, { "epoch": 0.5642997026184087, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.5633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7780 }, { "epoch": 0.5643722347138609, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 8.785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7781 }, { "epoch": 0.5644447668093131, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7782 }, { "epoch": 0.5645172989047653, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.8581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7783 }, { "epoch": 0.5645898310002176, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7784 }, { "epoch": 0.5646623630956699, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7785 }, { "epoch": 0.5647348951911221, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7786 }, { "epoch": 0.5648074272865743, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.6807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7787 }, { "epoch": 0.5648799593820265, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.1003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7788 }, { "epoch": 0.5649524914774788, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7789 }, { "epoch": 0.565025023572931, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7790 }, { "epoch": 0.5650975556683833, "grad_norm": 12.875, "learning_rate": 0.0003, "loss": 8.9484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7791 }, { "epoch": 0.5651700877638355, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 8.7095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7792 }, { "epoch": 0.5652426198592877, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.7315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7793 }, { "epoch": 0.56531515195474, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.4788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7794 }, { "epoch": 0.5653876840501922, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.7527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7795 }, { "epoch": 0.5654602161456445, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.9113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7796 }, { "epoch": 0.5655327482410967, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7797 }, { "epoch": 0.5656052803365489, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.7162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7798 }, { "epoch": 0.5656778124320012, "grad_norm": 14.0625, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7799 }, { "epoch": 0.5657503445274534, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7800 }, { "epoch": 0.5658228766229056, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.8898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7801 }, { "epoch": 0.5658954087183579, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7802 }, { "epoch": 0.5659679408138101, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7803 }, { "epoch": 0.5660404729092623, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7804 }, { "epoch": 0.5661130050047146, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7805 }, { "epoch": 0.5661855371001668, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7806 }, { "epoch": 0.566258069195619, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7807 }, { "epoch": 0.5663306012910713, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.3183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7808 }, { "epoch": 0.5664031333865235, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.0115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7809 }, { "epoch": 0.5664756654819758, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.5505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7810 }, { "epoch": 0.566548197577428, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.6683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7811 }, { "epoch": 0.5666207296728802, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7812 }, { "epoch": 0.5666932617683325, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.9016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7813 }, { "epoch": 0.5667657938637847, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.2728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7814 }, { "epoch": 0.566838325959237, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.3621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7815 }, { "epoch": 0.5669108580546892, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.8733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7816 }, { "epoch": 0.5669833901501414, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.3036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7817 }, { "epoch": 0.5670559222455936, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.1304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7818 }, { "epoch": 0.5671284543410459, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7819 }, { "epoch": 0.5672009864364982, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 9.1458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7820 }, { "epoch": 0.5672735185319504, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.9657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7821 }, { "epoch": 0.5673460506274026, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.5416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7822 }, { "epoch": 0.5674185827228548, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.4176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7823 }, { "epoch": 0.567491114818307, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7824 }, { "epoch": 0.5675636469137594, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7825 }, { "epoch": 0.5676361790092116, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.7569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7826 }, { "epoch": 0.5677087111046638, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.7412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7827 }, { "epoch": 0.567781243200116, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.5931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7828 }, { "epoch": 0.5678537752955682, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.1448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7829 }, { "epoch": 0.5679263073910206, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7830 }, { "epoch": 0.5679988394864728, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.6883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7831 }, { "epoch": 0.568071371581925, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.8717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7832 }, { "epoch": 0.5681439036773772, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.6777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7833 }, { "epoch": 0.5682164357728294, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.9066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7834 }, { "epoch": 0.5682889678682818, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.5533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7835 }, { "epoch": 0.568361499963734, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.3065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7836 }, { "epoch": 0.5684340320591862, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7837 }, { "epoch": 0.5685065641546384, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.6972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7838 }, { "epoch": 0.5685790962500906, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7839 }, { "epoch": 0.568651628345543, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7840 }, { "epoch": 0.5687241604409952, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.3024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7841 }, { "epoch": 0.5687966925364474, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7842 }, { "epoch": 0.5688692246318996, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7843 }, { "epoch": 0.5689417567273518, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7844 }, { "epoch": 0.5690142888228041, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.5398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7845 }, { "epoch": 0.5690868209182564, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.3293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7846 }, { "epoch": 0.5691593530137086, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 9.4878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7847 }, { "epoch": 0.5692318851091608, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7848 }, { "epoch": 0.569304417204613, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.2524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7849 }, { "epoch": 0.5693769493000653, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 9.0657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7850 }, { "epoch": 0.5694494813955175, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7851 }, { "epoch": 0.5695220134909698, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.4773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7852 }, { "epoch": 0.569594545586422, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7853 }, { "epoch": 0.5696670776818742, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.6814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7854 }, { "epoch": 0.5697396097773265, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.5482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7855 }, { "epoch": 0.5698121418727787, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.6434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7856 }, { "epoch": 0.569884673968231, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.7066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7857 }, { "epoch": 0.5699572060636832, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.9732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7858 }, { "epoch": 0.5700297381591354, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7859 }, { "epoch": 0.5701022702545877, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.6277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7860 }, { "epoch": 0.5701748023500399, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7861 }, { "epoch": 0.5702473344454921, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7862 }, { "epoch": 0.5703198665409444, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7863 }, { "epoch": 0.5703923986363966, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.7973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7864 }, { "epoch": 0.5704649307318489, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.6578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7865 }, { "epoch": 0.5705374628273011, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.0284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7866 }, { "epoch": 0.5706099949227533, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.6139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7867 }, { "epoch": 0.5706825270182055, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.0002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7868 }, { "epoch": 0.5707550591136578, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 8.7506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7869 }, { "epoch": 0.5708275912091101, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7870 }, { "epoch": 0.5709001233045623, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.4642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7871 }, { "epoch": 0.5709726554000145, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.7633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7872 }, { "epoch": 0.5710451874954667, "grad_norm": 15.75, "learning_rate": 0.0003, "loss": 8.8877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7873 }, { "epoch": 0.571117719590919, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.6788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7874 }, { "epoch": 0.5711902516863712, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7875 }, { "epoch": 0.5712627837818235, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.9772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7876 }, { "epoch": 0.5713353158772757, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.1087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7877 }, { "epoch": 0.5714078479727279, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.9214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7878 }, { "epoch": 0.5714803800681801, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7879 }, { "epoch": 0.5715529121636324, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.8526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7880 }, { "epoch": 0.5716254442590847, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.3912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7881 }, { "epoch": 0.5716979763545369, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.6216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7882 }, { "epoch": 0.5717705084499891, "grad_norm": 15.4375, "learning_rate": 0.0003, "loss": 9.0106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7883 }, { "epoch": 0.5718430405454413, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.0135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7884 }, { "epoch": 0.5719155726408935, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.0727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7885 }, { "epoch": 0.5719881047363459, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.3993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7886 }, { "epoch": 0.5720606368317981, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7887 }, { "epoch": 0.5721331689272503, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7888 }, { "epoch": 0.5722057010227025, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.2975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7889 }, { "epoch": 0.5722782331181547, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.8092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7890 }, { "epoch": 0.5723507652136071, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7891 }, { "epoch": 0.5724232973090593, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.1033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7892 }, { "epoch": 0.5724958294045115, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7893 }, { "epoch": 0.5725683614999637, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.7091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7894 }, { "epoch": 0.5726408935954159, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7895 }, { "epoch": 0.5727134256908682, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 8.7435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7896 }, { "epoch": 0.5727859577863205, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.0949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7897 }, { "epoch": 0.5728584898817727, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7898 }, { "epoch": 0.5729310219772249, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.5888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7899 }, { "epoch": 0.5730035540726771, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 9.1288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7900 }, { "epoch": 0.5730760861681294, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.3541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7901 }, { "epoch": 0.5731486182635817, "grad_norm": 16.125, "learning_rate": 0.0003, "loss": 9.4151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7902 }, { "epoch": 0.5732211503590339, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7903 }, { "epoch": 0.5732936824544861, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7904 }, { "epoch": 0.5733662145499383, "grad_norm": 27.625, "learning_rate": 0.0003, "loss": 9.114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7905 }, { "epoch": 0.5734387466453906, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7906 }, { "epoch": 0.5735112787408428, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7907 }, { "epoch": 0.5735838108362951, "grad_norm": 32.25, "learning_rate": 0.0003, "loss": 8.8165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7908 }, { "epoch": 0.5736563429317473, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.2494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7909 }, { "epoch": 0.5737288750271995, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7910 }, { "epoch": 0.5738014071226518, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7911 }, { "epoch": 0.573873939218104, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.2092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7912 }, { "epoch": 0.5739464713135563, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7913 }, { "epoch": 0.5740190034090085, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7914 }, { "epoch": 0.5740915355044607, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.0957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7915 }, { "epoch": 0.574164067599913, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7916 }, { "epoch": 0.5742365996953652, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.5401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7917 }, { "epoch": 0.5743091317908174, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 9.0093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7918 }, { "epoch": 0.5743816638862697, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.3565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7919 }, { "epoch": 0.5744541959817219, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.8455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7920 }, { "epoch": 0.5745267280771742, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 8.9965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7921 }, { "epoch": 0.5745992601726264, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.4862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7922 }, { "epoch": 0.5746717922680786, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7923 }, { "epoch": 0.5747443243635308, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.8319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7924 }, { "epoch": 0.5748168564589831, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.9914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7925 }, { "epoch": 0.5748893885544354, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.7084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7926 }, { "epoch": 0.5749619206498876, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7927 }, { "epoch": 0.5750344527453398, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7928 }, { "epoch": 0.575106984840792, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7929 }, { "epoch": 0.5751795169362443, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7930 }, { "epoch": 0.5752520490316966, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.6268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7931 }, { "epoch": 0.5753245811271488, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 8.4994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7932 }, { "epoch": 0.575397113222601, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.2168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7933 }, { "epoch": 0.5754696453180532, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7934 }, { "epoch": 0.5755421774135054, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7935 }, { "epoch": 0.5756147095089578, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.1563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7936 }, { "epoch": 0.57568724160441, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.5446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7937 }, { "epoch": 0.5757597736998622, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7938 }, { "epoch": 0.5758323057953144, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.6521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7939 }, { "epoch": 0.5759048378907666, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7940 }, { "epoch": 0.575977369986219, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.1704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7941 }, { "epoch": 0.5760499020816712, "grad_norm": 1.4375, "learning_rate": 0.0003, "loss": 9.3936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7942 }, { "epoch": 0.5761224341771234, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.7066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7943 }, { "epoch": 0.5761949662725756, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7944 }, { "epoch": 0.5762674983680278, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7945 }, { "epoch": 0.57634003046348, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7946 }, { "epoch": 0.5764125625589324, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.2739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7947 }, { "epoch": 0.5764850946543846, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.3143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7948 }, { "epoch": 0.5765576267498368, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7949 }, { "epoch": 0.576630158845289, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7950 }, { "epoch": 0.5767026909407412, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.1752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7951 }, { "epoch": 0.5767752230361936, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7952 }, { "epoch": 0.5768477551316458, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7953 }, { "epoch": 0.576920287227098, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7954 }, { "epoch": 0.5769928193225502, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.1962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7955 }, { "epoch": 0.5770653514180024, "grad_norm": 18.25, "learning_rate": 0.0003, "loss": 8.8714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7956 }, { "epoch": 0.5771378835134547, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7957 }, { "epoch": 0.577210415608907, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7958 }, { "epoch": 0.5772829477043592, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 9.0907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7959 }, { "epoch": 0.5773554797998114, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.4273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7960 }, { "epoch": 0.5774280118952636, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7961 }, { "epoch": 0.5775005439907159, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.8969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7962 }, { "epoch": 0.5775730760861681, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.5609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7963 }, { "epoch": 0.5776456081816204, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 9.1158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7964 }, { "epoch": 0.5777181402770726, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.6942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7965 }, { "epoch": 0.5777906723725248, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.8188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7966 }, { "epoch": 0.5778632044679771, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.5056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7967 }, { "epoch": 0.5779357365634293, "grad_norm": 1.2578125, "learning_rate": 0.0003, "loss": 8.7681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7968 }, { "epoch": 0.5780082686588816, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.9692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7969 }, { "epoch": 0.5780808007543338, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7970 }, { "epoch": 0.578153332849786, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7971 }, { "epoch": 0.5782258649452383, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.9581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7972 }, { "epoch": 0.5782983970406905, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.5514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7973 }, { "epoch": 0.5783709291361427, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.7917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7974 }, { "epoch": 0.578443461231595, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7975 }, { "epoch": 0.5785159933270472, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.5615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7976 }, { "epoch": 0.5785885254224995, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7977 }, { "epoch": 0.5786610575179517, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7978 }, { "epoch": 0.5787335896134039, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7979 }, { "epoch": 0.5788061217088561, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.3479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7980 }, { "epoch": 0.5788786538043084, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.8987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7981 }, { "epoch": 0.5789511858997607, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7982 }, { "epoch": 0.5790237179952129, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7983 }, { "epoch": 0.5790962500906651, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.7194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7984 }, { "epoch": 0.5791687821861173, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.5575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7985 }, { "epoch": 0.5792413142815696, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.2231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7986 }, { "epoch": 0.5793138463770219, "grad_norm": 14.4375, "learning_rate": 0.0003, "loss": 8.8404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7987 }, { "epoch": 0.5793863784724741, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.3458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7988 }, { "epoch": 0.5794589105679263, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 9.0555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7989 }, { "epoch": 0.5795314426633785, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.2236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7990 }, { "epoch": 0.5796039747588307, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7991 }, { "epoch": 0.5796765068542831, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7992 }, { "epoch": 0.5797490389497353, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7993 }, { "epoch": 0.5798215710451875, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.6281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7994 }, { "epoch": 0.5798941031406397, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.2021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7995 }, { "epoch": 0.5799666352360919, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7996 }, { "epoch": 0.5800391673315443, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.6713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7997 }, { "epoch": 0.5801116994269965, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7998 }, { "epoch": 0.5801842315224487, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.4522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 7999 }, { "epoch": 0.5802567636179009, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.7705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8000 }, { "epoch": 0.5803292957133531, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8001 }, { "epoch": 0.5804018278088054, "grad_norm": 23.625, "learning_rate": 0.0003, "loss": 9.2182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8002 }, { "epoch": 0.5804743599042577, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.8409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8003 }, { "epoch": 0.5805468919997099, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.5222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8004 }, { "epoch": 0.5806194240951621, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.8217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8005 }, { "epoch": 0.5806919561906143, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.5301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8006 }, { "epoch": 0.5807644882860666, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8007 }, { "epoch": 0.5808370203815189, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8008 }, { "epoch": 0.5809095524769711, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8009 }, { "epoch": 0.5809820845724233, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.9315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8010 }, { "epoch": 0.5810546166678755, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.6926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8011 }, { "epoch": 0.5811271487633278, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 9.2041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8012 }, { "epoch": 0.58119968085878, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8013 }, { "epoch": 0.5812722129542323, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.9213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8014 }, { "epoch": 0.5813447450496845, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8015 }, { "epoch": 0.5814172771451367, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.6482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8016 }, { "epoch": 0.5814898092405889, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.9216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8017 }, { "epoch": 0.5815623413360412, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8018 }, { "epoch": 0.5816348734314934, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8019 }, { "epoch": 0.5817074055269457, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8020 }, { "epoch": 0.5817799376223979, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8021 }, { "epoch": 0.5818524697178501, "grad_norm": 19.875, "learning_rate": 0.0003, "loss": 9.1015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8022 }, { "epoch": 0.5819250018133024, "grad_norm": 14.375, "learning_rate": 0.0003, "loss": 8.8719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8023 }, { "epoch": 0.5819975339087546, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8024 }, { "epoch": 0.5820700660042069, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8025 }, { "epoch": 0.5821425980996591, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8026 }, { "epoch": 0.5822151301951113, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.7998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8027 }, { "epoch": 0.5822876622905636, "grad_norm": 80.0, "learning_rate": 0.0003, "loss": 9.49, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8028 }, { "epoch": 0.5823601943860158, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.6651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8029 }, { "epoch": 0.582432726481468, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.7522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8030 }, { "epoch": 0.5825052585769203, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8031 }, { "epoch": 0.5825777906723725, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8032 }, { "epoch": 0.5826503227678248, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.9402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8033 }, { "epoch": 0.582722854863277, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8034 }, { "epoch": 0.5827953869587292, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.0365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8035 }, { "epoch": 0.5828679190541814, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.4624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8036 }, { "epoch": 0.5829404511496337, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.5358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8037 }, { "epoch": 0.583012983245086, "grad_norm": 49.75, "learning_rate": 0.0003, "loss": 9.3071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8038 }, { "epoch": 0.5830855153405382, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8039 }, { "epoch": 0.5831580474359904, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8040 }, { "epoch": 0.5832305795314426, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.5515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8041 }, { "epoch": 0.5833031116268949, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8042 }, { "epoch": 0.5833756437223472, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.2238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8043 }, { "epoch": 0.5834481758177994, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8044 }, { "epoch": 0.5835207079132516, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.1877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8045 }, { "epoch": 0.5835932400087038, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8046 }, { "epoch": 0.583665772104156, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.4216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8047 }, { "epoch": 0.5837383041996084, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8048 }, { "epoch": 0.5838108362950606, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.8617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8049 }, { "epoch": 0.5838833683905128, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.8448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8050 }, { "epoch": 0.583955900485965, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.1279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8051 }, { "epoch": 0.5840284325814172, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.6898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8052 }, { "epoch": 0.5841009646768696, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8053 }, { "epoch": 0.5841734967723218, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8054 }, { "epoch": 0.584246028867774, "grad_norm": 15.75, "learning_rate": 0.0003, "loss": 8.677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8055 }, { "epoch": 0.5843185609632262, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.0524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8056 }, { "epoch": 0.5843910930586784, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.6786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8057 }, { "epoch": 0.5844636251541308, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8058 }, { "epoch": 0.584536157249583, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.6437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8059 }, { "epoch": 0.5846086893450352, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8060 }, { "epoch": 0.5846812214404874, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.8386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8061 }, { "epoch": 0.5847537535359396, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.4875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8062 }, { "epoch": 0.5848262856313919, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8063 }, { "epoch": 0.5848988177268442, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8064 }, { "epoch": 0.5849713498222964, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8065 }, { "epoch": 0.5850438819177486, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.6091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8066 }, { "epoch": 0.5851164140132008, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.4565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8067 }, { "epoch": 0.5851889461086531, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.6506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8068 }, { "epoch": 0.5852614782041053, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 8.8163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8069 }, { "epoch": 0.5853340102995576, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 9.0283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8070 }, { "epoch": 0.5854065423950098, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8071 }, { "epoch": 0.585479074490462, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8072 }, { "epoch": 0.5855516065859143, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.5174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8073 }, { "epoch": 0.5856241386813665, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8074 }, { "epoch": 0.5856966707768188, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8075 }, { "epoch": 0.585769202872271, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8076 }, { "epoch": 0.5858417349677232, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8077 }, { "epoch": 0.5859142670631755, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.1093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8078 }, { "epoch": 0.5859867991586277, "grad_norm": 13.375, "learning_rate": 0.0003, "loss": 8.7449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8079 }, { "epoch": 0.5860593312540799, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8080 }, { "epoch": 0.5861318633495322, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.1382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8081 }, { "epoch": 0.5862043954449844, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8082 }, { "epoch": 0.5862769275404367, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8083 }, { "epoch": 0.5863494596358889, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8084 }, { "epoch": 0.5864219917313411, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8085 }, { "epoch": 0.5864945238267933, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.7789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8086 }, { "epoch": 0.5865670559222456, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8087 }, { "epoch": 0.5866395880176978, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8088 }, { "epoch": 0.5867121201131501, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.9332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8089 }, { "epoch": 0.5867846522086023, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8090 }, { "epoch": 0.5868571843040545, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8091 }, { "epoch": 0.5869297163995068, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.1562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8092 }, { "epoch": 0.587002248494959, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 8.6066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8093 }, { "epoch": 0.5870747805904113, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8094 }, { "epoch": 0.5871473126858635, "grad_norm": 34.75, "learning_rate": 0.0003, "loss": 8.8313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8095 }, { "epoch": 0.5872198447813157, "grad_norm": 1.4765625, "learning_rate": 0.0003, "loss": 9.0284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8096 }, { "epoch": 0.5872923768767679, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8097 }, { "epoch": 0.5873649089722202, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8098 }, { "epoch": 0.5874374410676725, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8099 }, { "epoch": 0.5875099731631247, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.1504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8100 }, { "epoch": 0.5875825052585769, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 9.0639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8101 }, { "epoch": 0.5876550373540291, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.1842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8102 }, { "epoch": 0.5877275694494813, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8103 }, { "epoch": 0.5878001015449337, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8104 }, { "epoch": 0.5878726336403859, "grad_norm": 14.0625, "learning_rate": 0.0003, "loss": 8.3532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8105 }, { "epoch": 0.5879451657358381, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.3733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8106 }, { "epoch": 0.5880176978312903, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8107 }, { "epoch": 0.5880902299267425, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.5435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8108 }, { "epoch": 0.5881627620221949, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.3094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8109 }, { "epoch": 0.5882352941176471, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8110 }, { "epoch": 0.5883078262130993, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8111 }, { "epoch": 0.5883803583085515, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.8266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8112 }, { "epoch": 0.5884528904040037, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8113 }, { "epoch": 0.588525422499456, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8114 }, { "epoch": 0.5885979545949083, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.8133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8115 }, { "epoch": 0.5886704866903605, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.0106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8116 }, { "epoch": 0.5887430187858127, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8117 }, { "epoch": 0.5888155508812649, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8118 }, { "epoch": 0.5888880829767172, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.8227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8119 }, { "epoch": 0.5889606150721695, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.2968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8120 }, { "epoch": 0.5890331471676217, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8121 }, { "epoch": 0.5891056792630739, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8122 }, { "epoch": 0.5891782113585261, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8123 }, { "epoch": 0.5892507434539784, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8124 }, { "epoch": 0.5893232755494306, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8125 }, { "epoch": 0.5893958076448829, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8126 }, { "epoch": 0.5894683397403351, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8127 }, { "epoch": 0.5895408718357873, "grad_norm": 14.9375, "learning_rate": 0.0003, "loss": 8.5839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8128 }, { "epoch": 0.5896134039312396, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8129 }, { "epoch": 0.5896859360266918, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.8727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8130 }, { "epoch": 0.589758468122144, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.5953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8131 }, { "epoch": 0.5898310002175963, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8132 }, { "epoch": 0.5899035323130485, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8133 }, { "epoch": 0.5899760644085008, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8134 }, { "epoch": 0.590048596503953, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8135 }, { "epoch": 0.5901211285994052, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8136 }, { "epoch": 0.5901936606948575, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.8214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8137 }, { "epoch": 0.5902661927903097, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8138 }, { "epoch": 0.590338724885762, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.0366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8139 }, { "epoch": 0.5904112569812142, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.5321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8140 }, { "epoch": 0.5904837890766664, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.4447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8141 }, { "epoch": 0.5905563211721186, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8142 }, { "epoch": 0.5906288532675709, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8143 }, { "epoch": 0.5907013853630232, "grad_norm": 19.5, "learning_rate": 0.0003, "loss": 8.613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8144 }, { "epoch": 0.5907739174584754, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8145 }, { "epoch": 0.5908464495539276, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8146 }, { "epoch": 0.5909189816493798, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8147 }, { "epoch": 0.590991513744832, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8148 }, { "epoch": 0.5910640458402844, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.8638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8149 }, { "epoch": 0.5911365779357366, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.5598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8150 }, { "epoch": 0.5912091100311888, "grad_norm": 1.2578125, "learning_rate": 0.0003, "loss": 9.1432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8151 }, { "epoch": 0.591281642126641, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.9685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8152 }, { "epoch": 0.5913541742220932, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.3323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8153 }, { "epoch": 0.5914267063175455, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.6078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8154 }, { "epoch": 0.5914992384129978, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.3005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8155 }, { "epoch": 0.59157177050845, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8156 }, { "epoch": 0.5916443026039022, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8157 }, { "epoch": 0.5917168346993544, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8158 }, { "epoch": 0.5917893667948066, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8159 }, { "epoch": 0.591861898890259, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.1992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8160 }, { "epoch": 0.5919344309857112, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.1792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8161 }, { "epoch": 0.5920069630811634, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8162 }, { "epoch": 0.5920794951766156, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8163 }, { "epoch": 0.5921520272720678, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8164 }, { "epoch": 0.5922245593675202, "grad_norm": 14.6875, "learning_rate": 0.0003, "loss": 9.5006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8165 }, { "epoch": 0.5922970914629724, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.9535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8166 }, { "epoch": 0.5923696235584246, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.8913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8167 }, { "epoch": 0.5924421556538768, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.8419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8168 }, { "epoch": 0.592514687749329, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8169 }, { "epoch": 0.5925872198447814, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.3325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8170 }, { "epoch": 0.5926597519402336, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8171 }, { "epoch": 0.5927322840356858, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.1694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8172 }, { "epoch": 0.592804816131138, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.2462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8173 }, { "epoch": 0.5928773482265902, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.7324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8174 }, { "epoch": 0.5929498803220425, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8175 }, { "epoch": 0.5930224124174948, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 8.5327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8176 }, { "epoch": 0.593094944512947, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8177 }, { "epoch": 0.5931674766083992, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8178 }, { "epoch": 0.5932400087038514, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.2935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8179 }, { "epoch": 0.5933125407993037, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8180 }, { "epoch": 0.593385072894756, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.8639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8181 }, { "epoch": 0.5934576049902082, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8182 }, { "epoch": 0.5935301370856604, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8183 }, { "epoch": 0.5936026691811126, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.4554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8184 }, { "epoch": 0.5936752012765649, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.0689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8185 }, { "epoch": 0.5937477333720171, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.9593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8186 }, { "epoch": 0.5938202654674694, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8187 }, { "epoch": 0.5938927975629216, "grad_norm": 16.75, "learning_rate": 0.0003, "loss": 8.4425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8188 }, { "epoch": 0.5939653296583738, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.8114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8189 }, { "epoch": 0.5940378617538261, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8190 }, { "epoch": 0.5941103938492783, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.5664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8191 }, { "epoch": 0.5941829259447305, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8192 }, { "epoch": 0.5942554580401828, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.3851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8193 }, { "epoch": 0.594327990135635, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.2437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8194 }, { "epoch": 0.5944005222310873, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8195 }, { "epoch": 0.5944730543265395, "grad_norm": 22.0, "learning_rate": 0.0003, "loss": 8.8895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8196 }, { "epoch": 0.5945455864219917, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.5126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8197 }, { "epoch": 0.594618118517444, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.3682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8198 }, { "epoch": 0.5946906506128962, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.8819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8199 }, { "epoch": 0.5947631827083485, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8200 }, { "epoch": 0.5948357148038007, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.2254, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8201 }, { "epoch": 0.5949082468992529, "grad_norm": 20.75, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8202 }, { "epoch": 0.5949807789947051, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8203 }, { "epoch": 0.5950533110901574, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8204 }, { "epoch": 0.5951258431856097, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8205 }, { "epoch": 0.5951983752810619, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 9.1814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8206 }, { "epoch": 0.5952709073765141, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.3961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8207 }, { "epoch": 0.5953434394719663, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.5119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8208 }, { "epoch": 0.5954159715674185, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.6991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8209 }, { "epoch": 0.5954885036628709, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8210 }, { "epoch": 0.5955610357583231, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8211 }, { "epoch": 0.5956335678537753, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8212 }, { "epoch": 0.5957060999492275, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 9.0422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8213 }, { "epoch": 0.5957786320446797, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.7528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8214 }, { "epoch": 0.5958511641401321, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8215 }, { "epoch": 0.5959236962355843, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8216 }, { "epoch": 0.5959962283310365, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8217 }, { "epoch": 0.5960687604264887, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8218 }, { "epoch": 0.5961412925219409, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8219 }, { "epoch": 0.5962138246173933, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8220 }, { "epoch": 0.5962863567128455, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8221 }, { "epoch": 0.5963588888082977, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.5393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8222 }, { "epoch": 0.5964314209037499, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.6175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8223 }, { "epoch": 0.5965039529992021, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8224 }, { "epoch": 0.5965764850946543, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.8617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8225 }, { "epoch": 0.5966490171901067, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8226 }, { "epoch": 0.5967215492855589, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.8508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8227 }, { "epoch": 0.5967940813810111, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8228 }, { "epoch": 0.5968666134764633, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.2366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8229 }, { "epoch": 0.5969391455719155, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8230 }, { "epoch": 0.5970116776673678, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.4779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8231 }, { "epoch": 0.5970842097628201, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.5102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8232 }, { "epoch": 0.5971567418582723, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.3692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8233 }, { "epoch": 0.5972292739537245, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8234 }, { "epoch": 0.5973018060491767, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8235 }, { "epoch": 0.597374338144629, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.8505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8236 }, { "epoch": 0.5974468702400813, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8237 }, { "epoch": 0.5975194023355335, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8238 }, { "epoch": 0.5975919344309857, "grad_norm": 14.6875, "learning_rate": 0.0003, "loss": 8.7662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8239 }, { "epoch": 0.5976644665264379, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8240 }, { "epoch": 0.5977369986218902, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.5181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8241 }, { "epoch": 0.5978095307173424, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.6679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8242 }, { "epoch": 0.5978820628127947, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 8.9151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8243 }, { "epoch": 0.5979545949082469, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.9936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8244 }, { "epoch": 0.5980271270036991, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.9543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8245 }, { "epoch": 0.5980996590991514, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.6792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8246 }, { "epoch": 0.5981721911946036, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8247 }, { "epoch": 0.5982447232900558, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.7157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8248 }, { "epoch": 0.5983172553855081, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.7959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8249 }, { "epoch": 0.5983897874809603, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.1602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8250 }, { "epoch": 0.5984623195764126, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.2771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8251 }, { "epoch": 0.5985348516718648, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8252 }, { "epoch": 0.598607383767317, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8253 }, { "epoch": 0.5986799158627693, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.2655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8254 }, { "epoch": 0.5987524479582215, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.6765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8255 }, { "epoch": 0.5988249800536738, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.0434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8256 }, { "epoch": 0.598897512149126, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8257 }, { "epoch": 0.5989700442445782, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.6427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8258 }, { "epoch": 0.5990425763400304, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8259 }, { "epoch": 0.5991151084354827, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.8801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8260 }, { "epoch": 0.599187640530935, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8261 }, { "epoch": 0.5992601726263872, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 9.084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8262 }, { "epoch": 0.5993327047218394, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.8987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8263 }, { "epoch": 0.5994052368172916, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.4577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8264 }, { "epoch": 0.5994777689127438, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.5232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8265 }, { "epoch": 0.5995503010081962, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8266 }, { "epoch": 0.5996228331036484, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8267 }, { "epoch": 0.5996953651991006, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.8179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8268 }, { "epoch": 0.5997678972945528, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8269 }, { "epoch": 0.599840429390005, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8270 }, { "epoch": 0.5999129614854574, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.8748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8271 }, { "epoch": 0.5999854935809096, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8272 }, { "epoch": 0.6000580256763618, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.7806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8273 }, { "epoch": 0.600130557771814, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8274 }, { "epoch": 0.6002030898672662, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.9644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8275 }, { "epoch": 0.6002756219627186, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8276 }, { "epoch": 0.6003481540581708, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.7337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8277 }, { "epoch": 0.600420686153623, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.8999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8278 }, { "epoch": 0.6004932182490752, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8279 }, { "epoch": 0.6005657503445274, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8280 }, { "epoch": 0.6006382824399797, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8281 }, { "epoch": 0.600710814535432, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.1153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8282 }, { "epoch": 0.6007833466308842, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8283 }, { "epoch": 0.6008558787263364, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8284 }, { "epoch": 0.6009284108217886, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8285 }, { "epoch": 0.6010009429172409, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.8776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8286 }, { "epoch": 0.6010734750126931, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8287 }, { "epoch": 0.6011460071081454, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.9839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8288 }, { "epoch": 0.6012185392035976, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8289 }, { "epoch": 0.6012910712990498, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.5481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8290 }, { "epoch": 0.6013636033945021, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8291 }, { "epoch": 0.6014361354899543, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.0428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8292 }, { "epoch": 0.6015086675854066, "grad_norm": 28.75, "learning_rate": 0.0003, "loss": 8.7071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8293 }, { "epoch": 0.6015811996808588, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.5788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8294 }, { "epoch": 0.601653731776311, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 8.3326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8295 }, { "epoch": 0.6017262638717632, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.3234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8296 }, { "epoch": 0.6017987959672155, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 8.0847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8297 }, { "epoch": 0.6018713280626677, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.4942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8298 }, { "epoch": 0.60194386015812, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.8667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8299 }, { "epoch": 0.6020163922535722, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.2252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8300 }, { "epoch": 0.6020889243490244, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.8028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8301 }, { "epoch": 0.6021614564444767, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.3371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8302 }, { "epoch": 0.6022339885399289, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8303 }, { "epoch": 0.6023065206353811, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.6207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8304 }, { "epoch": 0.6023790527308334, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8305 }, { "epoch": 0.6024515848262856, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8306 }, { "epoch": 0.6025241169217379, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8307 }, { "epoch": 0.6025966490171901, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8308 }, { "epoch": 0.6026691811126423, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8309 }, { "epoch": 0.6027417132080946, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.9573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8310 }, { "epoch": 0.6028142453035468, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8311 }, { "epoch": 0.6028867773989991, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.9297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8312 }, { "epoch": 0.6029593094944513, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8313 }, { "epoch": 0.6030318415899035, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8314 }, { "epoch": 0.6031043736853557, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8315 }, { "epoch": 0.603176905780808, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.8584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8316 }, { "epoch": 0.6032494378762603, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8317 }, { "epoch": 0.6033219699717125, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8318 }, { "epoch": 0.6033945020671647, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8319 }, { "epoch": 0.6034670341626169, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.5919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8320 }, { "epoch": 0.6035395662580691, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.4992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8321 }, { "epoch": 0.6036120983535215, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8322 }, { "epoch": 0.6036846304489737, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8323 }, { "epoch": 0.6037571625444259, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.7002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8324 }, { "epoch": 0.6038296946398781, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.5594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8325 }, { "epoch": 0.6039022267353303, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.9827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8326 }, { "epoch": 0.6039747588307827, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8327 }, { "epoch": 0.6040472909262349, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.2101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8328 }, { "epoch": 0.6041198230216871, "grad_norm": 1.484375, "learning_rate": 0.0003, "loss": 8.5907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8329 }, { "epoch": 0.6041923551171393, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8330 }, { "epoch": 0.6042648872125915, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.7427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8331 }, { "epoch": 0.6043374193080439, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8332 }, { "epoch": 0.6044099514034961, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8333 }, { "epoch": 0.6044824834989483, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.8783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8334 }, { "epoch": 0.6045550155944005, "grad_norm": 46.5, "learning_rate": 0.0003, "loss": 8.5058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8335 }, { "epoch": 0.6046275476898527, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8336 }, { "epoch": 0.604700079785305, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8337 }, { "epoch": 0.6047726118807573, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8338 }, { "epoch": 0.6048451439762095, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8339 }, { "epoch": 0.6049176760716617, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8340 }, { "epoch": 0.6049902081671139, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 8.9332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8341 }, { "epoch": 0.6050627402625662, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.6559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8342 }, { "epoch": 0.6051352723580185, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8343 }, { "epoch": 0.6052078044534707, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.0179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8344 }, { "epoch": 0.6052803365489229, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8345 }, { "epoch": 0.6053528686443751, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.9339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8346 }, { "epoch": 0.6054254007398274, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.2215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8347 }, { "epoch": 0.6054979328352796, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8348 }, { "epoch": 0.6055704649307319, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8349 }, { "epoch": 0.6056429970261841, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.0294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8350 }, { "epoch": 0.6057155291216363, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8351 }, { "epoch": 0.6057880612170886, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.3497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8352 }, { "epoch": 0.6058605933125408, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.5867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8353 }, { "epoch": 0.605933125407993, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.6843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8354 }, { "epoch": 0.6060056575034453, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8355 }, { "epoch": 0.6060781895988975, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.5707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8356 }, { "epoch": 0.6061507216943498, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.7053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8357 }, { "epoch": 0.606223253789802, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8358 }, { "epoch": 0.6062957858852542, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.7395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8359 }, { "epoch": 0.6063683179807065, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8360 }, { "epoch": 0.6064408500761587, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8361 }, { "epoch": 0.606513382171611, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.4032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8362 }, { "epoch": 0.6065859142670632, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8363 }, { "epoch": 0.6066584463625154, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8364 }, { "epoch": 0.6067309784579676, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8365 }, { "epoch": 0.6068035105534199, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.5497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8366 }, { "epoch": 0.6068760426488721, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8367 }, { "epoch": 0.6069485747443244, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8368 }, { "epoch": 0.6070211068397766, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8369 }, { "epoch": 0.6070936389352288, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8370 }, { "epoch": 0.607166171030681, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8371 }, { "epoch": 0.6072387031261333, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 9.0456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8372 }, { "epoch": 0.6073112352215856, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8373 }, { "epoch": 0.6073837673170378, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.1355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8374 }, { "epoch": 0.60745629941249, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.0527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8375 }, { "epoch": 0.6075288315079422, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.3734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8376 }, { "epoch": 0.6076013636033945, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8377 }, { "epoch": 0.6076738956988468, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8378 }, { "epoch": 0.607746427794299, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.6613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8379 }, { "epoch": 0.6078189598897512, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.6113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8380 }, { "epoch": 0.6078914919852034, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8381 }, { "epoch": 0.6079640240806556, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8382 }, { "epoch": 0.608036556176108, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.7587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8383 }, { "epoch": 0.6081090882715602, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8384 }, { "epoch": 0.6081816203670124, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.7781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8385 }, { "epoch": 0.6082541524624646, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.8346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8386 }, { "epoch": 0.6083266845579168, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8387 }, { "epoch": 0.6083992166533692, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8388 }, { "epoch": 0.6084717487488214, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8389 }, { "epoch": 0.6085442808442736, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8390 }, { "epoch": 0.6086168129397258, "grad_norm": 19.625, "learning_rate": 0.0003, "loss": 9.1036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8391 }, { "epoch": 0.608689345035178, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8392 }, { "epoch": 0.6087618771306303, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8393 }, { "epoch": 0.6088344092260826, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.7347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8394 }, { "epoch": 0.6089069413215348, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.5693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8395 }, { "epoch": 0.608979473416987, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.2492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8396 }, { "epoch": 0.6090520055124392, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8397 }, { "epoch": 0.6091245376078915, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.4821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8398 }, { "epoch": 0.6091970697033438, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.8358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8399 }, { "epoch": 0.609269601798796, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.9231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8400 }, { "epoch": 0.6093421338942482, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8401 }, { "epoch": 0.6094146659897004, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 8.8443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8402 }, { "epoch": 0.6094871980851527, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8403 }, { "epoch": 0.6095597301806049, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8404 }, { "epoch": 0.6096322622760572, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8405 }, { "epoch": 0.6097047943715094, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8406 }, { "epoch": 0.6097773264669616, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8407 }, { "epoch": 0.6098498585624139, "grad_norm": 1.6640625, "learning_rate": 0.0003, "loss": 8.9738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8408 }, { "epoch": 0.6099223906578661, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.9759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8409 }, { "epoch": 0.6099949227533183, "grad_norm": 35.75, "learning_rate": 0.0003, "loss": 9.1879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8410 }, { "epoch": 0.6100674548487706, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.4634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8411 }, { "epoch": 0.6101399869442228, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.0562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8412 }, { "epoch": 0.6102125190396751, "grad_norm": 27.75, "learning_rate": 0.0003, "loss": 9.2188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8413 }, { "epoch": 0.6102850511351273, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.1921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8414 }, { "epoch": 0.6103575832305795, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8415 }, { "epoch": 0.6104301153260318, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8416 }, { "epoch": 0.610502647421484, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.4273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8417 }, { "epoch": 0.6105751795169363, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8418 }, { "epoch": 0.6106477116123885, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8419 }, { "epoch": 0.6107202437078407, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8420 }, { "epoch": 0.6107927758032929, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.9491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8421 }, { "epoch": 0.6108653078987452, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.7499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8422 }, { "epoch": 0.6109378399941975, "grad_norm": 15.25, "learning_rate": 0.0003, "loss": 8.8643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8423 }, { "epoch": 0.6110103720896497, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.6573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8424 }, { "epoch": 0.6110829041851019, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8425 }, { "epoch": 0.6111554362805541, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8426 }, { "epoch": 0.6112279683760063, "grad_norm": 15.75, "learning_rate": 0.0003, "loss": 8.6869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8427 }, { "epoch": 0.6113005004714587, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8428 }, { "epoch": 0.6113730325669109, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.2702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8429 }, { "epoch": 0.6114455646623631, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.5951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8430 }, { "epoch": 0.6115180967578153, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.8147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8431 }, { "epoch": 0.6115906288532675, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.9343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8432 }, { "epoch": 0.6116631609487199, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.3596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8433 }, { "epoch": 0.6117356930441721, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8434 }, { "epoch": 0.6118082251396243, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.9505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8435 }, { "epoch": 0.6118807572350765, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8436 }, { "epoch": 0.6119532893305287, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.6797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8437 }, { "epoch": 0.6120258214259809, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.8397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8438 }, { "epoch": 0.6120983535214333, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.8401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8439 }, { "epoch": 0.6121708856168855, "grad_norm": 17.5, "learning_rate": 0.0003, "loss": 9.2759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8440 }, { "epoch": 0.6122434177123377, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 7.9974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8441 }, { "epoch": 0.6123159498077899, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8442 }, { "epoch": 0.6123884819032421, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8443 }, { "epoch": 0.6124610139986945, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8444 }, { "epoch": 0.6125335460941467, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.5787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8445 }, { "epoch": 0.6126060781895989, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.1661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8446 }, { "epoch": 0.6126786102850511, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.5529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8447 }, { "epoch": 0.6127511423805033, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.4653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8448 }, { "epoch": 0.6128236744759556, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8449 }, { "epoch": 0.6128962065714079, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.6842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8450 }, { "epoch": 0.6129687386668601, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8451 }, { "epoch": 0.6130412707623123, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.4532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8452 }, { "epoch": 0.6131138028577645, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.7697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8453 }, { "epoch": 0.6131863349532168, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.1416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8454 }, { "epoch": 0.613258867048669, "grad_norm": 45.0, "learning_rate": 0.0003, "loss": 9.1015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8455 }, { "epoch": 0.6133313991441213, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8456 }, { "epoch": 0.6134039312395735, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.8053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8457 }, { "epoch": 0.6134764633350257, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.8163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8458 }, { "epoch": 0.613548995430478, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8459 }, { "epoch": 0.6136215275259302, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8460 }, { "epoch": 0.6136940596213825, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.8182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8461 }, { "epoch": 0.6137665917168347, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.3919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8462 }, { "epoch": 0.6138391238122869, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.1871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8463 }, { "epoch": 0.6139116559077392, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8464 }, { "epoch": 0.6139841880031914, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.8694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8465 }, { "epoch": 0.6140567200986436, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.2159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8466 }, { "epoch": 0.6141292521940959, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8467 }, { "epoch": 0.6142017842895481, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.3764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8468 }, { "epoch": 0.6142743163850004, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.0785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8469 }, { "epoch": 0.6143468484804526, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 8.6546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8470 }, { "epoch": 0.6144193805759048, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.2314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8471 }, { "epoch": 0.614491912671357, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8472 }, { "epoch": 0.6145644447668093, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.0349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8473 }, { "epoch": 0.6146369768622616, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8474 }, { "epoch": 0.6147095089577138, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8475 }, { "epoch": 0.614782041053166, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.6602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8476 }, { "epoch": 0.6148545731486182, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.8733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8477 }, { "epoch": 0.6149271052440705, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8478 }, { "epoch": 0.6149996373395228, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.8609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8479 }, { "epoch": 0.615072169434975, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.9457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8480 }, { "epoch": 0.6151447015304272, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8481 }, { "epoch": 0.6152172336258794, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.9456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8482 }, { "epoch": 0.6152897657213316, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.1275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8483 }, { "epoch": 0.615362297816784, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 8.6369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8484 }, { "epoch": 0.6154348299122362, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8485 }, { "epoch": 0.6155073620076884, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.7685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8486 }, { "epoch": 0.6155798941031406, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 8.8646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8487 }, { "epoch": 0.6156524261985928, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8488 }, { "epoch": 0.6157249582940452, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.8481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8489 }, { "epoch": 0.6157974903894974, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8490 }, { "epoch": 0.6158700224849496, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.3563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8491 }, { "epoch": 0.6159425545804018, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8492 }, { "epoch": 0.616015086675854, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.6124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8493 }, { "epoch": 0.6160876187713064, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.4965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8494 }, { "epoch": 0.6161601508667586, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.5079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8495 }, { "epoch": 0.6162326829622108, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8496 }, { "epoch": 0.616305215057663, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8497 }, { "epoch": 0.6163777471531152, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.4233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8498 }, { "epoch": 0.6164502792485675, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.2836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8499 }, { "epoch": 0.6165228113440198, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.6991, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8500 }, { "epoch": 0.616595343439472, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.5461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8501 }, { "epoch": 0.6166678755349242, "grad_norm": 24.625, "learning_rate": 0.0003, "loss": 8.8664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8502 }, { "epoch": 0.6167404076303764, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.5969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8503 }, { "epoch": 0.6168129397258286, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8504 }, { "epoch": 0.616885471821281, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8505 }, { "epoch": 0.6169580039167332, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.8618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8506 }, { "epoch": 0.6170305360121854, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8507 }, { "epoch": 0.6171030681076376, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.2475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8508 }, { "epoch": 0.6171756002030898, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.5066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8509 }, { "epoch": 0.6172481322985421, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8510 }, { "epoch": 0.6173206643939944, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8511 }, { "epoch": 0.6173931964894466, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8512 }, { "epoch": 0.6174657285848988, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.7526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8513 }, { "epoch": 0.617538260680351, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.7573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8514 }, { "epoch": 0.6176107927758033, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.2058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8515 }, { "epoch": 0.6176833248712555, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8516 }, { "epoch": 0.6177558569667078, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8517 }, { "epoch": 0.61782838906216, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.2539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8518 }, { "epoch": 0.6179009211576122, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.1436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8519 }, { "epoch": 0.6179734532530645, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8520 }, { "epoch": 0.6180459853485167, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.8894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8521 }, { "epoch": 0.618118517443969, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8522 }, { "epoch": 0.6181910495394212, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8523 }, { "epoch": 0.6182635816348734, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.5622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8524 }, { "epoch": 0.6183361137303257, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8525 }, { "epoch": 0.6184086458257779, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.7828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8526 }, { "epoch": 0.6184811779212301, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.6741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8527 }, { "epoch": 0.6185537100166824, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.8257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8528 }, { "epoch": 0.6186262421121346, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.2781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8529 }, { "epoch": 0.6186987742075869, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.4623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8530 }, { "epoch": 0.6187713063030391, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8531 }, { "epoch": 0.6188438383984913, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.3457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8532 }, { "epoch": 0.6189163704939435, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8533 }, { "epoch": 0.6189889025893958, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8534 }, { "epoch": 0.6190614346848481, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8535 }, { "epoch": 0.6191339667803003, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.5296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8536 }, { "epoch": 0.6192064988757525, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8537 }, { "epoch": 0.6192790309712047, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8538 }, { "epoch": 0.619351563066657, "grad_norm": 20.625, "learning_rate": 0.0003, "loss": 8.8095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8539 }, { "epoch": 0.6194240951621093, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.0422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8540 }, { "epoch": 0.6194966272575615, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8541 }, { "epoch": 0.6195691593530137, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 9.0924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8542 }, { "epoch": 0.6196416914484659, "grad_norm": 1.8046875, "learning_rate": 0.0003, "loss": 8.7036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8543 }, { "epoch": 0.6197142235439181, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8544 }, { "epoch": 0.6197867556393705, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8545 }, { "epoch": 0.6198592877348227, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8546 }, { "epoch": 0.6199318198302749, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.15, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8547 }, { "epoch": 0.6200043519257271, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8548 }, { "epoch": 0.6200768840211793, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8549 }, { "epoch": 0.6201494161166317, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8550 }, { "epoch": 0.6202219482120839, "grad_norm": 75.5, "learning_rate": 0.0003, "loss": 8.8759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8551 }, { "epoch": 0.6202944803075361, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.9663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8552 }, { "epoch": 0.6203670124029883, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8553 }, { "epoch": 0.6204395444984405, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8554 }, { "epoch": 0.6205120765938928, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8555 }, { "epoch": 0.6205846086893451, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.5764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8556 }, { "epoch": 0.6206571407847973, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8557 }, { "epoch": 0.6207296728802495, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.6712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8558 }, { "epoch": 0.6208022049757017, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.5414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8559 }, { "epoch": 0.620874737071154, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8560 }, { "epoch": 0.6209472691666063, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8561 }, { "epoch": 0.6210198012620585, "grad_norm": 24.75, "learning_rate": 0.0003, "loss": 8.8696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8562 }, { "epoch": 0.6210923333575107, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8563 }, { "epoch": 0.6211648654529629, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8564 }, { "epoch": 0.6212373975484152, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8565 }, { "epoch": 0.6213099296438674, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.7803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8566 }, { "epoch": 0.6213824617393197, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8567 }, { "epoch": 0.6214549938347719, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8568 }, { "epoch": 0.6215275259302241, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.9232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8569 }, { "epoch": 0.6216000580256764, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8570 }, { "epoch": 0.6216725901211286, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8571 }, { "epoch": 0.6217451222165808, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.5973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8572 }, { "epoch": 0.6218176543120331, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.5383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8573 }, { "epoch": 0.6218901864074853, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8574 }, { "epoch": 0.6219627185029375, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8575 }, { "epoch": 0.6220352505983898, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8576 }, { "epoch": 0.622107782693842, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.9572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8577 }, { "epoch": 0.6221803147892943, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8578 }, { "epoch": 0.6222528468847465, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.3883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8579 }, { "epoch": 0.6223253789801987, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8580 }, { "epoch": 0.622397911075651, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.23, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8581 }, { "epoch": 0.6224704431711032, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.4643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8582 }, { "epoch": 0.6225429752665554, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8583 }, { "epoch": 0.6226155073620077, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.6453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8584 }, { "epoch": 0.6226880394574599, "grad_norm": 31.5, "learning_rate": 0.0003, "loss": 9.2705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8585 }, { "epoch": 0.6227605715529122, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8586 }, { "epoch": 0.6228331036483644, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 8.8545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8587 }, { "epoch": 0.6229056357438166, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8588 }, { "epoch": 0.6229781678392688, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.1505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8589 }, { "epoch": 0.6230506999347211, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.7614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8590 }, { "epoch": 0.6231232320301734, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.4599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8591 }, { "epoch": 0.6231957641256256, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8592 }, { "epoch": 0.6232682962210778, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8593 }, { "epoch": 0.62334082831653, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.9943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8594 }, { "epoch": 0.6234133604119823, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8595 }, { "epoch": 0.6234858925074346, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.0978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8596 }, { "epoch": 0.6235584246028868, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.8938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8597 }, { "epoch": 0.623630956698339, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.6655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8598 }, { "epoch": 0.6237034887937912, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8599 }, { "epoch": 0.6237760208892434, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8600 }, { "epoch": 0.6238485529846958, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.3459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8601 }, { "epoch": 0.623921085080148, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8602 }, { "epoch": 0.6239936171756002, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.7519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8603 }, { "epoch": 0.6240661492710524, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8604 }, { "epoch": 0.6241386813665046, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8605 }, { "epoch": 0.624211213461957, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.2788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8606 }, { "epoch": 0.6242837455574092, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.6429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8607 }, { "epoch": 0.6243562776528614, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.6304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8608 }, { "epoch": 0.6244288097483136, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.2295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8609 }, { "epoch": 0.6245013418437658, "grad_norm": 37.5, "learning_rate": 0.0003, "loss": 9.0547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8610 }, { "epoch": 0.6245738739392181, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.7983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8611 }, { "epoch": 0.6246464060346704, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8612 }, { "epoch": 0.6247189381301226, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.6155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8613 }, { "epoch": 0.6247914702255748, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.0226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8614 }, { "epoch": 0.624864002321027, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8615 }, { "epoch": 0.6249365344164793, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.8083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8616 }, { "epoch": 0.6250090665119316, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8617 }, { "epoch": 0.6250815986073838, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.0599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8618 }, { "epoch": 0.625154130702836, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.9161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8619 }, { "epoch": 0.6252266627982882, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8620 }, { "epoch": 0.6252991948937405, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.6638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8621 }, { "epoch": 0.6253717269891927, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.4009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8622 }, { "epoch": 0.625444259084645, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.2803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8623 }, { "epoch": 0.6255167911800972, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.9416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8624 }, { "epoch": 0.6255893232755494, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.1283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8625 }, { "epoch": 0.6256618553710017, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.9471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8626 }, { "epoch": 0.6257343874664539, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.1227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8627 }, { "epoch": 0.6258069195619061, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.8403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8628 }, { "epoch": 0.6258794516573584, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8629 }, { "epoch": 0.6259519837528106, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8630 }, { "epoch": 0.6260245158482629, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.7983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8631 }, { "epoch": 0.6260970479437151, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.3403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8632 }, { "epoch": 0.6261695800391673, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.3776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8633 }, { "epoch": 0.6262421121346196, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8634 }, { "epoch": 0.6263146442300718, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.8106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8635 }, { "epoch": 0.6263871763255241, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8636 }, { "epoch": 0.6264597084209763, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8637 }, { "epoch": 0.6265322405164285, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.3249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8638 }, { "epoch": 0.6266047726118807, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.3738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8639 }, { "epoch": 0.626677304707333, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8640 }, { "epoch": 0.6267498368027853, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8641 }, { "epoch": 0.6268223688982375, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.6525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8642 }, { "epoch": 0.6268949009936897, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8643 }, { "epoch": 0.6269674330891419, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.6363, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8644 }, { "epoch": 0.6270399651845942, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8645 }, { "epoch": 0.6271124972800464, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8646 }, { "epoch": 0.6271850293754987, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8647 }, { "epoch": 0.6272575614709509, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 8.8393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8648 }, { "epoch": 0.6273300935664031, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.3889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8649 }, { "epoch": 0.6274026256618553, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.3837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8650 }, { "epoch": 0.6274751577573076, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8651 }, { "epoch": 0.6275476898527599, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.2238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8652 }, { "epoch": 0.6276202219482121, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8653 }, { "epoch": 0.6276927540436643, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8654 }, { "epoch": 0.6277652861391165, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 9.061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8655 }, { "epoch": 0.6278378182345687, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.5469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8656 }, { "epoch": 0.6279103503300211, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.3326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8657 }, { "epoch": 0.6279828824254733, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8658 }, { "epoch": 0.6280554145209255, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.4417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8659 }, { "epoch": 0.6281279466163777, "grad_norm": 21.0, "learning_rate": 0.0003, "loss": 8.9736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8660 }, { "epoch": 0.6282004787118299, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8661 }, { "epoch": 0.6282730108072823, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.9385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8662 }, { "epoch": 0.6283455429027345, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.6142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8663 }, { "epoch": 0.6284180749981867, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.7288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8664 }, { "epoch": 0.6284906070936389, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.3695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8665 }, { "epoch": 0.6285631391890911, "grad_norm": 13.75, "learning_rate": 0.0003, "loss": 9.1728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8666 }, { "epoch": 0.6286356712845435, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 9.3343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8667 }, { "epoch": 0.6287082033799957, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8668 }, { "epoch": 0.6287807354754479, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.6434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8669 }, { "epoch": 0.6288532675709001, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.5809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8670 }, { "epoch": 0.6289257996663523, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8671 }, { "epoch": 0.6289983317618046, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8672 }, { "epoch": 0.6290708638572569, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.72, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8673 }, { "epoch": 0.6291433959527091, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8674 }, { "epoch": 0.6292159280481613, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 9.143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8675 }, { "epoch": 0.6292884601436135, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8676 }, { "epoch": 0.6293609922390658, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.6992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8677 }, { "epoch": 0.629433524334518, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.5045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8678 }, { "epoch": 0.6295060564299703, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8679 }, { "epoch": 0.6295785885254225, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.7344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8680 }, { "epoch": 0.6296511206208747, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.9579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8681 }, { "epoch": 0.629723652716327, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8682 }, { "epoch": 0.6297961848117792, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.0565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8683 }, { "epoch": 0.6298687169072315, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8684 }, { "epoch": 0.6299412490026837, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8685 }, { "epoch": 0.6300137810981359, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8686 }, { "epoch": 0.6300863131935882, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 8.7126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8687 }, { "epoch": 0.6301588452890404, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8688 }, { "epoch": 0.6302313773844926, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8689 }, { "epoch": 0.6303039094799449, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8690 }, { "epoch": 0.6303764415753971, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.0592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8691 }, { "epoch": 0.6304489736708494, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8692 }, { "epoch": 0.6305215057663016, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.5418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8693 }, { "epoch": 0.6305940378617538, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8694 }, { "epoch": 0.630666569957206, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.2605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8695 }, { "epoch": 0.6307391020526583, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8696 }, { "epoch": 0.6308116341481106, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.4526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8697 }, { "epoch": 0.6308841662435628, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 8.9469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8698 }, { "epoch": 0.630956698339015, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 8.3471, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8699 }, { "epoch": 0.6310292304344672, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 8.8316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8700 }, { "epoch": 0.6311017625299195, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.1192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8701 }, { "epoch": 0.6311742946253718, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8702 }, { "epoch": 0.631246826720824, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.7235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8703 }, { "epoch": 0.6313193588162762, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.5669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8704 }, { "epoch": 0.6313918909117284, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.3358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8705 }, { "epoch": 0.6314644230071806, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8706 }, { "epoch": 0.631536955102633, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.4472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8707 }, { "epoch": 0.6316094871980852, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.7164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8708 }, { "epoch": 0.6316820192935374, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8709 }, { "epoch": 0.6317545513889896, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8710 }, { "epoch": 0.6318270834844418, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8711 }, { "epoch": 0.6318996155798942, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.9914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8712 }, { "epoch": 0.6319721476753464, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8713 }, { "epoch": 0.6320446797707986, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8714 }, { "epoch": 0.6321172118662508, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.4376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8715 }, { "epoch": 0.632189743961703, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.8257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8716 }, { "epoch": 0.6322622760571552, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.5915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8717 }, { "epoch": 0.6323348081526076, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8718 }, { "epoch": 0.6324073402480598, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.7178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8719 }, { "epoch": 0.632479872343512, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8720 }, { "epoch": 0.6325524044389642, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8721 }, { "epoch": 0.6326249365344164, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8722 }, { "epoch": 0.6326974686298688, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 9.3408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8723 }, { "epoch": 0.632770000725321, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 8.9353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8724 }, { "epoch": 0.6328425328207732, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8725 }, { "epoch": 0.6329150649162254, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.5518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8726 }, { "epoch": 0.6329875970116776, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.2429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8727 }, { "epoch": 0.6330601291071299, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8728 }, { "epoch": 0.6331326612025822, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8729 }, { "epoch": 0.6332051932980344, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8730 }, { "epoch": 0.6332777253934866, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.0944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8731 }, { "epoch": 0.6333502574889388, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8732 }, { "epoch": 0.6334227895843911, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.4177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8733 }, { "epoch": 0.6334953216798433, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8734 }, { "epoch": 0.6335678537752956, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8735 }, { "epoch": 0.6336403858707478, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8736 }, { "epoch": 0.6337129179662, "grad_norm": 21.5, "learning_rate": 0.0003, "loss": 8.6036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8737 }, { "epoch": 0.6337854500616523, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8738 }, { "epoch": 0.6338579821571045, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.18, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8739 }, { "epoch": 0.6339305142525568, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.16, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8740 }, { "epoch": 0.634003046348009, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8741 }, { "epoch": 0.6340755784434612, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.8198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8742 }, { "epoch": 0.6341481105389135, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.3962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8743 }, { "epoch": 0.6342206426343657, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8744 }, { "epoch": 0.6342931747298179, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.7989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8745 }, { "epoch": 0.6343657068252702, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8746 }, { "epoch": 0.6344382389207224, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.2284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8747 }, { "epoch": 0.6345107710161747, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8748 }, { "epoch": 0.6345833031116269, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8749 }, { "epoch": 0.6346558352070791, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.3274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8750 }, { "epoch": 0.6347283673025313, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8751 }, { "epoch": 0.6348008993979836, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.1035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8752 }, { "epoch": 0.6348734314934359, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8753 }, { "epoch": 0.6349459635888881, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.7582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8754 }, { "epoch": 0.6350184956843403, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8755 }, { "epoch": 0.6350910277797925, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.5308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8756 }, { "epoch": 0.6351635598752448, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8757 }, { "epoch": 0.6352360919706971, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.4084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8758 }, { "epoch": 0.6353086240661493, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8759 }, { "epoch": 0.6353811561616015, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.6437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8760 }, { "epoch": 0.6354536882570537, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.5349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8761 }, { "epoch": 0.6355262203525059, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.2415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8762 }, { "epoch": 0.6355987524479583, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8763 }, { "epoch": 0.6356712845434105, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8764 }, { "epoch": 0.6357438166388627, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8765 }, { "epoch": 0.6358163487343149, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.6466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8766 }, { "epoch": 0.6358888808297671, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8767 }, { "epoch": 0.6359614129252195, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.7618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8768 }, { "epoch": 0.6360339450206717, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8769 }, { "epoch": 0.6361064771161239, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.9172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8770 }, { "epoch": 0.6361790092115761, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.5737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8771 }, { "epoch": 0.6362515413070283, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8772 }, { "epoch": 0.6363240734024806, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.3164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8773 }, { "epoch": 0.6363966054979329, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.1194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8774 }, { "epoch": 0.6364691375933851, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 9.1027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8775 }, { "epoch": 0.6365416696888373, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.4269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8776 }, { "epoch": 0.6366142017842895, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8777 }, { "epoch": 0.6366867338797418, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.7032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8778 }, { "epoch": 0.636759265975194, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 8.6059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8779 }, { "epoch": 0.6368317980706463, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8780 }, { "epoch": 0.6369043301660985, "grad_norm": 57.75, "learning_rate": 0.0003, "loss": 9.0077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8781 }, { "epoch": 0.6369768622615507, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.2022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8782 }, { "epoch": 0.637049394357003, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.3146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8783 }, { "epoch": 0.6371219264524552, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.6678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8784 }, { "epoch": 0.6371944585479075, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.4814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8785 }, { "epoch": 0.6372669906433597, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8786 }, { "epoch": 0.6373395227388119, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.5916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8787 }, { "epoch": 0.6374120548342641, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.3504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8788 }, { "epoch": 0.6374845869297164, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8789 }, { "epoch": 0.6375571190251687, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.5187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8790 }, { "epoch": 0.6376296511206209, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.6601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8791 }, { "epoch": 0.6377021832160731, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8792 }, { "epoch": 0.6377747153115253, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8793 }, { "epoch": 0.6378472474069776, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8794 }, { "epoch": 0.6379197795024298, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.7495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8795 }, { "epoch": 0.6379923115978821, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.3048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8796 }, { "epoch": 0.6380648436933343, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.7167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8797 }, { "epoch": 0.6381373757887865, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.3897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8798 }, { "epoch": 0.6382099078842388, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.4803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8799 }, { "epoch": 0.638282439979691, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.2181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8800 }, { "epoch": 0.6383549720751432, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.7869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8801 }, { "epoch": 0.6384275041705955, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.7988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8802 }, { "epoch": 0.6385000362660477, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8803 }, { "epoch": 0.6385725683615, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0052, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8804 }, { "epoch": 0.6386451004569522, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.1055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8805 }, { "epoch": 0.6387176325524044, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8806 }, { "epoch": 0.6387901646478567, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8807 }, { "epoch": 0.6388626967433089, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.2243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8808 }, { "epoch": 0.6389352288387612, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.5884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8809 }, { "epoch": 0.6390077609342134, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.3264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8810 }, { "epoch": 0.6390802930296656, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8811 }, { "epoch": 0.6391528251251178, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.2066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8812 }, { "epoch": 0.6392253572205701, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8813 }, { "epoch": 0.6392978893160224, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8814 }, { "epoch": 0.6393704214114746, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.8839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8815 }, { "epoch": 0.6394429535069268, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8816 }, { "epoch": 0.639515485602379, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 8.7785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8817 }, { "epoch": 0.6395880176978312, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8818 }, { "epoch": 0.6396605497932836, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8819 }, { "epoch": 0.6397330818887358, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 8.8133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8820 }, { "epoch": 0.639805613984188, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 8.8927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8821 }, { "epoch": 0.6398781460796402, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8822 }, { "epoch": 0.6399506781750924, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8823 }, { "epoch": 0.6400232102705448, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8824 }, { "epoch": 0.640095742365997, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.5484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8825 }, { "epoch": 0.6401682744614492, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.3639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8826 }, { "epoch": 0.6402408065569014, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.2602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8827 }, { "epoch": 0.6403133386523536, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8828 }, { "epoch": 0.640385870747806, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.9598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8829 }, { "epoch": 0.6404584028432582, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.6348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8830 }, { "epoch": 0.6405309349387104, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.5317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8831 }, { "epoch": 0.6406034670341626, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8832 }, { "epoch": 0.6406759991296148, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8833 }, { "epoch": 0.6407485312250671, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 9.0051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8834 }, { "epoch": 0.6408210633205194, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.1961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8835 }, { "epoch": 0.6408935954159716, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8836 }, { "epoch": 0.6409661275114238, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8837 }, { "epoch": 0.641038659606876, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.7187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8838 }, { "epoch": 0.6411111917023283, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8839 }, { "epoch": 0.6411837237977805, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.4359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8840 }, { "epoch": 0.6412562558932328, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8841 }, { "epoch": 0.641328787988685, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8842 }, { "epoch": 0.6414013200841372, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.2076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8843 }, { "epoch": 0.6414738521795895, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8844 }, { "epoch": 0.6415463842750417, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.6594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8845 }, { "epoch": 0.641618916370494, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8846 }, { "epoch": 0.6416914484659462, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.2007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8847 }, { "epoch": 0.6417639805613984, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8848 }, { "epoch": 0.6418365126568507, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8849 }, { "epoch": 0.6419090447523029, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8850 }, { "epoch": 0.6419815768477551, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8851 }, { "epoch": 0.6420541089432074, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8852 }, { "epoch": 0.6421266410386596, "grad_norm": 22.5, "learning_rate": 0.0003, "loss": 8.7244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8853 }, { "epoch": 0.6421991731341118, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8854 }, { "epoch": 0.6422717052295641, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8855 }, { "epoch": 0.6423442373250163, "grad_norm": 28.375, "learning_rate": 0.0003, "loss": 8.9968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8856 }, { "epoch": 0.6424167694204685, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8857 }, { "epoch": 0.6424893015159208, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8858 }, { "epoch": 0.642561833611373, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.4109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8859 }, { "epoch": 0.6426343657068253, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.1742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8860 }, { "epoch": 0.6427068978022775, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.2404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8861 }, { "epoch": 0.6427794298977297, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8862 }, { "epoch": 0.642851961993182, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 9.3494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8863 }, { "epoch": 0.6429244940886342, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8864 }, { "epoch": 0.6429970261840865, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 9.103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8865 }, { "epoch": 0.6430695582795387, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.7976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8866 }, { "epoch": 0.6431420903749909, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8867 }, { "epoch": 0.6432146224704431, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8868 }, { "epoch": 0.6432871545658954, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.9722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8869 }, { "epoch": 0.6433596866613477, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.0491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8870 }, { "epoch": 0.6434322187567999, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.8195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8871 }, { "epoch": 0.6435047508522521, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8872 }, { "epoch": 0.6435772829477043, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.74, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8873 }, { "epoch": 0.6436498150431565, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8874 }, { "epoch": 0.6437223471386089, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.3627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8875 }, { "epoch": 0.6437948792340611, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.4514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8876 }, { "epoch": 0.6438674113295133, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.04, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8877 }, { "epoch": 0.6439399434249655, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.1456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8878 }, { "epoch": 0.6440124755204177, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8879 }, { "epoch": 0.6440850076158701, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8880 }, { "epoch": 0.6441575397113223, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8881 }, { "epoch": 0.6442300718067745, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8882 }, { "epoch": 0.6443026039022267, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8883 }, { "epoch": 0.6443751359976789, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.9289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8884 }, { "epoch": 0.6444476680931313, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8885 }, { "epoch": 0.6445202001885835, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.6734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8886 }, { "epoch": 0.6445927322840357, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8887 }, { "epoch": 0.6446652643794879, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8888 }, { "epoch": 0.6447377964749401, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.3414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8889 }, { "epoch": 0.6448103285703924, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.7963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8890 }, { "epoch": 0.6448828606658447, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8891 }, { "epoch": 0.6449553927612969, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8892 }, { "epoch": 0.6450279248567491, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.2524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8893 }, { "epoch": 0.6451004569522013, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 9.0748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8894 }, { "epoch": 0.6451729890476536, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 8.7673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8895 }, { "epoch": 0.6452455211431058, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.2481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8896 }, { "epoch": 0.6453180532385581, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8897 }, { "epoch": 0.6453905853340103, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8898 }, { "epoch": 0.6454631174294625, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8899 }, { "epoch": 0.6455356495249148, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.4505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8900 }, { "epoch": 0.645608181620367, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.0334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8901 }, { "epoch": 0.6456807137158193, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.8818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8902 }, { "epoch": 0.6457532458112715, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.7163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8903 }, { "epoch": 0.6458257779067237, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.8371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8904 }, { "epoch": 0.645898310002176, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8905 }, { "epoch": 0.6459708420976282, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.6343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8906 }, { "epoch": 0.6460433741930804, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8907 }, { "epoch": 0.6461159062885327, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.6002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8908 }, { "epoch": 0.6461884383839849, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8909 }, { "epoch": 0.6462609704794372, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.5275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8910 }, { "epoch": 0.6463335025748894, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8911 }, { "epoch": 0.6464060346703416, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.1378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8912 }, { "epoch": 0.6464785667657938, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.7037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8913 }, { "epoch": 0.6465510988612461, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8914 }, { "epoch": 0.6466236309566984, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.2695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8915 }, { "epoch": 0.6466961630521506, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8916 }, { "epoch": 0.6467686951476028, "grad_norm": 19.0, "learning_rate": 0.0003, "loss": 8.7597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8917 }, { "epoch": 0.646841227243055, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8918 }, { "epoch": 0.6469137593385073, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8919 }, { "epoch": 0.6469862914339596, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.5587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8920 }, { "epoch": 0.6470588235294118, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8921 }, { "epoch": 0.647131355624864, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8922 }, { "epoch": 0.6472038877203162, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8923 }, { "epoch": 0.6472764198157684, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8924 }, { "epoch": 0.6473489519112207, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.7844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8925 }, { "epoch": 0.647421484006673, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8926 }, { "epoch": 0.6474940161021252, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.1076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8927 }, { "epoch": 0.6475665481975774, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8928 }, { "epoch": 0.6476390802930296, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 8.8893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8929 }, { "epoch": 0.6477116123884818, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.8309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8930 }, { "epoch": 0.6477841444839342, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8931 }, { "epoch": 0.6478566765793864, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8932 }, { "epoch": 0.6479292086748386, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8933 }, { "epoch": 0.6480017407702908, "grad_norm": 1.71875, "learning_rate": 0.0003, "loss": 9.0396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8934 }, { "epoch": 0.648074272865743, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.2602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8935 }, { "epoch": 0.6481468049611954, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.3317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8936 }, { "epoch": 0.6482193370566476, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.8812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8937 }, { "epoch": 0.6482918691520998, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.4546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8938 }, { "epoch": 0.648364401247552, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.4046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8939 }, { "epoch": 0.6484369333430042, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8940 }, { "epoch": 0.6485094654384566, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.4307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8941 }, { "epoch": 0.6485819975339088, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8942 }, { "epoch": 0.648654529629361, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8943 }, { "epoch": 0.6487270617248132, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.2097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8944 }, { "epoch": 0.6487995938202654, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8945 }, { "epoch": 0.6488721259157177, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.5212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8946 }, { "epoch": 0.64894465801117, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.3357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8947 }, { "epoch": 0.6490171901066222, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8948 }, { "epoch": 0.6490897222020744, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.1874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8949 }, { "epoch": 0.6491622542975266, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.5304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8950 }, { "epoch": 0.6492347863929789, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8951 }, { "epoch": 0.6493073184884312, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.0718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8952 }, { "epoch": 0.6493798505838834, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.1881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8953 }, { "epoch": 0.6494523826793356, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.2619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8954 }, { "epoch": 0.6495249147747878, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.8012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8955 }, { "epoch": 0.6495974468702401, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.5284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8956 }, { "epoch": 0.6496699789656923, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.3843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8957 }, { "epoch": 0.6497425110611446, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.3674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8958 }, { "epoch": 0.6498150431565968, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.2973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8959 }, { "epoch": 0.649887575252049, "grad_norm": 21.375, "learning_rate": 0.0003, "loss": 8.6836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8960 }, { "epoch": 0.6499601073475013, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.6354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8961 }, { "epoch": 0.6500326394429535, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.4529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8962 }, { "epoch": 0.6501051715384057, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 8.3215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8963 }, { "epoch": 0.650177703633858, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.6416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8964 }, { "epoch": 0.6502502357293102, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.4973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8965 }, { "epoch": 0.6503227678247625, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.2854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8966 }, { "epoch": 0.6503952999202147, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8967 }, { "epoch": 0.6504678320156669, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8968 }, { "epoch": 0.6505403641111192, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.7887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8969 }, { "epoch": 0.6506128962065714, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.0038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8970 }, { "epoch": 0.6506854283020237, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8971 }, { "epoch": 0.6507579603974759, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.6496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8972 }, { "epoch": 0.6508304924929281, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8973 }, { "epoch": 0.6509030245883803, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8974 }, { "epoch": 0.6509755566838326, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 9.0716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8975 }, { "epoch": 0.6510480887792849, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8976 }, { "epoch": 0.6511206208747371, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8977 }, { "epoch": 0.6511931529701893, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8978 }, { "epoch": 0.6512656850656415, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.0751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8979 }, { "epoch": 0.6513382171610937, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8980 }, { "epoch": 0.6514107492565461, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.6934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8981 }, { "epoch": 0.6514832813519983, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.8949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8982 }, { "epoch": 0.6515558134474505, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8983 }, { "epoch": 0.6516283455429027, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.4856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8984 }, { "epoch": 0.6517008776383549, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.8325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8985 }, { "epoch": 0.6517734097338073, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.8236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8986 }, { "epoch": 0.6518459418292595, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8987 }, { "epoch": 0.6519184739247117, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8988 }, { "epoch": 0.6519910060201639, "grad_norm": 12.5625, "learning_rate": 0.0003, "loss": 8.1636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8989 }, { "epoch": 0.6520635381156161, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.4374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8990 }, { "epoch": 0.6521360702110685, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.7526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8991 }, { "epoch": 0.6522086023065207, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.3301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8992 }, { "epoch": 0.6522811344019729, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.6884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8993 }, { "epoch": 0.6523536664974251, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8994 }, { "epoch": 0.6524261985928773, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.1572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8995 }, { "epoch": 0.6524987306883295, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.2356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8996 }, { "epoch": 0.6525712627837819, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8997 }, { "epoch": 0.6526437948792341, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8998 }, { "epoch": 0.6527163269746863, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 8999 }, { "epoch": 0.6527888590701385, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9000 }, { "epoch": 0.6528613911655907, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.6707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9001 }, { "epoch": 0.652933923261043, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9002 }, { "epoch": 0.6530064553564953, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.9214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9003 }, { "epoch": 0.6530789874519475, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.6604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9004 }, { "epoch": 0.6531515195473997, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.7509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9005 }, { "epoch": 0.6532240516428519, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 9.0902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9006 }, { "epoch": 0.6532965837383042, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.6318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9007 }, { "epoch": 0.6533691158337565, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.1228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9008 }, { "epoch": 0.6534416479292087, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.7856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9009 }, { "epoch": 0.6535141800246609, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.0116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9010 }, { "epoch": 0.6535867121201131, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.1419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9011 }, { "epoch": 0.6536592442155654, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9012 }, { "epoch": 0.6537317763110176, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.7786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9013 }, { "epoch": 0.6538043084064699, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.7919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9014 }, { "epoch": 0.6538768405019221, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9015 }, { "epoch": 0.6539493725973743, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.9746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9016 }, { "epoch": 0.6540219046928266, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.7244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9017 }, { "epoch": 0.6540944367882788, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9018 }, { "epoch": 0.654166968883731, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.3284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9019 }, { "epoch": 0.6542395009791833, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9020 }, { "epoch": 0.6543120330746355, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.0016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9021 }, { "epoch": 0.6543845651700878, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.3765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9022 }, { "epoch": 0.65445709726554, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.6386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9023 }, { "epoch": 0.6545296293609922, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.2462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9024 }, { "epoch": 0.6546021614564445, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.4997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9025 }, { "epoch": 0.6546746935518967, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9026 }, { "epoch": 0.654747225647349, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9027 }, { "epoch": 0.6548197577428012, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 8.4821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9028 }, { "epoch": 0.6548922898382534, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9029 }, { "epoch": 0.6549648219337056, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.4331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9030 }, { "epoch": 0.6550373540291579, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.7067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9031 }, { "epoch": 0.6551098861246102, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.7568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9032 }, { "epoch": 0.6551824182200624, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.1372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9033 }, { "epoch": 0.6552549503155146, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.9526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9034 }, { "epoch": 0.6553274824109668, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9035 }, { "epoch": 0.655400014506419, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.5347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9036 }, { "epoch": 0.6554725466018714, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9037 }, { "epoch": 0.6555450786973236, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.7701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9038 }, { "epoch": 0.6556176107927758, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.4066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9039 }, { "epoch": 0.655690142888228, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.0593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9040 }, { "epoch": 0.6557626749836802, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9041 }, { "epoch": 0.6558352070791326, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.5324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9042 }, { "epoch": 0.6559077391745848, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9043 }, { "epoch": 0.655980271270037, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.5992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9044 }, { "epoch": 0.6560528033654892, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9045 }, { "epoch": 0.6561253354609414, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.1385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9046 }, { "epoch": 0.6561978675563938, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9047 }, { "epoch": 0.656270399651846, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.7403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9048 }, { "epoch": 0.6563429317472982, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9049 }, { "epoch": 0.6564154638427504, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.8386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9050 }, { "epoch": 0.6564879959382026, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9051 }, { "epoch": 0.6565605280336549, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.4406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9052 }, { "epoch": 0.6566330601291072, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.3517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9053 }, { "epoch": 0.6567055922245594, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.6189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9054 }, { "epoch": 0.6567781243200116, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.7647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9055 }, { "epoch": 0.6568506564154638, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9056 }, { "epoch": 0.6569231885109161, "grad_norm": 1.515625, "learning_rate": 0.0003, "loss": 8.9237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9057 }, { "epoch": 0.6569957206063683, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9058 }, { "epoch": 0.6570682527018206, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9059 }, { "epoch": 0.6571407847972728, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.9749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9060 }, { "epoch": 0.657213316892725, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9061 }, { "epoch": 0.6572858489881773, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.3354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9062 }, { "epoch": 0.6573583810836295, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.2788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9063 }, { "epoch": 0.6574309131790818, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.7488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9064 }, { "epoch": 0.657503445274534, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9065 }, { "epoch": 0.6575759773699862, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.1389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9066 }, { "epoch": 0.6576485094654384, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.5784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9067 }, { "epoch": 0.6577210415608907, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.0661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9068 }, { "epoch": 0.657793573656343, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.3974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9069 }, { "epoch": 0.6578661057517952, "grad_norm": 42.0, "learning_rate": 0.0003, "loss": 9.1753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9070 }, { "epoch": 0.6579386378472474, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.7158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9071 }, { "epoch": 0.6580111699426996, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.4307, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9072 }, { "epoch": 0.6580837020381519, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9073 }, { "epoch": 0.6581562341336041, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.3931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9074 }, { "epoch": 0.6582287662290564, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 9.3347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9075 }, { "epoch": 0.6583012983245086, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9076 }, { "epoch": 0.6583738304199608, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9077 }, { "epoch": 0.6584463625154131, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9078 }, { "epoch": 0.6585188946108653, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.1519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9079 }, { "epoch": 0.6585914267063175, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.1839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9080 }, { "epoch": 0.6586639588017698, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.3298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9081 }, { "epoch": 0.658736490897222, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9082 }, { "epoch": 0.6588090229926743, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9083 }, { "epoch": 0.6588815550881265, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.7228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9084 }, { "epoch": 0.6589540871835787, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.5005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9085 }, { "epoch": 0.659026619279031, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9086 }, { "epoch": 0.6590991513744832, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.4926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9087 }, { "epoch": 0.6591716834699355, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.5877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9088 }, { "epoch": 0.6592442155653877, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.2038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9089 }, { "epoch": 0.6593167476608399, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9090 }, { "epoch": 0.6593892797562921, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.0195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9091 }, { "epoch": 0.6594618118517444, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9092 }, { "epoch": 0.6595343439471967, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.4504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9093 }, { "epoch": 0.6596068760426489, "grad_norm": 11.8125, "learning_rate": 0.0003, "loss": 8.7751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9094 }, { "epoch": 0.6596794081381011, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9095 }, { "epoch": 0.6597519402335533, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9096 }, { "epoch": 0.6598244723290055, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 8.9476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9097 }, { "epoch": 0.6598970044244579, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9098 }, { "epoch": 0.6599695365199101, "grad_norm": 13.0, "learning_rate": 0.0003, "loss": 8.5082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9099 }, { "epoch": 0.6600420686153623, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.1391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9100 }, { "epoch": 0.6601146007108145, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9101 }, { "epoch": 0.6601871328062667, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9102 }, { "epoch": 0.6602596649017191, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 9.236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9103 }, { "epoch": 0.6603321969971713, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.7009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9104 }, { "epoch": 0.6604047290926235, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.7663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9105 }, { "epoch": 0.6604772611880757, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9106 }, { "epoch": 0.6605497932835279, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 9.439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9107 }, { "epoch": 0.6606223253789802, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9108 }, { "epoch": 0.6606948574744325, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9109 }, { "epoch": 0.6607673895698847, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.2585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9110 }, { "epoch": 0.6608399216653369, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.9654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9111 }, { "epoch": 0.6609124537607891, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.9419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9112 }, { "epoch": 0.6609849858562414, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9113 }, { "epoch": 0.6610575179516937, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9114 }, { "epoch": 0.6611300500471459, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9115 }, { "epoch": 0.6612025821425981, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9116 }, { "epoch": 0.6612751142380503, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.3274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9117 }, { "epoch": 0.6613476463335026, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9118 }, { "epoch": 0.6614201784289548, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.1027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9119 }, { "epoch": 0.6614927105244071, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.8568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9120 }, { "epoch": 0.6615652426198593, "grad_norm": 21.75, "learning_rate": 0.0003, "loss": 8.9104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9121 }, { "epoch": 0.6616377747153115, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.0512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9122 }, { "epoch": 0.6617103068107638, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.2557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9123 }, { "epoch": 0.661782838906216, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.4717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9124 }, { "epoch": 0.6618553710016682, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9125 }, { "epoch": 0.6619279030971205, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.4468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9126 }, { "epoch": 0.6620004351925727, "grad_norm": 18.0, "learning_rate": 0.0003, "loss": 9.3018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9127 }, { "epoch": 0.662072967288025, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.8507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9128 }, { "epoch": 0.6621454993834772, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9129 }, { "epoch": 0.6622180314789294, "grad_norm": 78.5, "learning_rate": 0.0003, "loss": 8.7165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9130 }, { "epoch": 0.6622905635743817, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.4659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9131 }, { "epoch": 0.6623630956698339, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.6707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9132 }, { "epoch": 0.6624356277652862, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.4628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9133 }, { "epoch": 0.6625081598607384, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9134 }, { "epoch": 0.6625806919561906, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.8128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9135 }, { "epoch": 0.6626532240516428, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.6176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9136 }, { "epoch": 0.6627257561470951, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9137 }, { "epoch": 0.6627982882425473, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.4005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9138 }, { "epoch": 0.6628708203379996, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9139 }, { "epoch": 0.6629433524334518, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9140 }, { "epoch": 0.663015884528904, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9141 }, { "epoch": 0.6630884166243562, "grad_norm": 20.375, "learning_rate": 0.0003, "loss": 9.1081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9142 }, { "epoch": 0.6631609487198085, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.64, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9143 }, { "epoch": 0.6632334808152608, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9144 }, { "epoch": 0.663306012910713, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.4269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9145 }, { "epoch": 0.6633785450061652, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9146 }, { "epoch": 0.6634510771016174, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.0418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9147 }, { "epoch": 0.6635236091970697, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9148 }, { "epoch": 0.663596141292522, "grad_norm": 30.375, "learning_rate": 0.0003, "loss": 9.1013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9149 }, { "epoch": 0.6636686733879742, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.1309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9150 }, { "epoch": 0.6637412054834264, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.6495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9151 }, { "epoch": 0.6638137375788786, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9152 }, { "epoch": 0.6638862696743308, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.4147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9153 }, { "epoch": 0.6639588017697832, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.2438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9154 }, { "epoch": 0.6640313338652354, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.5214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9155 }, { "epoch": 0.6641038659606876, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.0865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9156 }, { "epoch": 0.6641763980561398, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9157 }, { "epoch": 0.664248930151592, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9158 }, { "epoch": 0.6643214622470444, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.6899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9159 }, { "epoch": 0.6643939943424966, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 8.3506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9160 }, { "epoch": 0.6644665264379488, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9161 }, { "epoch": 0.664539058533401, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.6935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9162 }, { "epoch": 0.6646115906288532, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.7337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9163 }, { "epoch": 0.6646841227243055, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 8.4573, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9164 }, { "epoch": 0.6647566548197578, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9165 }, { "epoch": 0.66482918691521, "grad_norm": 23.5, "learning_rate": 0.0003, "loss": 8.7939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9166 }, { "epoch": 0.6649017190106622, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.66, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9167 }, { "epoch": 0.6649742511061144, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.9595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9168 }, { "epoch": 0.6650467832015667, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.8556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9169 }, { "epoch": 0.665119315297019, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.3006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9170 }, { "epoch": 0.6651918473924712, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9171 }, { "epoch": 0.6652643794879234, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9172 }, { "epoch": 0.6653369115833756, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.3657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9173 }, { "epoch": 0.6654094436788279, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.1855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9174 }, { "epoch": 0.6654819757742801, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.7963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9175 }, { "epoch": 0.6655545078697324, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9007, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9176 }, { "epoch": 0.6656270399651846, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.7131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9177 }, { "epoch": 0.6656995720606368, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9178 }, { "epoch": 0.6657721041560891, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.7428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9179 }, { "epoch": 0.6658446362515413, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.9787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9180 }, { "epoch": 0.6659171683469935, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9181 }, { "epoch": 0.6659897004424458, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9182 }, { "epoch": 0.666062232537898, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.2953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9183 }, { "epoch": 0.6661347646333503, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.5044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9184 }, { "epoch": 0.6662072967288025, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9185 }, { "epoch": 0.6662798288242547, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.7195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9186 }, { "epoch": 0.666352360919707, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9187 }, { "epoch": 0.6664248930151592, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.5745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9188 }, { "epoch": 0.6664974251106115, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9189 }, { "epoch": 0.6665699572060637, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.0965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9190 }, { "epoch": 0.6666424893015159, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.5093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9191 }, { "epoch": 0.6667150213969681, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.4971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9192 }, { "epoch": 0.6667875534924204, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9193 }, { "epoch": 0.6668600855878727, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9194 }, { "epoch": 0.6669326176833249, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9195 }, { "epoch": 0.6670051497787771, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.0098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9196 }, { "epoch": 0.6670776818742293, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9197 }, { "epoch": 0.6671502139696815, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.4783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9198 }, { "epoch": 0.6672227460651339, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.4304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9199 }, { "epoch": 0.6672952781605861, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.6513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9200 }, { "epoch": 0.6673678102560383, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.9123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9201 }, { "epoch": 0.6674403423514905, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 9.4534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9202 }, { "epoch": 0.6675128744469427, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.7105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9203 }, { "epoch": 0.667585406542395, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.0588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9204 }, { "epoch": 0.6676579386378473, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.8406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9205 }, { "epoch": 0.6677304707332995, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9206 }, { "epoch": 0.6678030028287517, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9207 }, { "epoch": 0.6678755349242039, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9208 }, { "epoch": 0.6679480670196561, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.3475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9209 }, { "epoch": 0.6680205991151085, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9210 }, { "epoch": 0.6680931312105607, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.5612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9211 }, { "epoch": 0.6681656633060129, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9212 }, { "epoch": 0.6682381954014651, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.8541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9213 }, { "epoch": 0.6683107274969173, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.6621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9214 }, { "epoch": 0.6683832595923697, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9215 }, { "epoch": 0.6684557916878219, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.7753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9216 }, { "epoch": 0.6685283237832741, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9217 }, { "epoch": 0.6686008558787263, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.7489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9218 }, { "epoch": 0.6686733879741785, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.7068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9219 }, { "epoch": 0.6687459200696309, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.2417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9220 }, { "epoch": 0.6688184521650831, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.4741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9221 }, { "epoch": 0.6688909842605353, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.9814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9222 }, { "epoch": 0.6689635163559875, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9223 }, { "epoch": 0.6690360484514397, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9224 }, { "epoch": 0.669108580546892, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.4475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9225 }, { "epoch": 0.6691811126423443, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.1437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9226 }, { "epoch": 0.6692536447377965, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9227 }, { "epoch": 0.6693261768332487, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9228 }, { "epoch": 0.6693987089287009, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.6114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9229 }, { "epoch": 0.6694712410241532, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9230 }, { "epoch": 0.6695437731196054, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.6257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9231 }, { "epoch": 0.6696163052150577, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9232 }, { "epoch": 0.6696888373105099, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.2078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9233 }, { "epoch": 0.6697613694059621, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9234 }, { "epoch": 0.6698339015014144, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9235 }, { "epoch": 0.6699064335968666, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9236 }, { "epoch": 0.6699789656923189, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9237 }, { "epoch": 0.6700514977877711, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.9011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9238 }, { "epoch": 0.6701240298832233, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.6772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9239 }, { "epoch": 0.6701965619786756, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9240 }, { "epoch": 0.6702690940741278, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9241 }, { "epoch": 0.67034162616958, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 9.1425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9242 }, { "epoch": 0.6704141582650323, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9243 }, { "epoch": 0.6704866903604845, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.7828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9244 }, { "epoch": 0.6705592224559368, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9245 }, { "epoch": 0.670631754551389, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9246 }, { "epoch": 0.6707042866468412, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.7812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9247 }, { "epoch": 0.6707768187422934, "grad_norm": 12.875, "learning_rate": 0.0003, "loss": 8.8616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9248 }, { "epoch": 0.6708493508377457, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9249 }, { "epoch": 0.670921882933198, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.1298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9250 }, { "epoch": 0.6709944150286502, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.6629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9251 }, { "epoch": 0.6710669471241024, "grad_norm": 14.75, "learning_rate": 0.0003, "loss": 8.3237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9252 }, { "epoch": 0.6711394792195546, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 9.1262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9253 }, { "epoch": 0.6712120113150069, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 8.8697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9254 }, { "epoch": 0.6712845434104592, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.3941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9255 }, { "epoch": 0.6713570755059114, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9256 }, { "epoch": 0.6714296076013636, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9257 }, { "epoch": 0.6715021396968158, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 9.0735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9258 }, { "epoch": 0.671574671792268, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9259 }, { "epoch": 0.6716472038877204, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9260 }, { "epoch": 0.6717197359831726, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9261 }, { "epoch": 0.6717922680786248, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.3592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9262 }, { "epoch": 0.671864800174077, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.6313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9263 }, { "epoch": 0.6719373322695292, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9264 }, { "epoch": 0.6720098643649816, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9265 }, { "epoch": 0.6720823964604338, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.3944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9266 }, { "epoch": 0.672154928555886, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.6791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9267 }, { "epoch": 0.6722274606513382, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 8.9131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9268 }, { "epoch": 0.6722999927467904, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9269 }, { "epoch": 0.6723725248422427, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9270 }, { "epoch": 0.672445056937695, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9271 }, { "epoch": 0.6725175890331472, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9272 }, { "epoch": 0.6725901211285994, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 8.6941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9273 }, { "epoch": 0.6726626532240516, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.5433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9274 }, { "epoch": 0.6727351853195038, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.0013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9275 }, { "epoch": 0.6728077174149562, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.7017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9276 }, { "epoch": 0.6728802495104084, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.4256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9277 }, { "epoch": 0.6729527816058606, "grad_norm": 107.5, "learning_rate": 0.0003, "loss": 8.8547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9278 }, { "epoch": 0.6730253137013128, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9279 }, { "epoch": 0.673097845796765, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.9496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9280 }, { "epoch": 0.6731703778922173, "grad_norm": 28.25, "learning_rate": 0.0003, "loss": 8.1694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9281 }, { "epoch": 0.6732429099876696, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.1444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9282 }, { "epoch": 0.6733154420831218, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9283 }, { "epoch": 0.673387974178574, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.6868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9284 }, { "epoch": 0.6734605062740262, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.1513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9285 }, { "epoch": 0.6735330383694785, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.9333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9286 }, { "epoch": 0.6736055704649307, "grad_norm": 15.875, "learning_rate": 0.0003, "loss": 8.4376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9287 }, { "epoch": 0.673678102560383, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.4783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9288 }, { "epoch": 0.6737506346558352, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.4026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9289 }, { "epoch": 0.6738231667512874, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9290 }, { "epoch": 0.6738956988467397, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.2272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9291 }, { "epoch": 0.6739682309421919, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.7381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9292 }, { "epoch": 0.6740407630376442, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.5652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9293 }, { "epoch": 0.6741132951330964, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.5053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9294 }, { "epoch": 0.6741858272285486, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.9377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9295 }, { "epoch": 0.6742583593240009, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.0104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9296 }, { "epoch": 0.6743308914194531, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9297 }, { "epoch": 0.6744034235149053, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9298 }, { "epoch": 0.6744759556103576, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.2994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9299 }, { "epoch": 0.6745484877058098, "grad_norm": 1.671875, "learning_rate": 0.0003, "loss": 8.4931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9300 }, { "epoch": 0.6746210198012621, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.8081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9301 }, { "epoch": 0.6746935518967143, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 8.5068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9302 }, { "epoch": 0.6747660839921665, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9303 }, { "epoch": 0.6748386160876187, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.0986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9304 }, { "epoch": 0.674911148183071, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.3298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9305 }, { "epoch": 0.6749836802785233, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9306 }, { "epoch": 0.6750562123739755, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9307 }, { "epoch": 0.6751287444694277, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9308 }, { "epoch": 0.6752012765648799, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.7919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9309 }, { "epoch": 0.6752738086603322, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9310 }, { "epoch": 0.6753463407557845, "grad_norm": 1.5859375, "learning_rate": 0.0003, "loss": 8.9727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9311 }, { "epoch": 0.6754188728512367, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.7771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9312 }, { "epoch": 0.6754914049466889, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.5356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9313 }, { "epoch": 0.6755639370421411, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9314 }, { "epoch": 0.6756364691375933, "grad_norm": 14.5625, "learning_rate": 0.0003, "loss": 8.7726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9315 }, { "epoch": 0.6757090012330457, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.9985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9316 }, { "epoch": 0.6757815333284979, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.8604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9317 }, { "epoch": 0.6758540654239501, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9318 }, { "epoch": 0.6759265975194023, "grad_norm": 13.8125, "learning_rate": 0.0003, "loss": 8.5336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9319 }, { "epoch": 0.6759991296148545, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.3038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9320 }, { "epoch": 0.6760716617103069, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.3637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9321 }, { "epoch": 0.6761441938057591, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 9.1069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9322 }, { "epoch": 0.6762167259012113, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9323 }, { "epoch": 0.6762892579966635, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9324 }, { "epoch": 0.6763617900921157, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.7572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9325 }, { "epoch": 0.676434322187568, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.6088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9326 }, { "epoch": 0.6765068542830203, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9327 }, { "epoch": 0.6765793863784725, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.0015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9328 }, { "epoch": 0.6766519184739247, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.5101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9329 }, { "epoch": 0.6767244505693769, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.3993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9330 }, { "epoch": 0.6767969826648292, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9331 }, { "epoch": 0.6768695147602815, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9332 }, { "epoch": 0.6769420468557337, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.6521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9333 }, { "epoch": 0.6770145789511859, "grad_norm": 17.125, "learning_rate": 0.0003, "loss": 8.4829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9334 }, { "epoch": 0.6770871110466381, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9335 }, { "epoch": 0.6771596431420904, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9336 }, { "epoch": 0.6772321752375426, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.5243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9337 }, { "epoch": 0.6773047073329949, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.8589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9338 }, { "epoch": 0.6773772394284471, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.5229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9339 }, { "epoch": 0.6774497715238993, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.8993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9340 }, { "epoch": 0.6775223036193516, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9341 }, { "epoch": 0.6775948357148038, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.9718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9342 }, { "epoch": 0.677667367810256, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.3783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9343 }, { "epoch": 0.6777398999057083, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9344 }, { "epoch": 0.6778124320011605, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.7953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9345 }, { "epoch": 0.6778849640966127, "grad_norm": 1.640625, "learning_rate": 0.0003, "loss": 9.4802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9346 }, { "epoch": 0.677957496192065, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.4953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9347 }, { "epoch": 0.6780300282875172, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.9625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9348 }, { "epoch": 0.6781025603829695, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.9996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9349 }, { "epoch": 0.6781750924784217, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.5327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9350 }, { "epoch": 0.6782476245738739, "grad_norm": 12.3125, "learning_rate": 0.0003, "loss": 8.9724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9351 }, { "epoch": 0.6783201566693262, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.7253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9352 }, { "epoch": 0.6783926887647784, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9353 }, { "epoch": 0.6784652208602306, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9354 }, { "epoch": 0.6785377529556829, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9355 }, { "epoch": 0.6786102850511351, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.0238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9356 }, { "epoch": 0.6786828171465874, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9357 }, { "epoch": 0.6787553492420396, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.9287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9358 }, { "epoch": 0.6788278813374918, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.7201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9359 }, { "epoch": 0.678900413432944, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9360 }, { "epoch": 0.6789729455283963, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9361 }, { "epoch": 0.6790454776238486, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.1511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9362 }, { "epoch": 0.6791180097193008, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9363 }, { "epoch": 0.679190541814753, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.7151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9364 }, { "epoch": 0.6792630739102052, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9365 }, { "epoch": 0.6793356060056575, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9366 }, { "epoch": 0.6794081381011098, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.4567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9367 }, { "epoch": 0.679480670196562, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.9156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9368 }, { "epoch": 0.6795532022920142, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9369 }, { "epoch": 0.6796257343874664, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.8031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9370 }, { "epoch": 0.6796982664829186, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.9632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9371 }, { "epoch": 0.679770798578371, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.4011, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9372 }, { "epoch": 0.6798433306738232, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.7087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9373 }, { "epoch": 0.6799158627692754, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.7545, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9374 }, { "epoch": 0.6799883948647276, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.0355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9375 }, { "epoch": 0.6800609269601798, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9376 }, { "epoch": 0.6801334590556322, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.4244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9377 }, { "epoch": 0.6802059911510844, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.0194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9378 }, { "epoch": 0.6802785232465366, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9379 }, { "epoch": 0.6803510553419888, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.4844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9380 }, { "epoch": 0.680423587437441, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.7385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9381 }, { "epoch": 0.6804961195328934, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.2734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9382 }, { "epoch": 0.6805686516283456, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.2836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9383 }, { "epoch": 0.6806411837237978, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.4924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9384 }, { "epoch": 0.68071371581925, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.3923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9385 }, { "epoch": 0.6807862479147022, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.8176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9386 }, { "epoch": 0.6808587800101545, "grad_norm": 18.125, "learning_rate": 0.0003, "loss": 9.1772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9387 }, { "epoch": 0.6809313121056068, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.4823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9388 }, { "epoch": 0.681003844201059, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9389 }, { "epoch": 0.6810763762965112, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.8805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9390 }, { "epoch": 0.6811489083919634, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9391 }, { "epoch": 0.6812214404874157, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.7832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9392 }, { "epoch": 0.681293972582868, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.4506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9393 }, { "epoch": 0.6813665046783202, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.0194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9394 }, { "epoch": 0.6814390367737724, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9395 }, { "epoch": 0.6815115688692246, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.7521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9396 }, { "epoch": 0.6815841009646769, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9397 }, { "epoch": 0.6816566330601291, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.3681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9398 }, { "epoch": 0.6817291651555814, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.5066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9399 }, { "epoch": 0.6818016972510336, "grad_norm": 1.375, "learning_rate": 0.0003, "loss": 9.0668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9400 }, { "epoch": 0.6818742293464858, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.8226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9401 }, { "epoch": 0.6819467614419381, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.9371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9402 }, { "epoch": 0.6820192935373903, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9403 }, { "epoch": 0.6820918256328425, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.3394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9404 }, { "epoch": 0.6821643577282948, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.3601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9405 }, { "epoch": 0.682236889823747, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.7637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9406 }, { "epoch": 0.6823094219191993, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9407 }, { "epoch": 0.6823819540146515, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.1758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9408 }, { "epoch": 0.6824544861101037, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 8.7767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9409 }, { "epoch": 0.682527018205556, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.0184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9410 }, { "epoch": 0.6825995503010082, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9411 }, { "epoch": 0.6826720823964605, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.2119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9412 }, { "epoch": 0.6827446144919127, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.2867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9413 }, { "epoch": 0.6828171465873649, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9414 }, { "epoch": 0.6828896786828171, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.5828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9415 }, { "epoch": 0.6829622107782694, "grad_norm": 24.5, "learning_rate": 0.0003, "loss": 8.7357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9416 }, { "epoch": 0.6830347428737216, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.8293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9417 }, { "epoch": 0.6831072749691739, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.3872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9418 }, { "epoch": 0.6831798070646261, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9419 }, { "epoch": 0.6832523391600783, "grad_norm": 22.125, "learning_rate": 0.0003, "loss": 8.5449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9420 }, { "epoch": 0.6833248712555305, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.7258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9421 }, { "epoch": 0.6833974033509828, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.6336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9422 }, { "epoch": 0.6834699354464351, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9423 }, { "epoch": 0.6835424675418873, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9424 }, { "epoch": 0.6836149996373395, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.4916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9425 }, { "epoch": 0.6836875317327917, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9426 }, { "epoch": 0.683760063828244, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.6224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9427 }, { "epoch": 0.6838325959236963, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9428 }, { "epoch": 0.6839051280191485, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9429 }, { "epoch": 0.6839776601146007, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.9953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9430 }, { "epoch": 0.6840501922100529, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9431 }, { "epoch": 0.6841227243055051, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.1228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9432 }, { "epoch": 0.6841952564009575, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9433 }, { "epoch": 0.6842677884964097, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9434 }, { "epoch": 0.6843403205918619, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.8242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9435 }, { "epoch": 0.6844128526873141, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9436 }, { "epoch": 0.6844853847827663, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.4247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9437 }, { "epoch": 0.6845579168782187, "grad_norm": 26.25, "learning_rate": 0.0003, "loss": 9.161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9438 }, { "epoch": 0.6846304489736709, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9439 }, { "epoch": 0.6847029810691231, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9440 }, { "epoch": 0.6847755131645753, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9441 }, { "epoch": 0.6848480452600275, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 9.111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9442 }, { "epoch": 0.6849205773554798, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.3483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9443 }, { "epoch": 0.6849931094509321, "grad_norm": 13.0, "learning_rate": 0.0003, "loss": 8.9302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9444 }, { "epoch": 0.6850656415463843, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9445 }, { "epoch": 0.6851381736418365, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.3318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9446 }, { "epoch": 0.6852107057372887, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.9055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9447 }, { "epoch": 0.685283237832741, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9448 }, { "epoch": 0.6853557699281932, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.6601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9449 }, { "epoch": 0.6854283020236455, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9450 }, { "epoch": 0.6855008341190977, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9451 }, { "epoch": 0.6855733662145499, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.9154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9452 }, { "epoch": 0.6856458983100022, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.7703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9453 }, { "epoch": 0.6857184304054544, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9454 }, { "epoch": 0.6857909625009067, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9455 }, { "epoch": 0.6858634945963589, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9456 }, { "epoch": 0.6859360266918111, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.9215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9457 }, { "epoch": 0.6860085587872634, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.7337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9458 }, { "epoch": 0.6860810908827156, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9459 }, { "epoch": 0.6861536229781678, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.6567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9460 }, { "epoch": 0.6862261550736201, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9461 }, { "epoch": 0.6862986871690723, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.2777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9462 }, { "epoch": 0.6863712192645246, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.5547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9463 }, { "epoch": 0.6864437513599768, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.6846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9464 }, { "epoch": 0.686516283455429, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.6435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9465 }, { "epoch": 0.6865888155508812, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9466 }, { "epoch": 0.6866613476463335, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.2959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9467 }, { "epoch": 0.6867338797417858, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9468 }, { "epoch": 0.686806411837238, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9469 }, { "epoch": 0.6868789439326902, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 8.5553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9470 }, { "epoch": 0.6869514760281424, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9471 }, { "epoch": 0.6870240081235947, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.2699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9472 }, { "epoch": 0.687096540219047, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9473 }, { "epoch": 0.6871690723144992, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9474 }, { "epoch": 0.6872416044099514, "grad_norm": 1.375, "learning_rate": 0.0003, "loss": 9.1484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9475 }, { "epoch": 0.6873141365054036, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.6945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9476 }, { "epoch": 0.6873866686008558, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9477 }, { "epoch": 0.6874592006963082, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9478 }, { "epoch": 0.6875317327917604, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.3041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9479 }, { "epoch": 0.6876042648872126, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9480 }, { "epoch": 0.6876767969826648, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9481 }, { "epoch": 0.687749329078117, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 9.392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9482 }, { "epoch": 0.6878218611735692, "grad_norm": 33.75, "learning_rate": 0.0003, "loss": 8.8478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9483 }, { "epoch": 0.6878943932690216, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9484 }, { "epoch": 0.6879669253644738, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.5279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9485 }, { "epoch": 0.688039457459926, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.8906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9486 }, { "epoch": 0.6881119895553782, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.7844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9487 }, { "epoch": 0.6881845216508304, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.7403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9488 }, { "epoch": 0.6882570537462828, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.9165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9489 }, { "epoch": 0.688329585841735, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9490 }, { "epoch": 0.6884021179371872, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 8.6682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9491 }, { "epoch": 0.6884746500326394, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.9487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9492 }, { "epoch": 0.6885471821280916, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9493 }, { "epoch": 0.688619714223544, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.4867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9494 }, { "epoch": 0.6886922463189962, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9495 }, { "epoch": 0.6887647784144484, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.7908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9496 }, { "epoch": 0.6888373105099006, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9497 }, { "epoch": 0.6889098426053528, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.8627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9498 }, { "epoch": 0.6889823747008051, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.4803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9499 }, { "epoch": 0.6890549067962574, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9500 }, { "epoch": 0.6891274388917096, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9501 }, { "epoch": 0.6891999709871618, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 8.6526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9502 }, { "epoch": 0.689272503082614, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.5321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9503 }, { "epoch": 0.6893450351780663, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.3067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9504 }, { "epoch": 0.6894175672735186, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.1637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9505 }, { "epoch": 0.6894900993689708, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9506 }, { "epoch": 0.689562631464423, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.7986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9507 }, { "epoch": 0.6896351635598752, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9508 }, { "epoch": 0.6897076956553275, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.5279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9509 }, { "epoch": 0.6897802277507797, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.2516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9510 }, { "epoch": 0.689852759846232, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.7923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9511 }, { "epoch": 0.6899252919416842, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9512 }, { "epoch": 0.6899978240371364, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9513 }, { "epoch": 0.6900703561325887, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9514 }, { "epoch": 0.6901428882280409, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9515 }, { "epoch": 0.6902154203234931, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.65, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9516 }, { "epoch": 0.6902879524189454, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9517 }, { "epoch": 0.6903604845143976, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9518 }, { "epoch": 0.6904330166098499, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.8689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9519 }, { "epoch": 0.6905055487053021, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.19, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9520 }, { "epoch": 0.6905780808007543, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.4086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9521 }, { "epoch": 0.6906506128962066, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.5025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9522 }, { "epoch": 0.6907231449916588, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.7752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9523 }, { "epoch": 0.6907956770871111, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9524 }, { "epoch": 0.6908682091825633, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.5561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9525 }, { "epoch": 0.6909407412780155, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9526 }, { "epoch": 0.6910132733734677, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9527 }, { "epoch": 0.69108580546892, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.2798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9528 }, { "epoch": 0.6911583375643723, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9529 }, { "epoch": 0.6912308696598245, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 9.0673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9530 }, { "epoch": 0.6913034017552767, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.9245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9531 }, { "epoch": 0.6913759338507289, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9532 }, { "epoch": 0.6914484659461811, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.9481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9533 }, { "epoch": 0.6915209980416335, "grad_norm": 55.25, "learning_rate": 0.0003, "loss": 8.4095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9534 }, { "epoch": 0.6915935301370857, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.3811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9535 }, { "epoch": 0.6916660622325379, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.4766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9536 }, { "epoch": 0.6917385943279901, "grad_norm": 23.5, "learning_rate": 0.0003, "loss": 8.8812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9537 }, { "epoch": 0.6918111264234423, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.0099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9538 }, { "epoch": 0.6918836585188947, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9539 }, { "epoch": 0.6919561906143469, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9540 }, { "epoch": 0.6920287227097991, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9541 }, { "epoch": 0.6921012548052513, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9542 }, { "epoch": 0.6921737869007035, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.5693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9543 }, { "epoch": 0.6922463189961559, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.6778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9544 }, { "epoch": 0.6923188510916081, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9545 }, { "epoch": 0.6923913831870603, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.5642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9546 }, { "epoch": 0.6924639152825125, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.5906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9547 }, { "epoch": 0.6925364473779647, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.4741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9548 }, { "epoch": 0.692608979473417, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.1149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9549 }, { "epoch": 0.6926815115688693, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9550 }, { "epoch": 0.6927540436643215, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9551 }, { "epoch": 0.6928265757597737, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9552 }, { "epoch": 0.6928991078552259, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9553 }, { "epoch": 0.6929716399506781, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.6974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9554 }, { "epoch": 0.6930441720461304, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 8.9795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9555 }, { "epoch": 0.6931167041415827, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.8606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9556 }, { "epoch": 0.6931892362370349, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.5398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9557 }, { "epoch": 0.6932617683324871, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.7688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9558 }, { "epoch": 0.6933343004279393, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.1455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9559 }, { "epoch": 0.6934068325233916, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9560 }, { "epoch": 0.6934793646188439, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9561 }, { "epoch": 0.6935518967142961, "grad_norm": 21.625, "learning_rate": 0.0003, "loss": 9.0136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9562 }, { "epoch": 0.6936244288097483, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.1067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9563 }, { "epoch": 0.6936969609052005, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.8162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9564 }, { "epoch": 0.6937694930006528, "grad_norm": 12.1875, "learning_rate": 0.0003, "loss": 8.818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9565 }, { "epoch": 0.693842025096105, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.0959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9566 }, { "epoch": 0.6939145571915573, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.2679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9567 }, { "epoch": 0.6939870892870095, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9568 }, { "epoch": 0.6940596213824617, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.2925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9569 }, { "epoch": 0.694132153477914, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.8998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9570 }, { "epoch": 0.6942046855733662, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 7.933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9571 }, { "epoch": 0.6942772176688184, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9572 }, { "epoch": 0.6943497497642707, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9573 }, { "epoch": 0.6944222818597229, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.5694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9574 }, { "epoch": 0.6944948139551752, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 9.3561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9575 }, { "epoch": 0.6945673460506274, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.8567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9576 }, { "epoch": 0.6946398781460796, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.8482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9577 }, { "epoch": 0.6947124102415319, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.6056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9578 }, { "epoch": 0.6947849423369841, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.6691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9579 }, { "epoch": 0.6948574744324364, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.9801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9580 }, { "epoch": 0.6949300065278886, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9581 }, { "epoch": 0.6950025386233408, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9582 }, { "epoch": 0.695075070718793, "grad_norm": 12.9375, "learning_rate": 0.0003, "loss": 8.997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9583 }, { "epoch": 0.6951476028142453, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9584 }, { "epoch": 0.6952201349096976, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9585 }, { "epoch": 0.6952926670051498, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.6332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9586 }, { "epoch": 0.695365199100602, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 9.0373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9587 }, { "epoch": 0.6954377311960542, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9588 }, { "epoch": 0.6955102632915064, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 9.3193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9589 }, { "epoch": 0.6955827953869588, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.3919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9590 }, { "epoch": 0.695655327482411, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9591 }, { "epoch": 0.6957278595778632, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9592 }, { "epoch": 0.6958003916733154, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.4929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9593 }, { "epoch": 0.6958729237687676, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9594 }, { "epoch": 0.69594545586422, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9595 }, { "epoch": 0.6960179879596722, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.2713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9596 }, { "epoch": 0.6960905200551244, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.73, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9597 }, { "epoch": 0.6961630521505766, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9598 }, { "epoch": 0.6962355842460288, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.1253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9599 }, { "epoch": 0.6963081163414812, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9600 }, { "epoch": 0.6963806484369334, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.8522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9601 }, { "epoch": 0.6964531805323856, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9602 }, { "epoch": 0.6965257126278378, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9603 }, { "epoch": 0.69659824472329, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 8.7854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9604 }, { "epoch": 0.6966707768187423, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.2142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9605 }, { "epoch": 0.6967433089141946, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.4645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9606 }, { "epoch": 0.6968158410096468, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9607 }, { "epoch": 0.696888373105099, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.4215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9608 }, { "epoch": 0.6969609052005512, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.6733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9609 }, { "epoch": 0.6970334372960035, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.5823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9610 }, { "epoch": 0.6971059693914557, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.8439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9611 }, { "epoch": 0.697178501486908, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.3308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9612 }, { "epoch": 0.6972510335823602, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.0253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9613 }, { "epoch": 0.6973235656778124, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9614 }, { "epoch": 0.6973960977732647, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 9.1073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9615 }, { "epoch": 0.6974686298687169, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.5445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9616 }, { "epoch": 0.6975411619641692, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9617 }, { "epoch": 0.6976136940596214, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.0501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9618 }, { "epoch": 0.6976862261550736, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9619 }, { "epoch": 0.6977587582505259, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9620 }, { "epoch": 0.6978312903459781, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.1477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9621 }, { "epoch": 0.6979038224414303, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.8054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9622 }, { "epoch": 0.6979763545368826, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.3227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9623 }, { "epoch": 0.6980488866323348, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.1425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9624 }, { "epoch": 0.698121418727787, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.5302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9625 }, { "epoch": 0.6981939508232393, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9626 }, { "epoch": 0.6982664829186915, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9627 }, { "epoch": 0.6983390150141437, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.1713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9628 }, { "epoch": 0.698411547109596, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9629 }, { "epoch": 0.6984840792050482, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9630 }, { "epoch": 0.6985566113005005, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9631 }, { "epoch": 0.6986291433959527, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.6096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9632 }, { "epoch": 0.6987016754914049, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.3859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9633 }, { "epoch": 0.6987742075868572, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.4429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9634 }, { "epoch": 0.6988467396823094, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9635 }, { "epoch": 0.6989192717777617, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.2942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9636 }, { "epoch": 0.6989918038732139, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9637 }, { "epoch": 0.6990643359686661, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.8654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9638 }, { "epoch": 0.6991368680641183, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.0705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9639 }, { "epoch": 0.6992094001595706, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.0431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9640 }, { "epoch": 0.6992819322550229, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.8658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9641 }, { "epoch": 0.6993544643504751, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.4671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9642 }, { "epoch": 0.6994269964459273, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.4776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9643 }, { "epoch": 0.6994995285413795, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.9811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9644 }, { "epoch": 0.6995720606368317, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9645 }, { "epoch": 0.6996445927322841, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.7105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9646 }, { "epoch": 0.6997171248277363, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9647 }, { "epoch": 0.6997896569231885, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.4579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9648 }, { "epoch": 0.6998621890186407, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9649 }, { "epoch": 0.6999347211140929, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9650 }, { "epoch": 0.7000072532095453, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.1242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9651 }, { "epoch": 0.7000797853049975, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.4765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9652 }, { "epoch": 0.7001523174004497, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 9.3126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9653 }, { "epoch": 0.7002248494959019, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.6507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9654 }, { "epoch": 0.7002973815913541, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.7346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9655 }, { "epoch": 0.7003699136868065, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9656 }, { "epoch": 0.7004424457822587, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9657 }, { "epoch": 0.7005149778777109, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.4585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9658 }, { "epoch": 0.7005875099731631, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9659 }, { "epoch": 0.7006600420686153, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.4666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9660 }, { "epoch": 0.7007325741640676, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.8856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9661 }, { "epoch": 0.7008051062595199, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 9.0234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9662 }, { "epoch": 0.7008776383549721, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.1579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9663 }, { "epoch": 0.7009501704504243, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.0347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9664 }, { "epoch": 0.7010227025458765, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.1784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9665 }, { "epoch": 0.7010952346413288, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9666 }, { "epoch": 0.701167766736781, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.3577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9667 }, { "epoch": 0.7012402988322333, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.6903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9668 }, { "epoch": 0.7013128309276855, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9669 }, { "epoch": 0.7013853630231377, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9670 }, { "epoch": 0.70145789511859, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.4598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9671 }, { "epoch": 0.7015304272140422, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.5404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9672 }, { "epoch": 0.7016029593094945, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.6904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9673 }, { "epoch": 0.7016754914049467, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.7663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9674 }, { "epoch": 0.7017480235003989, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.9684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9675 }, { "epoch": 0.7018205555958512, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.3556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9676 }, { "epoch": 0.7018930876913034, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9677 }, { "epoch": 0.7019656197867556, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9678 }, { "epoch": 0.7020381518822079, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9679 }, { "epoch": 0.7021106839776601, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9680 }, { "epoch": 0.7021832160731124, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9681 }, { "epoch": 0.7022557481685646, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.6927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9682 }, { "epoch": 0.7023282802640168, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.2695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9683 }, { "epoch": 0.702400812359469, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.3955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9684 }, { "epoch": 0.7024733444549213, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9685 }, { "epoch": 0.7025458765503736, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.6079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9686 }, { "epoch": 0.7026184086458258, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.0698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9687 }, { "epoch": 0.702690940741278, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9688 }, { "epoch": 0.7027634728367302, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.1923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9689 }, { "epoch": 0.7028360049321825, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9690 }, { "epoch": 0.7029085370276348, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.4997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9691 }, { "epoch": 0.702981069123087, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9692 }, { "epoch": 0.7030536012185392, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9693 }, { "epoch": 0.7031261333139914, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.1994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9694 }, { "epoch": 0.7031986654094436, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 8.6278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9695 }, { "epoch": 0.7032711975048959, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.5344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9696 }, { "epoch": 0.7033437296003482, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.3483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9697 }, { "epoch": 0.7034162616958004, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.2229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9698 }, { "epoch": 0.7034887937912526, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.8231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9699 }, { "epoch": 0.7035613258867048, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9700 }, { "epoch": 0.703633857982157, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.2781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9701 }, { "epoch": 0.7037063900776094, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9702 }, { "epoch": 0.7037789221730616, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9703 }, { "epoch": 0.7038514542685138, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.9135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9704 }, { "epoch": 0.703923986363966, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 9.182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9705 }, { "epoch": 0.7039965184594182, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.6275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9706 }, { "epoch": 0.7040690505548706, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9707 }, { "epoch": 0.7041415826503228, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 8.728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9708 }, { "epoch": 0.704214114745775, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9709 }, { "epoch": 0.7042866468412272, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.4577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9710 }, { "epoch": 0.7043591789366794, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9711 }, { "epoch": 0.7044317110321318, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.7728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9712 }, { "epoch": 0.704504243127584, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9713 }, { "epoch": 0.7045767752230362, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9714 }, { "epoch": 0.7046493073184884, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9715 }, { "epoch": 0.7047218394139406, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9716 }, { "epoch": 0.704794371509393, "grad_norm": 32.25, "learning_rate": 0.0003, "loss": 8.9136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9717 }, { "epoch": 0.7048669036048452, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.6951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9718 }, { "epoch": 0.7049394357002974, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9719 }, { "epoch": 0.7050119677957496, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.9946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9720 }, { "epoch": 0.7050844998912018, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9721 }, { "epoch": 0.7051570319866541, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.2805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9722 }, { "epoch": 0.7052295640821064, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.6257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9723 }, { "epoch": 0.7053020961775586, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.3476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9724 }, { "epoch": 0.7053746282730108, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.4945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9725 }, { "epoch": 0.705447160368463, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.4345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9726 }, { "epoch": 0.7055196924639153, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9727 }, { "epoch": 0.7055922245593675, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.8079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9728 }, { "epoch": 0.7056647566548198, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9729 }, { "epoch": 0.705737288750272, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.4996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9730 }, { "epoch": 0.7058098208457242, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.4704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9731 }, { "epoch": 0.7058823529411765, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.7133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9732 }, { "epoch": 0.7059548850366287, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9733 }, { "epoch": 0.706027417132081, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7891, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9734 }, { "epoch": 0.7060999492275332, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9735 }, { "epoch": 0.7061724813229854, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 9.2868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9736 }, { "epoch": 0.7062450134184377, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.1956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9737 }, { "epoch": 0.7063175455138899, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.3111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9738 }, { "epoch": 0.7063900776093421, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.5019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9739 }, { "epoch": 0.7064626097047944, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.6951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9740 }, { "epoch": 0.7065351418002466, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.06, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9741 }, { "epoch": 0.7066076738956989, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9742 }, { "epoch": 0.7066802059911511, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.6953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9743 }, { "epoch": 0.7067527380866033, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 9.0245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9744 }, { "epoch": 0.7068252701820555, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.6686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9745 }, { "epoch": 0.7068978022775078, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.6188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9746 }, { "epoch": 0.7069703343729601, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.79, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9747 }, { "epoch": 0.7070428664684123, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9748 }, { "epoch": 0.7071153985638645, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9749 }, { "epoch": 0.7071879306593167, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9750 }, { "epoch": 0.707260462754769, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9751 }, { "epoch": 0.7073329948502213, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9752 }, { "epoch": 0.7074055269456735, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9753 }, { "epoch": 0.7074780590411257, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9754 }, { "epoch": 0.7075505911365779, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 9.0935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9755 }, { "epoch": 0.7076231232320301, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.7988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9756 }, { "epoch": 0.7076956553274825, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.3623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9757 }, { "epoch": 0.7077681874229347, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.7586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9758 }, { "epoch": 0.7078407195183869, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.0229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9759 }, { "epoch": 0.7079132516138391, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.5617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9760 }, { "epoch": 0.7079857837092913, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.3868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9761 }, { "epoch": 0.7080583158047437, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.8129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9762 }, { "epoch": 0.7081308479001959, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.5035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9763 }, { "epoch": 0.7082033799956481, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.7893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9764 }, { "epoch": 0.7082759120911003, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9765 }, { "epoch": 0.7083484441865525, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9766 }, { "epoch": 0.7084209762820047, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.0963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9767 }, { "epoch": 0.7084935083774571, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.9367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9768 }, { "epoch": 0.7085660404729093, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.4367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9769 }, { "epoch": 0.7086385725683615, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.2699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9770 }, { "epoch": 0.7087111046638137, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9771 }, { "epoch": 0.7087836367592659, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9772 }, { "epoch": 0.7088561688547182, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.7447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9773 }, { "epoch": 0.7089287009501705, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9774 }, { "epoch": 0.7090012330456227, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.1665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9775 }, { "epoch": 0.7090737651410749, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9776 }, { "epoch": 0.7091462972365271, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9777 }, { "epoch": 0.7092188293319794, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9778 }, { "epoch": 0.7092913614274317, "grad_norm": 16.375, "learning_rate": 0.0003, "loss": 8.7602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9779 }, { "epoch": 0.7093638935228839, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9780 }, { "epoch": 0.7094364256183361, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9781 }, { "epoch": 0.7095089577137883, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.8924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9782 }, { "epoch": 0.7095814898092406, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.1141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9783 }, { "epoch": 0.7096540219046928, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.3186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9784 }, { "epoch": 0.7097265540001451, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9785 }, { "epoch": 0.7097990860955973, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9786 }, { "epoch": 0.7098716181910495, "grad_norm": 13.5, "learning_rate": 0.0003, "loss": 9.3313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9787 }, { "epoch": 0.7099441502865018, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9788 }, { "epoch": 0.710016682381954, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.5516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9789 }, { "epoch": 0.7100892144774062, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.8729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9790 }, { "epoch": 0.7101617465728585, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9791 }, { "epoch": 0.7102342786683107, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9792 }, { "epoch": 0.710306810763763, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9793 }, { "epoch": 0.7103793428592152, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.4945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9794 }, { "epoch": 0.7104518749546674, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9795 }, { "epoch": 0.7105244070501197, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9796 }, { "epoch": 0.7105969391455719, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.7942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9797 }, { "epoch": 0.7106694712410242, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.6474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9798 }, { "epoch": 0.7107420033364764, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9799 }, { "epoch": 0.7108145354319286, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.9373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9800 }, { "epoch": 0.7108870675273808, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9801 }, { "epoch": 0.7109595996228331, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9802 }, { "epoch": 0.7110321317182854, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.3027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9803 }, { "epoch": 0.7111046638137376, "grad_norm": 7.25, "learning_rate": 0.0003, "loss": 9.1583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9804 }, { "epoch": 0.7111771959091898, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 8.485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9805 }, { "epoch": 0.711249728004642, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.8059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9806 }, { "epoch": 0.7113222601000943, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9807 }, { "epoch": 0.7113947921955466, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.4101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9808 }, { "epoch": 0.7114673242909988, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.9834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9809 }, { "epoch": 0.711539856386451, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9810 }, { "epoch": 0.7116123884819032, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9811 }, { "epoch": 0.7116849205773554, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.4216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9812 }, { "epoch": 0.7117574526728078, "grad_norm": 11.875, "learning_rate": 0.0003, "loss": 8.8244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9813 }, { "epoch": 0.71182998476826, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.0086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9814 }, { "epoch": 0.7119025168637122, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.7741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9815 }, { "epoch": 0.7119750489591644, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.1077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9816 }, { "epoch": 0.7120475810546166, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.0706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9817 }, { "epoch": 0.712120113150069, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9818 }, { "epoch": 0.7121926452455212, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9819 }, { "epoch": 0.7122651773409734, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9820 }, { "epoch": 0.7123377094364256, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.9115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9821 }, { "epoch": 0.7124102415318778, "grad_norm": 19.75, "learning_rate": 0.0003, "loss": 9.1357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9822 }, { "epoch": 0.7124827736273301, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9823 }, { "epoch": 0.7125553057227824, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.8551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9824 }, { "epoch": 0.7126278378182346, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.5978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9825 }, { "epoch": 0.7127003699136868, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.8083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9826 }, { "epoch": 0.712772902009139, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 9.3685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9827 }, { "epoch": 0.7128454341045913, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9828 }, { "epoch": 0.7129179662000436, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9829 }, { "epoch": 0.7129904982954958, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.7135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9830 }, { "epoch": 0.713063030390948, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9831 }, { "epoch": 0.7131355624864002, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9832 }, { "epoch": 0.7132080945818524, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.3399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9833 }, { "epoch": 0.7132806266773047, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.9752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9834 }, { "epoch": 0.713353158772757, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 9.0834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9835 }, { "epoch": 0.7134256908682092, "grad_norm": 20.375, "learning_rate": 0.0003, "loss": 9.1054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9836 }, { "epoch": 0.7134982229636614, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9837 }, { "epoch": 0.7135707550591136, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.7406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9838 }, { "epoch": 0.7136432871545659, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.9973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9839 }, { "epoch": 0.7137158192500181, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.0439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9840 }, { "epoch": 0.7137883513454704, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9841 }, { "epoch": 0.7138608834409226, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9842 }, { "epoch": 0.7139334155363748, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9843 }, { "epoch": 0.7140059476318271, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9844 }, { "epoch": 0.7140784797272793, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.2762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9845 }, { "epoch": 0.7141510118227316, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9846 }, { "epoch": 0.7142235439181838, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9847 }, { "epoch": 0.714296076013636, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9848 }, { "epoch": 0.7143686081090883, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.6146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9849 }, { "epoch": 0.7144411402045405, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.0709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9850 }, { "epoch": 0.7145136722999927, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.4599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9851 }, { "epoch": 0.714586204395445, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.3537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9852 }, { "epoch": 0.7146587364908972, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.7478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9853 }, { "epoch": 0.7147312685863495, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9854 }, { "epoch": 0.7148038006818017, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.0511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9855 }, { "epoch": 0.7148763327772539, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.5702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9856 }, { "epoch": 0.7149488648727061, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.1417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9857 }, { "epoch": 0.7150213969681584, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.0354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9858 }, { "epoch": 0.7150939290636107, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9859 }, { "epoch": 0.7151664611590629, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.7076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9860 }, { "epoch": 0.7152389932545151, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.2234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9861 }, { "epoch": 0.7153115253499673, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.8401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9862 }, { "epoch": 0.7153840574454196, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.7773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9863 }, { "epoch": 0.7154565895408719, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.4082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9864 }, { "epoch": 0.7155291216363241, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.3505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9865 }, { "epoch": 0.7156016537317763, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.0021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9866 }, { "epoch": 0.7156741858272285, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9867 }, { "epoch": 0.7157467179226807, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.9616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9868 }, { "epoch": 0.7158192500181331, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.0084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9869 }, { "epoch": 0.7158917821135853, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.3962, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9870 }, { "epoch": 0.7159643142090375, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.8207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9871 }, { "epoch": 0.7160368463044897, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.9206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9872 }, { "epoch": 0.7161093783999419, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.8038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9873 }, { "epoch": 0.7161819104953943, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9874 }, { "epoch": 0.7162544425908465, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9875 }, { "epoch": 0.7163269746862987, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9876 }, { "epoch": 0.7163995067817509, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9877 }, { "epoch": 0.7164720388772031, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9878 }, { "epoch": 0.7165445709726554, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.1507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9879 }, { "epoch": 0.7166171030681077, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9880 }, { "epoch": 0.7166896351635599, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.4521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9881 }, { "epoch": 0.7167621672590121, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.2388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9882 }, { "epoch": 0.7168346993544643, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.4635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9883 }, { "epoch": 0.7169072314499166, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.1058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9884 }, { "epoch": 0.7169797635453689, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.8409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9885 }, { "epoch": 0.7170522956408211, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 9.0454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9886 }, { "epoch": 0.7171248277362733, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.1425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9887 }, { "epoch": 0.7171973598317255, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9888 }, { "epoch": 0.7172698919271778, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9889 }, { "epoch": 0.71734242402263, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.2651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9890 }, { "epoch": 0.7174149561180823, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.0914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9891 }, { "epoch": 0.7174874882135345, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9892 }, { "epoch": 0.7175600203089867, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.6973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9893 }, { "epoch": 0.717632552404439, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9894 }, { "epoch": 0.7177050844998912, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9895 }, { "epoch": 0.7177776165953434, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.2744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9896 }, { "epoch": 0.7178501486907957, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.4348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9897 }, { "epoch": 0.7179226807862479, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.8215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9898 }, { "epoch": 0.7179952128817002, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.0369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9899 }, { "epoch": 0.7180677449771524, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.6419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9900 }, { "epoch": 0.7181402770726046, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9901 }, { "epoch": 0.7182128091680569, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.9554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9902 }, { "epoch": 0.7182853412635091, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.7875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9903 }, { "epoch": 0.7183578733589613, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9904 }, { "epoch": 0.7184304054544136, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.3101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9905 }, { "epoch": 0.7185029375498658, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.0327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9906 }, { "epoch": 0.718575469645318, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.4089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9907 }, { "epoch": 0.7186480017407703, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.0943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9908 }, { "epoch": 0.7187205338362225, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.5206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9909 }, { "epoch": 0.7187930659316748, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9910 }, { "epoch": 0.718865598027127, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.8486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9911 }, { "epoch": 0.7189381301225792, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9912 }, { "epoch": 0.7190106622180314, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9913 }, { "epoch": 0.7190831943134837, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.6625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9914 }, { "epoch": 0.719155726408936, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9915 }, { "epoch": 0.7192282585043882, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.6617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9916 }, { "epoch": 0.7193007905998404, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9917 }, { "epoch": 0.7193733226952926, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.6179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9918 }, { "epoch": 0.7194458547907449, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9919 }, { "epoch": 0.7195183868861972, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.6083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9920 }, { "epoch": 0.7195909189816494, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.4253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9921 }, { "epoch": 0.7196634510771016, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.6902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9922 }, { "epoch": 0.7197359831725538, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9923 }, { "epoch": 0.719808515268006, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.6234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9924 }, { "epoch": 0.7198810473634584, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.5019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9925 }, { "epoch": 0.7199535794589106, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.6229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9926 }, { "epoch": 0.7200261115543628, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9927 }, { "epoch": 0.720098643649815, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.4028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9928 }, { "epoch": 0.7201711757452672, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.3689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9929 }, { "epoch": 0.7202437078407196, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9930 }, { "epoch": 0.7203162399361718, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9931 }, { "epoch": 0.720388772031624, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.1793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9932 }, { "epoch": 0.7204613041270762, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.6707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9933 }, { "epoch": 0.7205338362225284, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9934 }, { "epoch": 0.7206063683179807, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9935 }, { "epoch": 0.720678900413433, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9936 }, { "epoch": 0.7207514325088852, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9937 }, { "epoch": 0.7208239646043374, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.6093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9938 }, { "epoch": 0.7208964966997896, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9939 }, { "epoch": 0.7209690287952419, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.2981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9940 }, { "epoch": 0.7210415608906942, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9941 }, { "epoch": 0.7211140929861464, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.6762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9942 }, { "epoch": 0.7211866250815986, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9943 }, { "epoch": 0.7212591571770508, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.2755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9944 }, { "epoch": 0.7213316892725031, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 8.5894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9945 }, { "epoch": 0.7214042213679553, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9946 }, { "epoch": 0.7214767534634076, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9947 }, { "epoch": 0.7215492855588598, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.8264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9948 }, { "epoch": 0.721621817654312, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9949 }, { "epoch": 0.7216943497497643, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.4698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9950 }, { "epoch": 0.7217668818452165, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.1848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9951 }, { "epoch": 0.7218394139406688, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.9627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9952 }, { "epoch": 0.721911946036121, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9953 }, { "epoch": 0.7219844781315732, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9954 }, { "epoch": 0.7220570102270255, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.2391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9955 }, { "epoch": 0.7221295423224777, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.1108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9956 }, { "epoch": 0.7222020744179299, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.8706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9957 }, { "epoch": 0.7222746065133822, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9958 }, { "epoch": 0.7223471386088344, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.0439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9959 }, { "epoch": 0.7224196707042867, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9960 }, { "epoch": 0.7224922027997389, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 8.7111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9961 }, { "epoch": 0.7225647348951911, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.7373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9962 }, { "epoch": 0.7226372669906433, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.6096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9963 }, { "epoch": 0.7227097990860956, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9964 }, { "epoch": 0.7227823311815479, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.8758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9965 }, { "epoch": 0.7228548632770001, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9966 }, { "epoch": 0.7229273953724523, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.5067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9967 }, { "epoch": 0.7229999274679045, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.5242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9968 }, { "epoch": 0.7230724595633568, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9969 }, { "epoch": 0.7231449916588091, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.6303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9970 }, { "epoch": 0.7232175237542613, "grad_norm": 16.75, "learning_rate": 0.0003, "loss": 8.5749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9971 }, { "epoch": 0.7232900558497135, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.4047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9972 }, { "epoch": 0.7233625879451657, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.9016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9973 }, { "epoch": 0.7234351200406179, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.9613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9974 }, { "epoch": 0.7235076521360702, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9975 }, { "epoch": 0.7235801842315225, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9976 }, { "epoch": 0.7236527163269747, "grad_norm": 1.65625, "learning_rate": 0.0003, "loss": 8.8881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9977 }, { "epoch": 0.7237252484224269, "grad_norm": 41.0, "learning_rate": 0.0003, "loss": 8.4711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9978 }, { "epoch": 0.7237977805178791, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9979 }, { "epoch": 0.7238703126133313, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9980 }, { "epoch": 0.7239428447087837, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.5292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9981 }, { "epoch": 0.7240153768042359, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.7875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9982 }, { "epoch": 0.7240879088996881, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 9.3372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9983 }, { "epoch": 0.7241604409951403, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.6665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9984 }, { "epoch": 0.7242329730905925, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.5886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9985 }, { "epoch": 0.7243055051860449, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9986 }, { "epoch": 0.7243780372814971, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.2333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9987 }, { "epoch": 0.7244505693769493, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9988 }, { "epoch": 0.7245231014724015, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.8479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9989 }, { "epoch": 0.7245956335678537, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.7598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9990 }, { "epoch": 0.724668165663306, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.9887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9991 }, { "epoch": 0.7247406977587583, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.99, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9992 }, { "epoch": 0.7248132298542105, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.0372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9993 }, { "epoch": 0.7248857619496627, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9994 }, { "epoch": 0.7249582940451149, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.6302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9995 }, { "epoch": 0.7250308261405672, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.0664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9996 }, { "epoch": 0.7251033582360195, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9997 }, { "epoch": 0.7251758903314717, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9998 }, { "epoch": 0.7252484224269239, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 9999 }, { "epoch": 0.7253209545223761, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.8778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10000 }, { "epoch": 0.7253934866178284, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10001 }, { "epoch": 0.7254660187132806, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10002 }, { "epoch": 0.7255385508087329, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.8313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10003 }, { "epoch": 0.7256110829041851, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10004 }, { "epoch": 0.7256836149996373, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.7512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10005 }, { "epoch": 0.7257561470950896, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.3738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10006 }, { "epoch": 0.7258286791905418, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.6258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10007 }, { "epoch": 0.725901211285994, "grad_norm": 20.25, "learning_rate": 0.0003, "loss": 8.3975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10008 }, { "epoch": 0.7259737433814463, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10009 }, { "epoch": 0.7260462754768985, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 9.1405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10010 }, { "epoch": 0.7261188075723508, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.9676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10011 }, { "epoch": 0.726191339667803, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.8291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10012 }, { "epoch": 0.7262638717632552, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.1857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10013 }, { "epoch": 0.7263364038587075, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.2809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10014 }, { "epoch": 0.7264089359541597, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.0147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10015 }, { "epoch": 0.726481468049612, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.7839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10016 }, { "epoch": 0.7265540001450642, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 8.8616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10017 }, { "epoch": 0.7266265322405164, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.7606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10018 }, { "epoch": 0.7266990643359686, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10019 }, { "epoch": 0.7267715964314209, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10020 }, { "epoch": 0.7268441285268732, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.8583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10021 }, { "epoch": 0.7269166606223254, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.7326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10022 }, { "epoch": 0.7269891927177776, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10023 }, { "epoch": 0.7270617248132298, "grad_norm": 488.0, "learning_rate": 0.0003, "loss": 8.9446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10024 }, { "epoch": 0.727134256908682, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.1856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10025 }, { "epoch": 0.7272067890041344, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 8.9236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10026 }, { "epoch": 0.7272793210995866, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.7822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10027 }, { "epoch": 0.7273518531950388, "grad_norm": 15.875, "learning_rate": 0.0003, "loss": 8.8906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10028 }, { "epoch": 0.727424385290491, "grad_norm": 23.125, "learning_rate": 0.0003, "loss": 9.2418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10029 }, { "epoch": 0.7274969173859432, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0769, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10030 }, { "epoch": 0.7275694494813956, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10031 }, { "epoch": 0.7276419815768478, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.6934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10032 }, { "epoch": 0.7277145136723, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10033 }, { "epoch": 0.7277870457677522, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10034 }, { "epoch": 0.7278595778632044, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10035 }, { "epoch": 0.7279321099586568, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10036 }, { "epoch": 0.728004642054109, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10037 }, { "epoch": 0.7280771741495612, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10038 }, { "epoch": 0.7281497062450134, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.5401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10039 }, { "epoch": 0.7282222383404656, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10040 }, { "epoch": 0.728294770435918, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10041 }, { "epoch": 0.7283673025313702, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.0611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10042 }, { "epoch": 0.7284398346268224, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 9.0166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10043 }, { "epoch": 0.7285123667222746, "grad_norm": 13.0625, "learning_rate": 0.0003, "loss": 9.253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10044 }, { "epoch": 0.7285848988177268, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.7063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10045 }, { "epoch": 0.728657430913179, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.7688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10046 }, { "epoch": 0.7287299630086314, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10047 }, { "epoch": 0.7288024951040836, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.4138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10048 }, { "epoch": 0.7288750271995358, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.8755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10049 }, { "epoch": 0.728947559294988, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.4717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10050 }, { "epoch": 0.7290200913904402, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.9635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10051 }, { "epoch": 0.7290926234858925, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.6006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10052 }, { "epoch": 0.7291651555813448, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10053 }, { "epoch": 0.729237687676797, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.4714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10054 }, { "epoch": 0.7293102197722492, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.5848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10055 }, { "epoch": 0.7293827518677014, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10056 }, { "epoch": 0.7294552839631537, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.6195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10057 }, { "epoch": 0.729527816058606, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10058 }, { "epoch": 0.7296003481540582, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10059 }, { "epoch": 0.7296728802495104, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.5559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10060 }, { "epoch": 0.7297454123449626, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10061 }, { "epoch": 0.7298179444404149, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10062 }, { "epoch": 0.7298904765358671, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.8798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10063 }, { "epoch": 0.7299630086313194, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.7938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10064 }, { "epoch": 0.7300355407267716, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10065 }, { "epoch": 0.7301080728222238, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.1469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10066 }, { "epoch": 0.7301806049176761, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.5266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10067 }, { "epoch": 0.7302531370131283, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.03, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10068 }, { "epoch": 0.7303256691085805, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 8.8912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10069 }, { "epoch": 0.7303982012040328, "grad_norm": 31.375, "learning_rate": 0.0003, "loss": 8.7624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10070 }, { "epoch": 0.730470733299485, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 7.9291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10071 }, { "epoch": 0.7305432653949373, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.0237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10072 }, { "epoch": 0.7306157974903895, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10073 }, { "epoch": 0.7306883295858417, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.0408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10074 }, { "epoch": 0.730760861681294, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10075 }, { "epoch": 0.7308333937767462, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.6476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10076 }, { "epoch": 0.7309059258721985, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.8054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10077 }, { "epoch": 0.7309784579676507, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10078 }, { "epoch": 0.7310509900631029, "grad_norm": 1.6875, "learning_rate": 0.0003, "loss": 8.7127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10079 }, { "epoch": 0.7311235221585551, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.7776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10080 }, { "epoch": 0.7311960542540074, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10081 }, { "epoch": 0.7312685863494597, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10082 }, { "epoch": 0.7313411184449119, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.6862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10083 }, { "epoch": 0.7314136505403641, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.0084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10084 }, { "epoch": 0.7314861826358163, "grad_norm": 12.6875, "learning_rate": 0.0003, "loss": 9.084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10085 }, { "epoch": 0.7315587147312685, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10086 }, { "epoch": 0.7316312468267209, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10087 }, { "epoch": 0.7317037789221731, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.1477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10088 }, { "epoch": 0.7317763110176253, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.4018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10089 }, { "epoch": 0.7318488431130775, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.3181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10090 }, { "epoch": 0.7319213752085297, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.6957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10091 }, { "epoch": 0.7319939073039821, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.9136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10092 }, { "epoch": 0.7320664393994343, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.9032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10093 }, { "epoch": 0.7321389714948865, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10094 }, { "epoch": 0.7322115035903387, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10095 }, { "epoch": 0.7322840356857909, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.5056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10096 }, { "epoch": 0.7323565677812433, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10097 }, { "epoch": 0.7324290998766955, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10098 }, { "epoch": 0.7325016319721477, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10099 }, { "epoch": 0.7325741640675999, "grad_norm": 1.8125, "learning_rate": 0.0003, "loss": 8.9755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10100 }, { "epoch": 0.7326466961630521, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.8109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10101 }, { "epoch": 0.7327192282585044, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.1701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10102 }, { "epoch": 0.7327917603539567, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.2272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10103 }, { "epoch": 0.7328642924494089, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10104 }, { "epoch": 0.7329368245448611, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.0884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10105 }, { "epoch": 0.7330093566403133, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10106 }, { "epoch": 0.7330818887357656, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10107 }, { "epoch": 0.7331544208312178, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.5121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10108 }, { "epoch": 0.7332269529266701, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.7778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10109 }, { "epoch": 0.7332994850221223, "grad_norm": 7.90625, "learning_rate": 0.0003, "loss": 9.0865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10110 }, { "epoch": 0.7333720171175745, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.1536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10111 }, { "epoch": 0.7334445492130268, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.5915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10112 }, { "epoch": 0.733517081308479, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10113 }, { "epoch": 0.7335896134039313, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.9595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10114 }, { "epoch": 0.7336621454993835, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.0405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10115 }, { "epoch": 0.7337346775948357, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.2934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10116 }, { "epoch": 0.7338072096902879, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10117 }, { "epoch": 0.7338797417857402, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 9.1159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10118 }, { "epoch": 0.7339522738811924, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10119 }, { "epoch": 0.7340248059766447, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 8.4076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10120 }, { "epoch": 0.7340973380720969, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10121 }, { "epoch": 0.7341698701675491, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.6067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10122 }, { "epoch": 0.7342424022630014, "grad_norm": 1.4609375, "learning_rate": 0.0003, "loss": 8.5282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10123 }, { "epoch": 0.7343149343584536, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.4099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10124 }, { "epoch": 0.7343874664539058, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.9025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10125 }, { "epoch": 0.7344599985493581, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10126 }, { "epoch": 0.7345325306448103, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10127 }, { "epoch": 0.7346050627402626, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.5352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10128 }, { "epoch": 0.7346775948357148, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10129 }, { "epoch": 0.734750126931167, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10130 }, { "epoch": 0.7348226590266193, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.6802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10131 }, { "epoch": 0.7348951911220715, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 9.1628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10132 }, { "epoch": 0.7349677232175238, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 8.9834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10133 }, { "epoch": 0.735040255312976, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.99, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10134 }, { "epoch": 0.7351127874084282, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.0655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10135 }, { "epoch": 0.7351853195038804, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10136 }, { "epoch": 0.7352578515993327, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10137 }, { "epoch": 0.735330383694785, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10138 }, { "epoch": 0.7354029157902372, "grad_norm": 26.0, "learning_rate": 0.0003, "loss": 8.8603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10139 }, { "epoch": 0.7354754478856894, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.0346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10140 }, { "epoch": 0.7355479799811416, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10141 }, { "epoch": 0.7356205120765938, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10142 }, { "epoch": 0.7356930441720462, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.1912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10143 }, { "epoch": 0.7357655762674984, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.0259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10144 }, { "epoch": 0.7358381083629506, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10145 }, { "epoch": 0.7359106404584028, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.7373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10146 }, { "epoch": 0.735983172553855, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.8193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10147 }, { "epoch": 0.7360557046493074, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.1408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10148 }, { "epoch": 0.7361282367447596, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10149 }, { "epoch": 0.7362007688402118, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.5233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10150 }, { "epoch": 0.736273300935664, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 8.6557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10151 }, { "epoch": 0.7363458330311162, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 9.062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10152 }, { "epoch": 0.7364183651265686, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10153 }, { "epoch": 0.7364908972220208, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10154 }, { "epoch": 0.736563429317473, "grad_norm": 11.6875, "learning_rate": 0.0003, "loss": 8.4153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10155 }, { "epoch": 0.7366359614129252, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.9486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10156 }, { "epoch": 0.7367084935083774, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.2474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10157 }, { "epoch": 0.7367810256038297, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.5959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10158 }, { "epoch": 0.736853557699282, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.0982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10159 }, { "epoch": 0.7369260897947342, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10160 }, { "epoch": 0.7369986218901864, "grad_norm": 50.5, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10161 }, { "epoch": 0.7370711539856386, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 8.7921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10162 }, { "epoch": 0.7371436860810909, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10163 }, { "epoch": 0.7372162181765431, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10164 }, { "epoch": 0.7372887502719954, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.7298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10165 }, { "epoch": 0.7373612823674476, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.1288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10166 }, { "epoch": 0.7374338144628998, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.2412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10167 }, { "epoch": 0.7375063465583521, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.1263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10168 }, { "epoch": 0.7375788786538043, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.7943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10169 }, { "epoch": 0.7376514107492566, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.7687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10170 }, { "epoch": 0.7377239428447088, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.6198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10171 }, { "epoch": 0.737796474940161, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10172 }, { "epoch": 0.7378690070356133, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.7498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10173 }, { "epoch": 0.7379415391310655, "grad_norm": 42.25, "learning_rate": 0.0003, "loss": 9.032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10174 }, { "epoch": 0.7380140712265177, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.4735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10175 }, { "epoch": 0.73808660332197, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10176 }, { "epoch": 0.7381591354174222, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10177 }, { "epoch": 0.7382316675128745, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.0707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10178 }, { "epoch": 0.7383041996083267, "grad_norm": 17.5, "learning_rate": 0.0003, "loss": 8.4296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10179 }, { "epoch": 0.7383767317037789, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10180 }, { "epoch": 0.7384492637992311, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.5497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10181 }, { "epoch": 0.7385217958946834, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10182 }, { "epoch": 0.7385943279901356, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10183 }, { "epoch": 0.7386668600855879, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.5397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10184 }, { "epoch": 0.7387393921810401, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10185 }, { "epoch": 0.7388119242764923, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.3827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10186 }, { "epoch": 0.7388844563719446, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.6966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10187 }, { "epoch": 0.7389569884673968, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10188 }, { "epoch": 0.7390295205628491, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10189 }, { "epoch": 0.7391020526583013, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.1972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10190 }, { "epoch": 0.7391745847537535, "grad_norm": 14.75, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10191 }, { "epoch": 0.7392471168492057, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10192 }, { "epoch": 0.739319648944658, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.9633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10193 }, { "epoch": 0.7393921810401103, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.6376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10194 }, { "epoch": 0.7394647131355625, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10195 }, { "epoch": 0.7395372452310147, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.0612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10196 }, { "epoch": 0.7396097773264669, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.5903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10197 }, { "epoch": 0.7396823094219191, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10198 }, { "epoch": 0.7397548415173715, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.8215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10199 }, { "epoch": 0.7398273736128237, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.1057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10200 }, { "epoch": 0.7398999057082759, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.3844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10201 }, { "epoch": 0.7399724378037281, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.8116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10202 }, { "epoch": 0.7400449698991803, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10203 }, { "epoch": 0.7401175019946327, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10204 }, { "epoch": 0.7401900340900849, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0239, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10205 }, { "epoch": 0.7402625661855371, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10206 }, { "epoch": 0.7403350982809893, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10207 }, { "epoch": 0.7404076303764415, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.2788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10208 }, { "epoch": 0.7404801624718939, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.3562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10209 }, { "epoch": 0.7405526945673461, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10210 }, { "epoch": 0.7406252266627983, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.5719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10211 }, { "epoch": 0.7406977587582505, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.6647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10212 }, { "epoch": 0.7407702908537027, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.9269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10213 }, { "epoch": 0.740842822949155, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.6295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10214 }, { "epoch": 0.7409153550446073, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.7481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10215 }, { "epoch": 0.7409878871400595, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10216 }, { "epoch": 0.7410604192355117, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10217 }, { "epoch": 0.7411329513309639, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 8.8365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10218 }, { "epoch": 0.7412054834264162, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10219 }, { "epoch": 0.7412780155218684, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.6509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10220 }, { "epoch": 0.7413505476173207, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10221 }, { "epoch": 0.7414230797127729, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.5963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10222 }, { "epoch": 0.7414956118082251, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.1979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10223 }, { "epoch": 0.7415681439036774, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10224 }, { "epoch": 0.7416406759991296, "grad_norm": 11.625, "learning_rate": 0.0003, "loss": 8.7432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10225 }, { "epoch": 0.7417132080945819, "grad_norm": 18.875, "learning_rate": 0.0003, "loss": 8.9201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10226 }, { "epoch": 0.7417857401900341, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.7106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10227 }, { "epoch": 0.7418582722854863, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10228 }, { "epoch": 0.7419308043809386, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.7135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10229 }, { "epoch": 0.7420033364763908, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.4346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10230 }, { "epoch": 0.742075868571843, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 9.68, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10231 }, { "epoch": 0.7421484006672953, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.1255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10232 }, { "epoch": 0.7422209327627475, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.0309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10233 }, { "epoch": 0.7422934648581998, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10234 }, { "epoch": 0.742365996953652, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10235 }, { "epoch": 0.7424385290491042, "grad_norm": 1.5234375, "learning_rate": 0.0003, "loss": 8.7219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10236 }, { "epoch": 0.7425110611445565, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10237 }, { "epoch": 0.7425835932400087, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10238 }, { "epoch": 0.742656125335461, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.7792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10239 }, { "epoch": 0.7427286574309132, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.1954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10240 }, { "epoch": 0.7428011895263654, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.3178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10241 }, { "epoch": 0.7428737216218176, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.3847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10242 }, { "epoch": 0.7429462537172699, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.5461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10243 }, { "epoch": 0.7430187858127222, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10244 }, { "epoch": 0.7430913179081744, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.7336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10245 }, { "epoch": 0.7431638500036266, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.9806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10246 }, { "epoch": 0.7432363820990788, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.3846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10247 }, { "epoch": 0.743308914194531, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10248 }, { "epoch": 0.7433814462899834, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10249 }, { "epoch": 0.7434539783854356, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10250 }, { "epoch": 0.7435265104808878, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10251 }, { "epoch": 0.74359904257634, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.0947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10252 }, { "epoch": 0.7436715746717922, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10253 }, { "epoch": 0.7437441067672445, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.1112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10254 }, { "epoch": 0.7438166388626968, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10255 }, { "epoch": 0.743889170958149, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.7851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10256 }, { "epoch": 0.7439617030536012, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.9981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10257 }, { "epoch": 0.7440342351490534, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.7867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10258 }, { "epoch": 0.7441067672445056, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10259 }, { "epoch": 0.744179299339958, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.4682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10260 }, { "epoch": 0.7442518314354102, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.7241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10261 }, { "epoch": 0.7443243635308624, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 8.542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10262 }, { "epoch": 0.7443968956263146, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10263 }, { "epoch": 0.7444694277217668, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10264 }, { "epoch": 0.7445419598172192, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10265 }, { "epoch": 0.7446144919126714, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10266 }, { "epoch": 0.7446870240081236, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.2693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10267 }, { "epoch": 0.7447595561035758, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.9245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10268 }, { "epoch": 0.744832088199028, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10269 }, { "epoch": 0.7449046202944803, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10270 }, { "epoch": 0.7449771523899326, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.4202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10271 }, { "epoch": 0.7450496844853848, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.9736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10272 }, { "epoch": 0.745122216580837, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10273 }, { "epoch": 0.7451947486762892, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.0615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10274 }, { "epoch": 0.7452672807717415, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.0978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10275 }, { "epoch": 0.7453398128671938, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10276 }, { "epoch": 0.745412344962646, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.2221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10277 }, { "epoch": 0.7454848770580982, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10278 }, { "epoch": 0.7455574091535504, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.3574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10279 }, { "epoch": 0.7456299412490027, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.5917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10280 }, { "epoch": 0.7457024733444549, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.8739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10281 }, { "epoch": 0.7457750054399072, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.5627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10282 }, { "epoch": 0.7458475375353594, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.5993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10283 }, { "epoch": 0.7459200696308116, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.5154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10284 }, { "epoch": 0.7459926017262639, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10285 }, { "epoch": 0.7460651338217161, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10286 }, { "epoch": 0.7461376659171683, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.7021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10287 }, { "epoch": 0.7462101980126206, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.4194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10288 }, { "epoch": 0.7462827301080728, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.0784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10289 }, { "epoch": 0.7463552622035251, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.4507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10290 }, { "epoch": 0.7464277942989773, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.3129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10291 }, { "epoch": 0.7465003263944295, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10292 }, { "epoch": 0.7465728584898818, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.07, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10293 }, { "epoch": 0.746645390585334, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10294 }, { "epoch": 0.7467179226807863, "grad_norm": 1.6484375, "learning_rate": 0.0003, "loss": 8.8519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10295 }, { "epoch": 0.7467904547762385, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.9483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10296 }, { "epoch": 0.7468629868716907, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.3132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10297 }, { "epoch": 0.7469355189671429, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 8.9867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10298 }, { "epoch": 0.7470080510625952, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10299 }, { "epoch": 0.7470805831580475, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10300 }, { "epoch": 0.7471531152534997, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.7632, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10301 }, { "epoch": 0.7472256473489519, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.709, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10302 }, { "epoch": 0.7472981794444041, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.3038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10303 }, { "epoch": 0.7473707115398563, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.3326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10304 }, { "epoch": 0.7474432436353087, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.4921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10305 }, { "epoch": 0.7475157757307609, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10306 }, { "epoch": 0.7475883078262131, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10307 }, { "epoch": 0.7476608399216653, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10308 }, { "epoch": 0.7477333720171175, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.8808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10309 }, { "epoch": 0.7478059041125699, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.5055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10310 }, { "epoch": 0.7478784362080221, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10311 }, { "epoch": 0.7479509683034743, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.2264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10312 }, { "epoch": 0.7480235003989265, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10313 }, { "epoch": 0.7480960324943787, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.9888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10314 }, { "epoch": 0.748168564589831, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10315 }, { "epoch": 0.7482410966852833, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10316 }, { "epoch": 0.7483136287807355, "grad_norm": 34.25, "learning_rate": 0.0003, "loss": 9.095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10317 }, { "epoch": 0.7483861608761877, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.3774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10318 }, { "epoch": 0.7484586929716399, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10319 }, { "epoch": 0.7485312250670922, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10320 }, { "epoch": 0.7486037571625445, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.1111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10321 }, { "epoch": 0.7486762892579967, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.8183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10322 }, { "epoch": 0.7487488213534489, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.7028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10323 }, { "epoch": 0.7488213534489011, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.1188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10324 }, { "epoch": 0.7488938855443533, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10325 }, { "epoch": 0.7489664176398056, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.7183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10326 }, { "epoch": 0.7490389497352579, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.607, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10327 }, { "epoch": 0.7491114818307101, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.8949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10328 }, { "epoch": 0.7491840139261623, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10329 }, { "epoch": 0.7492565460216145, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.5839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10330 }, { "epoch": 0.7493290781170668, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.6323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10331 }, { "epoch": 0.749401610212519, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.1195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10332 }, { "epoch": 0.7494741423079713, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10333 }, { "epoch": 0.7495466744034235, "grad_norm": 1.421875, "learning_rate": 0.0003, "loss": 9.4801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10334 }, { "epoch": 0.7496192064988757, "grad_norm": 61.25, "learning_rate": 0.0003, "loss": 8.9839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10335 }, { "epoch": 0.749691738594328, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10336 }, { "epoch": 0.7497642706897802, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10337 }, { "epoch": 0.7498368027852325, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.1374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10338 }, { "epoch": 0.7499093348806847, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.4563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10339 }, { "epoch": 0.7499818669761369, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.9382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10340 }, { "epoch": 0.7500543990715892, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10341 }, { "epoch": 0.7501269311670414, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.4739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10342 }, { "epoch": 0.7501994632624936, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.4147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10343 }, { "epoch": 0.7502719953579459, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.1956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10344 }, { "epoch": 0.7503445274533981, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10345 }, { "epoch": 0.7504170595488504, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0926, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10346 }, { "epoch": 0.7504895916443026, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10347 }, { "epoch": 0.7505621237397548, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.8906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10348 }, { "epoch": 0.750634655835207, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10349 }, { "epoch": 0.7507071879306593, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.2284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10350 }, { "epoch": 0.7507797200261116, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10351 }, { "epoch": 0.7508522521215638, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.7268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10352 }, { "epoch": 0.750924784217016, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.7457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10353 }, { "epoch": 0.7509973163124682, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.7238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10354 }, { "epoch": 0.7510698484079205, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.6417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10355 }, { "epoch": 0.7511423805033728, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.5593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10356 }, { "epoch": 0.751214912598825, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.0267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10357 }, { "epoch": 0.7512874446942772, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10358 }, { "epoch": 0.7513599767897294, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10359 }, { "epoch": 0.7514325088851816, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10360 }, { "epoch": 0.751505040980634, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.9604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10361 }, { "epoch": 0.7515775730760862, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.8943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10362 }, { "epoch": 0.7516501051715384, "grad_norm": 22.875, "learning_rate": 0.0003, "loss": 8.7922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10363 }, { "epoch": 0.7517226372669906, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10364 }, { "epoch": 0.7517951693624428, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10365 }, { "epoch": 0.7518677014578952, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10366 }, { "epoch": 0.7519402335533474, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10367 }, { "epoch": 0.7520127656487996, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.2633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10368 }, { "epoch": 0.7520852977442518, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8058, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10369 }, { "epoch": 0.752157829839704, "grad_norm": 15.75, "learning_rate": 0.0003, "loss": 8.5556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10370 }, { "epoch": 0.7522303619351564, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.4387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10371 }, { "epoch": 0.7523028940306086, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.5734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10372 }, { "epoch": 0.7523754261260608, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.7932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10373 }, { "epoch": 0.752447958221513, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.7792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10374 }, { "epoch": 0.7525204903169652, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.7735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10375 }, { "epoch": 0.7525930224124175, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.3519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10376 }, { "epoch": 0.7526655545078698, "grad_norm": 42.75, "learning_rate": 0.0003, "loss": 9.2798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10377 }, { "epoch": 0.752738086603322, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.3563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10378 }, { "epoch": 0.7528106186987742, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.0041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10379 }, { "epoch": 0.7528831507942264, "grad_norm": 40.5, "learning_rate": 0.0003, "loss": 8.8565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10380 }, { "epoch": 0.7529556828896787, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.8553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10381 }, { "epoch": 0.753028214985131, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10382 }, { "epoch": 0.7531007470805832, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10383 }, { "epoch": 0.7531732791760354, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10384 }, { "epoch": 0.7532458112714876, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.0091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10385 }, { "epoch": 0.7533183433669399, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.5351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10386 }, { "epoch": 0.7533908754623921, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10387 }, { "epoch": 0.7534634075578444, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10388 }, { "epoch": 0.7535359396532966, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.5787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10389 }, { "epoch": 0.7536084717487488, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.4211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10390 }, { "epoch": 0.7536810038442011, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10391 }, { "epoch": 0.7537535359396533, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.6733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10392 }, { "epoch": 0.7538260680351055, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10393 }, { "epoch": 0.7538986001305578, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10394 }, { "epoch": 0.75397113222601, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 8.0283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10395 }, { "epoch": 0.7540436643214622, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.7623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10396 }, { "epoch": 0.7541161964169145, "grad_norm": 1.46875, "learning_rate": 0.0003, "loss": 8.8336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10397 }, { "epoch": 0.7541887285123667, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10398 }, { "epoch": 0.754261260607819, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10399 }, { "epoch": 0.7543337927032712, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10400 }, { "epoch": 0.7544063247987234, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.7983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10401 }, { "epoch": 0.7544788568941757, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.6272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10402 }, { "epoch": 0.7545513889896279, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10403 }, { "epoch": 0.7546239210850801, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.7315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10404 }, { "epoch": 0.7546964531805324, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.8671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10405 }, { "epoch": 0.7547689852759846, "grad_norm": 14.875, "learning_rate": 0.0003, "loss": 8.9901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10406 }, { "epoch": 0.7548415173714369, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.6852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10407 }, { "epoch": 0.7549140494668891, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10408 }, { "epoch": 0.7549865815623413, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 9.2212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10409 }, { "epoch": 0.7550591136577935, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.7595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10410 }, { "epoch": 0.7551316457532458, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 9.1188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10411 }, { "epoch": 0.7552041778486981, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.5047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10412 }, { "epoch": 0.7552767099441503, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.6517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10413 }, { "epoch": 0.7553492420396025, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10414 }, { "epoch": 0.7554217741350547, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10415 }, { "epoch": 0.755494306230507, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.9463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10416 }, { "epoch": 0.7555668383259593, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10417 }, { "epoch": 0.7556393704214115, "grad_norm": 23.75, "learning_rate": 0.0003, "loss": 8.7153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10418 }, { "epoch": 0.7557119025168637, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.6841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10419 }, { "epoch": 0.7557844346123159, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10420 }, { "epoch": 0.7558569667077681, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.1953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10421 }, { "epoch": 0.7559294988032205, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.5547, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10422 }, { "epoch": 0.7560020308986727, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 9.367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10423 }, { "epoch": 0.7560745629941249, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10424 }, { "epoch": 0.7561470950895771, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 9.0875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10425 }, { "epoch": 0.7562196271850293, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10426 }, { "epoch": 0.7562921592804817, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.5287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10427 }, { "epoch": 0.7563646913759339, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10428 }, { "epoch": 0.7564372234713861, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.6974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10429 }, { "epoch": 0.7565097555668383, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.9858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10430 }, { "epoch": 0.7565822876622905, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10431 }, { "epoch": 0.7566548197577428, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10432 }, { "epoch": 0.7567273518531951, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10433 }, { "epoch": 0.7567998839486473, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.5681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10434 }, { "epoch": 0.7568724160440995, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.2086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10435 }, { "epoch": 0.7569449481395517, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.1336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10436 }, { "epoch": 0.757017480235004, "grad_norm": 12.5625, "learning_rate": 0.0003, "loss": 8.5163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10437 }, { "epoch": 0.7570900123304563, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.7932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10438 }, { "epoch": 0.7571625444259085, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.1476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10439 }, { "epoch": 0.7572350765213607, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 9.1705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10440 }, { "epoch": 0.7573076086168129, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10441 }, { "epoch": 0.7573801407122652, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.5953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10442 }, { "epoch": 0.7574526728077174, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.0696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10443 }, { "epoch": 0.7575252049031697, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10444 }, { "epoch": 0.7575977369986219, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10445 }, { "epoch": 0.7576702690940741, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.7952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10446 }, { "epoch": 0.7577428011895264, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.5387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10447 }, { "epoch": 0.7578153332849786, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.8279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10448 }, { "epoch": 0.7578878653804308, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.7872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10449 }, { "epoch": 0.7579603974758831, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10450 }, { "epoch": 0.7580329295713353, "grad_norm": 26.875, "learning_rate": 0.0003, "loss": 8.5308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10451 }, { "epoch": 0.7581054616667876, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10452 }, { "epoch": 0.7581779937622398, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.1292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10453 }, { "epoch": 0.758250525857692, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10454 }, { "epoch": 0.7583230579531443, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10455 }, { "epoch": 0.7583955900485965, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.9308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10456 }, { "epoch": 0.7584681221440488, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.9138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10457 }, { "epoch": 0.758540654239501, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.8779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10458 }, { "epoch": 0.7586131863349532, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.46, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10459 }, { "epoch": 0.7586857184304054, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 9.2933, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10460 }, { "epoch": 0.7587582505258577, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.6382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10461 }, { "epoch": 0.75883078262131, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10462 }, { "epoch": 0.7589033147167622, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.7574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10463 }, { "epoch": 0.7589758468122144, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.8568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10464 }, { "epoch": 0.7590483789076666, "grad_norm": 22.875, "learning_rate": 0.0003, "loss": 9.0749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10465 }, { "epoch": 0.7591209110031188, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.2963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10466 }, { "epoch": 0.7591934430985711, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10467 }, { "epoch": 0.7592659751940234, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.6391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10468 }, { "epoch": 0.7593385072894756, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.3611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10469 }, { "epoch": 0.7594110393849278, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10470 }, { "epoch": 0.75948357148038, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10471 }, { "epoch": 0.7595561035758323, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 8.4576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10472 }, { "epoch": 0.7596286356712846, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.5374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10473 }, { "epoch": 0.7597011677667368, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10474 }, { "epoch": 0.759773699862189, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.2593, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10475 }, { "epoch": 0.7598462319576412, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10476 }, { "epoch": 0.7599187640530934, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.32, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10477 }, { "epoch": 0.7599912961485458, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.5469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10478 }, { "epoch": 0.760063828243998, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10479 }, { "epoch": 0.7601363603394502, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10480 }, { "epoch": 0.7602088924349024, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10481 }, { "epoch": 0.7602814245303546, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.0112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10482 }, { "epoch": 0.760353956625807, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 8.7602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10483 }, { "epoch": 0.7604264887212592, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.5856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10484 }, { "epoch": 0.7604990208167114, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.5502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10485 }, { "epoch": 0.7605715529121636, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10486 }, { "epoch": 0.7606440850076158, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.8903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10487 }, { "epoch": 0.7607166171030681, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.5611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10488 }, { "epoch": 0.7607891491985204, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10489 }, { "epoch": 0.7608616812939726, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.2605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10490 }, { "epoch": 0.7609342133894248, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.8531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10491 }, { "epoch": 0.761006745484877, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.5687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10492 }, { "epoch": 0.7610792775803293, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10493 }, { "epoch": 0.7611518096757816, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.3216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10494 }, { "epoch": 0.7612243417712338, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.2355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10495 }, { "epoch": 0.761296873866686, "grad_norm": 13.0, "learning_rate": 0.0003, "loss": 9.3896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10496 }, { "epoch": 0.7613694059621382, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.5914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10497 }, { "epoch": 0.7614419380575905, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10498 }, { "epoch": 0.7615144701530427, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 9.0104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10499 }, { "epoch": 0.761587002248495, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.1266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10500 }, { "epoch": 0.7616595343439472, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.5907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10501 }, { "epoch": 0.7617320664393994, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10502 }, { "epoch": 0.7618045985348517, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.8207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10503 }, { "epoch": 0.7618771306303039, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10504 }, { "epoch": 0.7619496627257561, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.9291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10505 }, { "epoch": 0.7620221948212084, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.6859, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10506 }, { "epoch": 0.7620947269166606, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.9485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10507 }, { "epoch": 0.7621672590121129, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.8918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10508 }, { "epoch": 0.7622397911075651, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.0141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10509 }, { "epoch": 0.7623123232030173, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10510 }, { "epoch": 0.7623848552984696, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.1351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10511 }, { "epoch": 0.7624573873939218, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.5735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10512 }, { "epoch": 0.7625299194893741, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.4578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10513 }, { "epoch": 0.7626024515848263, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 9.0418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10514 }, { "epoch": 0.7626749836802785, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10515 }, { "epoch": 0.7627475157757307, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.8173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10516 }, { "epoch": 0.762820047871183, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10517 }, { "epoch": 0.7628925799666353, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.3856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10518 }, { "epoch": 0.7629651120620875, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10519 }, { "epoch": 0.7630376441575397, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.4415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10520 }, { "epoch": 0.7631101762529919, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.5951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10521 }, { "epoch": 0.7631827083484441, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.5815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10522 }, { "epoch": 0.7632552404438965, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10523 }, { "epoch": 0.7633277725393487, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10524 }, { "epoch": 0.7634003046348009, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.0857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10525 }, { "epoch": 0.7634728367302531, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.7339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10526 }, { "epoch": 0.7635453688257053, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.6056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10527 }, { "epoch": 0.7636179009211577, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.1844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10528 }, { "epoch": 0.7636904330166099, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.0924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10529 }, { "epoch": 0.7637629651120621, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.6896, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10530 }, { "epoch": 0.7638354972075143, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.7805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10531 }, { "epoch": 0.7639080293029665, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.1881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10532 }, { "epoch": 0.7639805613984187, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10533 }, { "epoch": 0.7640530934938711, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 9.2966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10534 }, { "epoch": 0.7641256255893233, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10535 }, { "epoch": 0.7641981576847755, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.0255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10536 }, { "epoch": 0.7642706897802277, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10537 }, { "epoch": 0.7643432218756799, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.7502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10538 }, { "epoch": 0.7644157539711323, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10539 }, { "epoch": 0.7644882860665845, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.9316, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10540 }, { "epoch": 0.7645608181620367, "grad_norm": 664.0, "learning_rate": 0.0003, "loss": 9.1109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10541 }, { "epoch": 0.7646333502574889, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10542 }, { "epoch": 0.7647058823529411, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.5498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10543 }, { "epoch": 0.7647784144483935, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.7878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10544 }, { "epoch": 0.7648509465438457, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.0552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10545 }, { "epoch": 0.7649234786392979, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.7846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10546 }, { "epoch": 0.7649960107347501, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.1762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10547 }, { "epoch": 0.7650685428302023, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10548 }, { "epoch": 0.7651410749256546, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.8397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10549 }, { "epoch": 0.7652136070211069, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10550 }, { "epoch": 0.7652861391165591, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10551 }, { "epoch": 0.7653586712120113, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10552 }, { "epoch": 0.7654312033074635, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10553 }, { "epoch": 0.7655037354029158, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.6431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10554 }, { "epoch": 0.765576267498368, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10555 }, { "epoch": 0.7656487995938203, "grad_norm": 13.5625, "learning_rate": 0.0003, "loss": 8.6652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10556 }, { "epoch": 0.7657213316892725, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10557 }, { "epoch": 0.7657938637847247, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.3114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10558 }, { "epoch": 0.765866395880177, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10559 }, { "epoch": 0.7659389279756292, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.2718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10560 }, { "epoch": 0.7660114600710815, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10561 }, { "epoch": 0.7660839921665337, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.3953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10562 }, { "epoch": 0.7661565242619859, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 9.3809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10563 }, { "epoch": 0.7662290563574382, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10564 }, { "epoch": 0.7663015884528904, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10565 }, { "epoch": 0.7663741205483426, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.0201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10566 }, { "epoch": 0.7664466526437949, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10567 }, { "epoch": 0.7665191847392471, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.7834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10568 }, { "epoch": 0.7665917168346994, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 8.9963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10569 }, { "epoch": 0.7666642489301516, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10570 }, { "epoch": 0.7667367810256038, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10571 }, { "epoch": 0.766809313121056, "grad_norm": 16.875, "learning_rate": 0.0003, "loss": 8.8335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10572 }, { "epoch": 0.7668818452165083, "grad_norm": 37.5, "learning_rate": 0.0003, "loss": 8.5022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10573 }, { "epoch": 0.7669543773119606, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.3791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10574 }, { "epoch": 0.7670269094074128, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.3388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10575 }, { "epoch": 0.767099441502865, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10576 }, { "epoch": 0.7671719735983172, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.6204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10577 }, { "epoch": 0.7672445056937695, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.7827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10578 }, { "epoch": 0.7673170377892218, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.6408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10579 }, { "epoch": 0.767389569884674, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.9398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10580 }, { "epoch": 0.7674621019801262, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10581 }, { "epoch": 0.7675346340755784, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10582 }, { "epoch": 0.7676071661710306, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.93, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10583 }, { "epoch": 0.767679698266483, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.4878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10584 }, { "epoch": 0.7677522303619352, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.2202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10585 }, { "epoch": 0.7678247624573874, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.7696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10586 }, { "epoch": 0.7678972945528396, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.2233, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10587 }, { "epoch": 0.7679698266482918, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.5038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10588 }, { "epoch": 0.7680423587437442, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.4024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10589 }, { "epoch": 0.7681148908391964, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.3502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10590 }, { "epoch": 0.7681874229346486, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.7048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10591 }, { "epoch": 0.7682599550301008, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.8673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10592 }, { "epoch": 0.768332487125553, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.3496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10593 }, { "epoch": 0.7684050192210053, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.5878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10594 }, { "epoch": 0.7684775513164576, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.2371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10595 }, { "epoch": 0.7685500834119098, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.2243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10596 }, { "epoch": 0.768622615507362, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10597 }, { "epoch": 0.7686951476028142, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.8314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10598 }, { "epoch": 0.7687676796982665, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.7793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10599 }, { "epoch": 0.7688402117937188, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.1595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10600 }, { "epoch": 0.768912743889171, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.8174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10601 }, { "epoch": 0.7689852759846232, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10602 }, { "epoch": 0.7690578080800754, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.7974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10603 }, { "epoch": 0.7691303401755276, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.6209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10604 }, { "epoch": 0.7692028722709799, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10605 }, { "epoch": 0.7692754043664322, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.9106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10606 }, { "epoch": 0.7693479364618844, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.6522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10607 }, { "epoch": 0.7694204685573366, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.0112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10608 }, { "epoch": 0.7694930006527888, "grad_norm": 11.5, "learning_rate": 0.0003, "loss": 8.4697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10609 }, { "epoch": 0.7695655327482411, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10610 }, { "epoch": 0.7696380648436933, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.7594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10611 }, { "epoch": 0.7697105969391456, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10612 }, { "epoch": 0.7697831290345978, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.6435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10613 }, { "epoch": 0.76985566113005, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.8125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10614 }, { "epoch": 0.7699281932255023, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10615 }, { "epoch": 0.7700007253209545, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10616 }, { "epoch": 0.7700732574164068, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.6813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10617 }, { "epoch": 0.770145789511859, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10618 }, { "epoch": 0.7702183216073112, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10619 }, { "epoch": 0.7702908537027635, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10620 }, { "epoch": 0.7703633857982157, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 9.1399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10621 }, { "epoch": 0.7704359178936679, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.3234, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10622 }, { "epoch": 0.7705084499891202, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10623 }, { "epoch": 0.7705809820845724, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.8148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10624 }, { "epoch": 0.7706535141800247, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10625 }, { "epoch": 0.7707260462754769, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.5193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10626 }, { "epoch": 0.7707985783709291, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10627 }, { "epoch": 0.7708711104663813, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.2913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10628 }, { "epoch": 0.7709436425618336, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10629 }, { "epoch": 0.7710161746572859, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10630 }, { "epoch": 0.7710887067527381, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.7263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10631 }, { "epoch": 0.7711612388481903, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10632 }, { "epoch": 0.7712337709436425, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 9.2188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10633 }, { "epoch": 0.7713063030390948, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.8679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10634 }, { "epoch": 0.7713788351345471, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.6014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10635 }, { "epoch": 0.7714513672299993, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6497, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10636 }, { "epoch": 0.7715238993254515, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.9107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10637 }, { "epoch": 0.7715964314209037, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10638 }, { "epoch": 0.7716689635163559, "grad_norm": 12.3125, "learning_rate": 0.0003, "loss": 9.0907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10639 }, { "epoch": 0.7717414956118083, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10640 }, { "epoch": 0.7718140277072605, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10641 }, { "epoch": 0.7718865598027127, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.5489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10642 }, { "epoch": 0.7719590918981649, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.2521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10643 }, { "epoch": 0.7720316239936171, "grad_norm": 35.25, "learning_rate": 0.0003, "loss": 8.985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10644 }, { "epoch": 0.7721041560890695, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10645 }, { "epoch": 0.7721766881845217, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10646 }, { "epoch": 0.7722492202799739, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10647 }, { "epoch": 0.7723217523754261, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.0408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10648 }, { "epoch": 0.7723942844708783, "grad_norm": 36.0, "learning_rate": 0.0003, "loss": 8.8418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10649 }, { "epoch": 0.7724668165663306, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10650 }, { "epoch": 0.7725393486617829, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.1298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10651 }, { "epoch": 0.7726118807572351, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.4151, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10652 }, { "epoch": 0.7726844128526873, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10653 }, { "epoch": 0.7727569449481395, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10654 }, { "epoch": 0.7728294770435918, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.1405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10655 }, { "epoch": 0.772902009139044, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.1586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10656 }, { "epoch": 0.7729745412344963, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.1813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10657 }, { "epoch": 0.7730470733299485, "grad_norm": 1.5078125, "learning_rate": 0.0003, "loss": 8.7049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10658 }, { "epoch": 0.7731196054254007, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10659 }, { "epoch": 0.773192137520853, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10660 }, { "epoch": 0.7732646696163052, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.6104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10661 }, { "epoch": 0.7733372017117575, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.2436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10662 }, { "epoch": 0.7734097338072097, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.3355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10663 }, { "epoch": 0.7734822659026619, "grad_norm": 17.875, "learning_rate": 0.0003, "loss": 8.0354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10664 }, { "epoch": 0.7735547979981142, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10665 }, { "epoch": 0.7736273300935664, "grad_norm": 14.5, "learning_rate": 0.0003, "loss": 8.8516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10666 }, { "epoch": 0.7736998621890187, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10667 }, { "epoch": 0.7737723942844709, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.1684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10668 }, { "epoch": 0.7738449263799231, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.9815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10669 }, { "epoch": 0.7739174584753754, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10670 }, { "epoch": 0.7739899905708276, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10671 }, { "epoch": 0.7740625226662798, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10672 }, { "epoch": 0.774135054761732, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.5126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10673 }, { "epoch": 0.7742075868571843, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.3595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10674 }, { "epoch": 0.7742801189526365, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10675 }, { "epoch": 0.7743526510480888, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.6663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10676 }, { "epoch": 0.774425183143541, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10677 }, { "epoch": 0.7744977152389932, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.7768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10678 }, { "epoch": 0.7745702473344455, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.3287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10679 }, { "epoch": 0.7746427794298977, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10680 }, { "epoch": 0.77471531152535, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10681 }, { "epoch": 0.7747878436208022, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.6238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10682 }, { "epoch": 0.7748603757162544, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.9965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10683 }, { "epoch": 0.7749329078117067, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10684 }, { "epoch": 0.7750054399071589, "grad_norm": 16.625, "learning_rate": 0.0003, "loss": 8.8605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10685 }, { "epoch": 0.7750779720026112, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.8977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10686 }, { "epoch": 0.7751505040980634, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10687 }, { "epoch": 0.7752230361935156, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.8864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10688 }, { "epoch": 0.7752955682889678, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10689 }, { "epoch": 0.7753681003844201, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.7033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10690 }, { "epoch": 0.7754406324798724, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10691 }, { "epoch": 0.7755131645753246, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.9412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10692 }, { "epoch": 0.7755856966707768, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.7518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10693 }, { "epoch": 0.775658228766229, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0465, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10694 }, { "epoch": 0.7757307608616812, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.7731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10695 }, { "epoch": 0.7758032929571336, "grad_norm": 27.125, "learning_rate": 0.0003, "loss": 8.5134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10696 }, { "epoch": 0.7758758250525858, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10697 }, { "epoch": 0.775948357148038, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.4257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10698 }, { "epoch": 0.7760208892434902, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.7231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10699 }, { "epoch": 0.7760934213389424, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.4884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10700 }, { "epoch": 0.7761659534343948, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10701 }, { "epoch": 0.776238485529847, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.6552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10702 }, { "epoch": 0.7763110176252992, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10703 }, { "epoch": 0.7763835497207514, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.7115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10704 }, { "epoch": 0.7764560818162036, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.1115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10705 }, { "epoch": 0.776528613911656, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.1538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10706 }, { "epoch": 0.7766011460071082, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.1603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10707 }, { "epoch": 0.7766736781025604, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.1929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10708 }, { "epoch": 0.7767462101980126, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.6716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10709 }, { "epoch": 0.7768187422934648, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10710 }, { "epoch": 0.7768912743889171, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10711 }, { "epoch": 0.7769638064843694, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.5992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10712 }, { "epoch": 0.7770363385798216, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.3546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10713 }, { "epoch": 0.7771088706752738, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6042, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10714 }, { "epoch": 0.777181402770726, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.6702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10715 }, { "epoch": 0.7772539348661783, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.0888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10716 }, { "epoch": 0.7773264669616305, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.6393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10717 }, { "epoch": 0.7773989990570828, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10718 }, { "epoch": 0.777471531152535, "grad_norm": 15.9375, "learning_rate": 0.0003, "loss": 9.0693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10719 }, { "epoch": 0.7775440632479872, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10720 }, { "epoch": 0.7776165953434395, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.9439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10721 }, { "epoch": 0.7776891274388917, "grad_norm": 14.1875, "learning_rate": 0.0003, "loss": 8.4219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10722 }, { "epoch": 0.777761659534344, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10723 }, { "epoch": 0.7778341916297962, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.4711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10724 }, { "epoch": 0.7779067237252484, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.3804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10725 }, { "epoch": 0.7779792558207007, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.9396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10726 }, { "epoch": 0.7780517879161529, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.4544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10727 }, { "epoch": 0.7781243200116051, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10728 }, { "epoch": 0.7781968521070574, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.3652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10729 }, { "epoch": 0.7782693842025096, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.1605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10730 }, { "epoch": 0.7783419162979619, "grad_norm": 18.5, "learning_rate": 0.0003, "loss": 8.0184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10731 }, { "epoch": 0.7784144483934141, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.8374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10732 }, { "epoch": 0.7784869804888663, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.5337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10733 }, { "epoch": 0.7785595125843185, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.2216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10734 }, { "epoch": 0.7786320446797708, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.8902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10735 }, { "epoch": 0.7787045767752231, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.3799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10736 }, { "epoch": 0.7787771088706753, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10737 }, { "epoch": 0.7788496409661275, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.8472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10738 }, { "epoch": 0.7789221730615797, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.1599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10739 }, { "epoch": 0.778994705157032, "grad_norm": 12.625, "learning_rate": 0.0003, "loss": 9.1195, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10740 }, { "epoch": 0.7790672372524843, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10741 }, { "epoch": 0.7791397693479365, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10742 }, { "epoch": 0.7792123014433887, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.4361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10743 }, { "epoch": 0.7792848335388409, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.5369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10744 }, { "epoch": 0.7793573656342931, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.8026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10745 }, { "epoch": 0.7794298977297454, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.7257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10746 }, { "epoch": 0.7795024298251977, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10747 }, { "epoch": 0.7795749619206499, "grad_norm": 10.75, "learning_rate": 0.0003, "loss": 9.0464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10748 }, { "epoch": 0.7796474940161021, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.1636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10749 }, { "epoch": 0.7797200261115543, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.4166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10750 }, { "epoch": 0.7797925582070065, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10751 }, { "epoch": 0.7798650903024589, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.4214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10752 }, { "epoch": 0.7799376223979111, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10753 }, { "epoch": 0.7800101544933633, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.1186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10754 }, { "epoch": 0.7800826865888155, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.7321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10755 }, { "epoch": 0.7801552186842677, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.7481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10756 }, { "epoch": 0.7802277507797201, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.3698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10757 }, { "epoch": 0.7803002828751723, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.9536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10758 }, { "epoch": 0.7803728149706245, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10759 }, { "epoch": 0.7804453470660767, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.9914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10760 }, { "epoch": 0.7805178791615289, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10761 }, { "epoch": 0.7805904112569813, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.7961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10762 }, { "epoch": 0.7806629433524335, "grad_norm": 69.5, "learning_rate": 0.0003, "loss": 8.665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10763 }, { "epoch": 0.7807354754478857, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.0734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10764 }, { "epoch": 0.7808080075433379, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10765 }, { "epoch": 0.7808805396387901, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.7343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10766 }, { "epoch": 0.7809530717342424, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.0722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10767 }, { "epoch": 0.7810256038296947, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.7794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10768 }, { "epoch": 0.7810981359251469, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10769 }, { "epoch": 0.7811706680205991, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10770 }, { "epoch": 0.7812432001160513, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.7555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10771 }, { "epoch": 0.7813157322115036, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10772 }, { "epoch": 0.7813882643069558, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10773 }, { "epoch": 0.7814607964024081, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.9629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10774 }, { "epoch": 0.7815333284978603, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.1813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10775 }, { "epoch": 0.7816058605933125, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.8003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10776 }, { "epoch": 0.7816783926887648, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.6773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10777 }, { "epoch": 0.781750924784217, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.1844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10778 }, { "epoch": 0.7818234568796693, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.9342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10779 }, { "epoch": 0.7818959889751215, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.4617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10780 }, { "epoch": 0.7819685210705737, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.6065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10781 }, { "epoch": 0.782041053166026, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.1909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10782 }, { "epoch": 0.7821135852614782, "grad_norm": 1.625, "learning_rate": 0.0003, "loss": 8.4871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10783 }, { "epoch": 0.7821861173569304, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.3351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10784 }, { "epoch": 0.7822586494523827, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.3974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10785 }, { "epoch": 0.7823311815478349, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10786 }, { "epoch": 0.7824037136432872, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.0674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10787 }, { "epoch": 0.7824762457387394, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.2836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10788 }, { "epoch": 0.7825487778341916, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 8.9123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10789 }, { "epoch": 0.7826213099296438, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.8512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10790 }, { "epoch": 0.7826938420250961, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.0013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10791 }, { "epoch": 0.7827663741205484, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.7161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10792 }, { "epoch": 0.7828389062160006, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.4191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10793 }, { "epoch": 0.7829114383114528, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10794 }, { "epoch": 0.782983970406905, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.2096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10795 }, { "epoch": 0.7830565025023573, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.2925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10796 }, { "epoch": 0.7831290345978096, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10797 }, { "epoch": 0.7832015666932618, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.4613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10798 }, { "epoch": 0.783274098788714, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.645, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10799 }, { "epoch": 0.7833466308841662, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.5157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10800 }, { "epoch": 0.7834191629796184, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10801 }, { "epoch": 0.7834916950750708, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.5079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10802 }, { "epoch": 0.783564227170523, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10803 }, { "epoch": 0.7836367592659752, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.7695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10804 }, { "epoch": 0.7837092913614274, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.7718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10805 }, { "epoch": 0.7837818234568796, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10806 }, { "epoch": 0.783854355552332, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.3486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10807 }, { "epoch": 0.7839268876477842, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10808 }, { "epoch": 0.7839994197432364, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10809 }, { "epoch": 0.7840719518386886, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.1872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10810 }, { "epoch": 0.7841444839341408, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10811 }, { "epoch": 0.7842170160295932, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.0156, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10812 }, { "epoch": 0.7842895481250454, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10813 }, { "epoch": 0.7843620802204976, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.3969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10814 }, { "epoch": 0.7844346123159498, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.2146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10815 }, { "epoch": 0.784507144411402, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.1779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10816 }, { "epoch": 0.7845796765068542, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.7588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10817 }, { "epoch": 0.7846522086023066, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10818 }, { "epoch": 0.7847247406977588, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10819 }, { "epoch": 0.784797272793211, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.7274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10820 }, { "epoch": 0.7848698048886632, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.9229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10821 }, { "epoch": 0.7849423369841154, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10822 }, { "epoch": 0.7850148690795677, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10823 }, { "epoch": 0.78508740117502, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.6323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10824 }, { "epoch": 0.7851599332704722, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10825 }, { "epoch": 0.7852324653659244, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 9.1092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10826 }, { "epoch": 0.7853049974613766, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.9003, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10827 }, { "epoch": 0.7853775295568289, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10828 }, { "epoch": 0.7854500616522812, "grad_norm": 39.5, "learning_rate": 0.0003, "loss": 8.994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10829 }, { "epoch": 0.7855225937477334, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.3171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10830 }, { "epoch": 0.7855951258431856, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.6586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10831 }, { "epoch": 0.7856676579386378, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.4507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10832 }, { "epoch": 0.7857401900340901, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.5984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10833 }, { "epoch": 0.7858127221295423, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10834 }, { "epoch": 0.7858852542249946, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.3425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10835 }, { "epoch": 0.7859577863204468, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 9.1071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10836 }, { "epoch": 0.786030318415899, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.5782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10837 }, { "epoch": 0.7861028505113513, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10838 }, { "epoch": 0.7861753826068035, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10839 }, { "epoch": 0.7862479147022557, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10840 }, { "epoch": 0.786320446797708, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.2758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10841 }, { "epoch": 0.7863929788931602, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10842 }, { "epoch": 0.7864655109886125, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10843 }, { "epoch": 0.7865380430840647, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 8.9284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10844 }, { "epoch": 0.7866105751795169, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.3594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10845 }, { "epoch": 0.7866831072749692, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.1938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10846 }, { "epoch": 0.7867556393704214, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10847 }, { "epoch": 0.7868281714658737, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 9.1636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10848 }, { "epoch": 0.7869007035613259, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10849 }, { "epoch": 0.7869732356567781, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.581, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10850 }, { "epoch": 0.7870457677522303, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.9342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10851 }, { "epoch": 0.7871182998476826, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.6844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10852 }, { "epoch": 0.7871908319431349, "grad_norm": 111.5, "learning_rate": 0.0003, "loss": 8.7713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10853 }, { "epoch": 0.7872633640385871, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.0488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10854 }, { "epoch": 0.7873358961340393, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 9.1427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10855 }, { "epoch": 0.7874084282294915, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.1728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10856 }, { "epoch": 0.7874809603249437, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10857 }, { "epoch": 0.7875534924203961, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.0683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10858 }, { "epoch": 0.7876260245158483, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.9807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10859 }, { "epoch": 0.7876985566113005, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.6274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10860 }, { "epoch": 0.7877710887067527, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.1636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10861 }, { "epoch": 0.7878436208022049, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10862 }, { "epoch": 0.7879161528976573, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10863 }, { "epoch": 0.7879886849931095, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10864 }, { "epoch": 0.7880612170885617, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10865 }, { "epoch": 0.7881337491840139, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10866 }, { "epoch": 0.7882062812794661, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 9.1237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10867 }, { "epoch": 0.7882788133749185, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.4506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10868 }, { "epoch": 0.7883513454703707, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10869 }, { "epoch": 0.7884238775658229, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10870 }, { "epoch": 0.7884964096612751, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.2381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10871 }, { "epoch": 0.7885689417567273, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 8.7201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10872 }, { "epoch": 0.7886414738521796, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10873 }, { "epoch": 0.7887140059476319, "grad_norm": 20.625, "learning_rate": 0.0003, "loss": 8.6391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10874 }, { "epoch": 0.7887865380430841, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10875 }, { "epoch": 0.7888590701385363, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.6089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10876 }, { "epoch": 0.7889316022339885, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10877 }, { "epoch": 0.7890041343294408, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10878 }, { "epoch": 0.789076666424893, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10879 }, { "epoch": 0.7891491985203453, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.2815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10880 }, { "epoch": 0.7892217306157975, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10881 }, { "epoch": 0.7892942627112497, "grad_norm": 11.125, "learning_rate": 0.0003, "loss": 8.5105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10882 }, { "epoch": 0.7893667948067019, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10883 }, { "epoch": 0.7894393269021542, "grad_norm": 31.375, "learning_rate": 0.0003, "loss": 9.2708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10884 }, { "epoch": 0.7895118589976065, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.9913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10885 }, { "epoch": 0.7895843910930587, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.6265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10886 }, { "epoch": 0.7896569231885109, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10887 }, { "epoch": 0.7897294552839631, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10888 }, { "epoch": 0.7898019873794154, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.0442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10889 }, { "epoch": 0.7898745194748676, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.2241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10890 }, { "epoch": 0.7899470515703199, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10891 }, { "epoch": 0.7900195836657721, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.4799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10892 }, { "epoch": 0.7900921157612243, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10893 }, { "epoch": 0.7901646478566766, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.4705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10894 }, { "epoch": 0.7902371799521288, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.0746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10895 }, { "epoch": 0.790309712047581, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.2638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10896 }, { "epoch": 0.7903822441430333, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10897 }, { "epoch": 0.7904547762384855, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10898 }, { "epoch": 0.7905273083339378, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.3249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10899 }, { "epoch": 0.79059984042939, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.0068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10900 }, { "epoch": 0.7906723725248422, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.1988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10901 }, { "epoch": 0.7907449046202945, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10902 }, { "epoch": 0.7908174367157467, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.8005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10903 }, { "epoch": 0.790889968811199, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.5851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10904 }, { "epoch": 0.7909625009066512, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.8238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10905 }, { "epoch": 0.7910350330021034, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.85, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10906 }, { "epoch": 0.7911075650975556, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8372, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10907 }, { "epoch": 0.7911800971930079, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.1927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10908 }, { "epoch": 0.7912526292884602, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10909 }, { "epoch": 0.7913251613839124, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.2863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10910 }, { "epoch": 0.7913976934793646, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.7736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10911 }, { "epoch": 0.7914702255748168, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.2936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10912 }, { "epoch": 0.791542757670269, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.2824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10913 }, { "epoch": 0.7916152897657214, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.7309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10914 }, { "epoch": 0.7916878218611736, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10915 }, { "epoch": 0.7917603539566258, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.1353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10916 }, { "epoch": 0.791832886052078, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.4602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10917 }, { "epoch": 0.7919054181475302, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10918 }, { "epoch": 0.7919779502429826, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.2654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10919 }, { "epoch": 0.7920504823384348, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10920 }, { "epoch": 0.792123014433887, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10921 }, { "epoch": 0.7921955465293392, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 9.2452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10922 }, { "epoch": 0.7922680786247914, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.6601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10923 }, { "epoch": 0.7923406107202438, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.5071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10924 }, { "epoch": 0.792413142815696, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10925 }, { "epoch": 0.7924856749111482, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.4457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10926 }, { "epoch": 0.7925582070066004, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 8.9793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10927 }, { "epoch": 0.7926307391020526, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10928 }, { "epoch": 0.7927032711975049, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.2669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10929 }, { "epoch": 0.7927758032929572, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10930 }, { "epoch": 0.7928483353884094, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10931 }, { "epoch": 0.7929208674838616, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10932 }, { "epoch": 0.7929933995793138, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10933 }, { "epoch": 0.7930659316747661, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 9.4795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10934 }, { "epoch": 0.7931384637702183, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.4076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10935 }, { "epoch": 0.7932109958656706, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.3725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10936 }, { "epoch": 0.7932835279611228, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.9037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10937 }, { "epoch": 0.793356060056575, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10938 }, { "epoch": 0.7934285921520273, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.5084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10939 }, { "epoch": 0.7935011242474795, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.4791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10940 }, { "epoch": 0.7935736563429318, "grad_norm": 14.5, "learning_rate": 0.0003, "loss": 9.2803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10941 }, { "epoch": 0.793646188438384, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 9.1601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10942 }, { "epoch": 0.7937187205338362, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.4792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10943 }, { "epoch": 0.7937912526292885, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.2886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10944 }, { "epoch": 0.7938637847247407, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 8.4505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10945 }, { "epoch": 0.7939363168201929, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 9.0078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10946 }, { "epoch": 0.7940088489156452, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 8.9336, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10947 }, { "epoch": 0.7940813810110974, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10948 }, { "epoch": 0.7941539131065497, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.4903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10949 }, { "epoch": 0.7942264452020019, "grad_norm": 11.375, "learning_rate": 0.0003, "loss": 8.9251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10950 }, { "epoch": 0.7942989772974541, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.0582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10951 }, { "epoch": 0.7943715093929063, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.3768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10952 }, { "epoch": 0.7944440414883586, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10953 }, { "epoch": 0.7945165735838108, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.5714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10954 }, { "epoch": 0.7945891056792631, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.0258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10955 }, { "epoch": 0.7946616377747153, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10956 }, { "epoch": 0.7947341698701675, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.4117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10957 }, { "epoch": 0.7948067019656198, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.9721, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10958 }, { "epoch": 0.794879234061072, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.4658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10959 }, { "epoch": 0.7949517661565243, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10960 }, { "epoch": 0.7950242982519765, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.6383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10961 }, { "epoch": 0.7950968303474287, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10962 }, { "epoch": 0.795169362442881, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10963 }, { "epoch": 0.7952418945383332, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.4341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10964 }, { "epoch": 0.7953144266337855, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10965 }, { "epoch": 0.7953869587292377, "grad_norm": 1.7109375, "learning_rate": 0.0003, "loss": 9.0483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10966 }, { "epoch": 0.7954594908246899, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10967 }, { "epoch": 0.7955320229201421, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 9.0359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10968 }, { "epoch": 0.7956045550155944, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 9.396, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10969 }, { "epoch": 0.7956770871110467, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10970 }, { "epoch": 0.7957496192064989, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 9.0404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10971 }, { "epoch": 0.7958221513019511, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.5864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10972 }, { "epoch": 0.7958946833974033, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.1288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10973 }, { "epoch": 0.7959672154928555, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.6674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10974 }, { "epoch": 0.7960397475883079, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.2784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10975 }, { "epoch": 0.7961122796837601, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.5169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10976 }, { "epoch": 0.7961848117792123, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.6778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10977 }, { "epoch": 0.7962573438746645, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10978 }, { "epoch": 0.7963298759701167, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10979 }, { "epoch": 0.7964024080655691, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.8947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10980 }, { "epoch": 0.7964749401610213, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10981 }, { "epoch": 0.7965474722564735, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.7103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10982 }, { "epoch": 0.7966200043519257, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10983 }, { "epoch": 0.7966925364473779, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10984 }, { "epoch": 0.7967650685428302, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.1046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10985 }, { "epoch": 0.7968376006382825, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10986 }, { "epoch": 0.7969101327337347, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.82, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10987 }, { "epoch": 0.7969826648291869, "grad_norm": 18.625, "learning_rate": 0.0003, "loss": 8.8726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10988 }, { "epoch": 0.7970551969246391, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.9303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10989 }, { "epoch": 0.7971277290200914, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10990 }, { "epoch": 0.7972002611155437, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.5057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10991 }, { "epoch": 0.7972727932109959, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10992 }, { "epoch": 0.7973453253064481, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.6141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10993 }, { "epoch": 0.7974178574019003, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10994 }, { "epoch": 0.7974903894973526, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.4966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10995 }, { "epoch": 0.7975629215928048, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10996 }, { "epoch": 0.7976354536882571, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 8.6742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10997 }, { "epoch": 0.7977079857837093, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.8019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10998 }, { "epoch": 0.7977805178791615, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 10999 }, { "epoch": 0.7978530499746138, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.4479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11000 }, { "epoch": 0.797925582070066, "grad_norm": 1.6953125, "learning_rate": 0.0003, "loss": 8.7686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11001 }, { "epoch": 0.7979981141655182, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.6283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11002 }, { "epoch": 0.7980706462609705, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 8.9441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11003 }, { "epoch": 0.7981431783564227, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.9907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11004 }, { "epoch": 0.798215710451875, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.8499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11005 }, { "epoch": 0.7982882425473272, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.6914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11006 }, { "epoch": 0.7983607746427794, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.9308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11007 }, { "epoch": 0.7984333067382317, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.0615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11008 }, { "epoch": 0.7985058388336839, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11009 }, { "epoch": 0.7985783709291362, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11010 }, { "epoch": 0.7986509030245884, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.8267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11011 }, { "epoch": 0.7987234351200406, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.2023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11012 }, { "epoch": 0.7987959672154928, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.2304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11013 }, { "epoch": 0.7988684993109451, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.3002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11014 }, { "epoch": 0.7989410314063974, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.54, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11015 }, { "epoch": 0.7990135635018496, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.2382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11016 }, { "epoch": 0.7990860955973018, "grad_norm": 12.9375, "learning_rate": 0.0003, "loss": 8.1425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11017 }, { "epoch": 0.799158627692754, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.5701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11018 }, { "epoch": 0.7992311597882062, "grad_norm": 1.609375, "learning_rate": 0.0003, "loss": 8.8425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11019 }, { "epoch": 0.7993036918836586, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.0302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11020 }, { "epoch": 0.7993762239791108, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11021 }, { "epoch": 0.799448756074563, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.1655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11022 }, { "epoch": 0.7995212881700152, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.2888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11023 }, { "epoch": 0.7995938202654674, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.3584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11024 }, { "epoch": 0.7996663523609197, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11025 }, { "epoch": 0.799738884456372, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11026 }, { "epoch": 0.7998114165518242, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11027 }, { "epoch": 0.7998839486472764, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.7661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11028 }, { "epoch": 0.7999564807427286, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11029 }, { "epoch": 0.8000290128381808, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.1961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11030 }, { "epoch": 0.8001015449336332, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.7994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11031 }, { "epoch": 0.8001740770290854, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.7153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11032 }, { "epoch": 0.8002466091245376, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 9.2306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11033 }, { "epoch": 0.8003191412199898, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 9.3661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11034 }, { "epoch": 0.800391673315442, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11035 }, { "epoch": 0.8004642054108944, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.5948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11036 }, { "epoch": 0.8005367375063466, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.6288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11037 }, { "epoch": 0.8006092696017988, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.843, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11038 }, { "epoch": 0.800681801697251, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.6279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11039 }, { "epoch": 0.8007543337927032, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11040 }, { "epoch": 0.8008268658881555, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11041 }, { "epoch": 0.8008993979836078, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11042 }, { "epoch": 0.80097193007906, "grad_norm": 68.5, "learning_rate": 0.0003, "loss": 9.1399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11043 }, { "epoch": 0.8010444621745122, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 9.0004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11044 }, { "epoch": 0.8011169942699644, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11045 }, { "epoch": 0.8011895263654167, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.9904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11046 }, { "epoch": 0.801262058460869, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.98, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11047 }, { "epoch": 0.8013345905563212, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11048 }, { "epoch": 0.8014071226517734, "grad_norm": 1.6796875, "learning_rate": 0.0003, "loss": 8.849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11049 }, { "epoch": 0.8014796547472256, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.4914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11050 }, { "epoch": 0.8015521868426779, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.4077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11051 }, { "epoch": 0.8016247189381301, "grad_norm": 13.6875, "learning_rate": 0.0003, "loss": 9.016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11052 }, { "epoch": 0.8016972510335824, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.0802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11053 }, { "epoch": 0.8017697831290346, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11054 }, { "epoch": 0.8018423152244868, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11055 }, { "epoch": 0.8019148473199391, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.7472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11056 }, { "epoch": 0.8019873794153913, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11057 }, { "epoch": 0.8020599115108435, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11058 }, { "epoch": 0.8021324436062958, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.8395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11059 }, { "epoch": 0.802204975701748, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.2101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11060 }, { "epoch": 0.8022775077972003, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.4306, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11061 }, { "epoch": 0.8023500398926525, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.0111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11062 }, { "epoch": 0.8024225719881047, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.4218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11063 }, { "epoch": 0.802495104083557, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11064 }, { "epoch": 0.8025676361790092, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.9955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11065 }, { "epoch": 0.8026401682744615, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.1798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11066 }, { "epoch": 0.8027127003699137, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.3057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11067 }, { "epoch": 0.8027852324653659, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.2776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11068 }, { "epoch": 0.8028577645608181, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11069 }, { "epoch": 0.8029302966562704, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11070 }, { "epoch": 0.8030028287517227, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 9.2646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11071 }, { "epoch": 0.8030753608471749, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.6158, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11072 }, { "epoch": 0.8031478929426271, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.1969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11073 }, { "epoch": 0.8032204250380793, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.9924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11074 }, { "epoch": 0.8032929571335315, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.8138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11075 }, { "epoch": 0.8033654892289839, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.0226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11076 }, { "epoch": 0.8034380213244361, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.91, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11077 }, { "epoch": 0.8035105534198883, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11078 }, { "epoch": 0.8035830855153405, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.2478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11079 }, { "epoch": 0.8036556176107927, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11080 }, { "epoch": 0.8037281497062451, "grad_norm": 14.25, "learning_rate": 0.0003, "loss": 8.6797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11081 }, { "epoch": 0.8038006818016973, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.8852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11082 }, { "epoch": 0.8038732138971495, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.3417, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11083 }, { "epoch": 0.8039457459926017, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11084 }, { "epoch": 0.8040182780880539, "grad_norm": 15.5625, "learning_rate": 0.0003, "loss": 8.4713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11085 }, { "epoch": 0.8040908101835063, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11086 }, { "epoch": 0.8041633422789585, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.4683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11087 }, { "epoch": 0.8042358743744107, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.1726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11088 }, { "epoch": 0.8043084064698629, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.6757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11089 }, { "epoch": 0.8043809385653151, "grad_norm": 23.75, "learning_rate": 0.0003, "loss": 8.4863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11090 }, { "epoch": 0.8044534706607674, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 8.5583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11091 }, { "epoch": 0.8045260027562197, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11092 }, { "epoch": 0.8045985348516719, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.9735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11093 }, { "epoch": 0.8046710669471241, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.6309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11094 }, { "epoch": 0.8047435990425763, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.2837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11095 }, { "epoch": 0.8048161311380285, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.9426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11096 }, { "epoch": 0.8048886632334808, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.9462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11097 }, { "epoch": 0.8049611953289331, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.2351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11098 }, { "epoch": 0.8050337274243853, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 8.5002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11099 }, { "epoch": 0.8051062595198375, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11100 }, { "epoch": 0.8051787916152897, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11101 }, { "epoch": 0.805251323710742, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.9519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11102 }, { "epoch": 0.8053238558061943, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11103 }, { "epoch": 0.8053963879016465, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.1201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11104 }, { "epoch": 0.8054689199970987, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.7174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11105 }, { "epoch": 0.8055414520925509, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11106 }, { "epoch": 0.8056139841880032, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.6241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11107 }, { "epoch": 0.8056865162834554, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.5226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11108 }, { "epoch": 0.8057590483789077, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.4049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11109 }, { "epoch": 0.8058315804743599, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 8.7887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11110 }, { "epoch": 0.8059041125698121, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.7297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11111 }, { "epoch": 0.8059766446652644, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.3751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11112 }, { "epoch": 0.8060491767607166, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11113 }, { "epoch": 0.8061217088561689, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.5401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11114 }, { "epoch": 0.8061942409516211, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11115 }, { "epoch": 0.8062667730470733, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.4434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11116 }, { "epoch": 0.8063393051425256, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11117 }, { "epoch": 0.8064118372379778, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.4204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11118 }, { "epoch": 0.80648436933343, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11119 }, { "epoch": 0.8065569014288823, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11120 }, { "epoch": 0.8066294335243345, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.5224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11121 }, { "epoch": 0.8067019656197868, "grad_norm": 7.90625, "learning_rate": 0.0003, "loss": 9.2101, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11122 }, { "epoch": 0.806774497715239, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.6021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11123 }, { "epoch": 0.8068470298106912, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11124 }, { "epoch": 0.8069195619061434, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11125 }, { "epoch": 0.8069920940015957, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.5374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11126 }, { "epoch": 0.807064626097048, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.4923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11127 }, { "epoch": 0.8071371581925002, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.7014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11128 }, { "epoch": 0.8072096902879524, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.1595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11129 }, { "epoch": 0.8072822223834046, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.4723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11130 }, { "epoch": 0.8073547544788569, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.4476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11131 }, { "epoch": 0.8074272865743092, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11132 }, { "epoch": 0.8074998186697614, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.2676, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11133 }, { "epoch": 0.8075723507652136, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.4401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11134 }, { "epoch": 0.8076448828606658, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.7623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11135 }, { "epoch": 0.807717414956118, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.8621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11136 }, { "epoch": 0.8077899470515704, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11137 }, { "epoch": 0.8078624791470226, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.5541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11138 }, { "epoch": 0.8079350112424748, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11139 }, { "epoch": 0.808007543337927, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.0285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11140 }, { "epoch": 0.8080800754333792, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.9285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11141 }, { "epoch": 0.8081526075288316, "grad_norm": 35.5, "learning_rate": 0.0003, "loss": 8.9397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11142 }, { "epoch": 0.8082251396242838, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.6118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11143 }, { "epoch": 0.808297671719736, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.1629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11144 }, { "epoch": 0.8083702038151882, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.779, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11145 }, { "epoch": 0.8084427359106404, "grad_norm": 42.0, "learning_rate": 0.0003, "loss": 8.488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11146 }, { "epoch": 0.8085152680060927, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.6121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11147 }, { "epoch": 0.808587800101545, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.41, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11148 }, { "epoch": 0.8086603321969972, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11149 }, { "epoch": 0.8087328642924494, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.6524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11150 }, { "epoch": 0.8088053963879016, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11151 }, { "epoch": 0.8088779284833539, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.4238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11152 }, { "epoch": 0.8089504605788062, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.7033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11153 }, { "epoch": 0.8090229926742584, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.3713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11154 }, { "epoch": 0.8090955247697106, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11155 }, { "epoch": 0.8091680568651628, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.3353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11156 }, { "epoch": 0.8092405889606151, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11157 }, { "epoch": 0.8093131210560673, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11158 }, { "epoch": 0.8093856531515196, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11159 }, { "epoch": 0.8094581852469718, "grad_norm": 24.5, "learning_rate": 0.0003, "loss": 8.7332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11160 }, { "epoch": 0.809530717342424, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11161 }, { "epoch": 0.8096032494378763, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11162 }, { "epoch": 0.8096757815333285, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.6792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11163 }, { "epoch": 0.8097483136287807, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11164 }, { "epoch": 0.809820845724233, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11165 }, { "epoch": 0.8098933778196852, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.3491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11166 }, { "epoch": 0.8099659099151374, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11167 }, { "epoch": 0.8100384420105897, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11168 }, { "epoch": 0.8101109741060419, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 8.9542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11169 }, { "epoch": 0.8101835062014942, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11170 }, { "epoch": 0.8102560382969464, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11171 }, { "epoch": 0.8103285703923986, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11172 }, { "epoch": 0.8104011024878509, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.0965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11173 }, { "epoch": 0.8104736345833031, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11174 }, { "epoch": 0.8105461666787553, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11175 }, { "epoch": 0.8106186987742076, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.1675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11176 }, { "epoch": 0.8106912308696598, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.5443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11177 }, { "epoch": 0.8107637629651121, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11178 }, { "epoch": 0.8108362950605643, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11179 }, { "epoch": 0.8109088271560165, "grad_norm": 16.875, "learning_rate": 0.0003, "loss": 9.0449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11180 }, { "epoch": 0.8109813592514687, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11181 }, { "epoch": 0.811053891346921, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11182 }, { "epoch": 0.8111264234423733, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.979, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11183 }, { "epoch": 0.8111989555378255, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.4167, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11184 }, { "epoch": 0.8112714876332777, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.1027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11185 }, { "epoch": 0.8113440197287299, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.3861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11186 }, { "epoch": 0.8114165518241822, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11187 }, { "epoch": 0.8114890839196345, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.0299, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11188 }, { "epoch": 0.8115616160150867, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.0796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11189 }, { "epoch": 0.8116341481105389, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11190 }, { "epoch": 0.8117066802059911, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.4683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11191 }, { "epoch": 0.8117792123014433, "grad_norm": 12.1875, "learning_rate": 0.0003, "loss": 8.7936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11192 }, { "epoch": 0.8118517443968957, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.2912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11193 }, { "epoch": 0.8119242764923479, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.5696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11194 }, { "epoch": 0.8119968085878001, "grad_norm": 67.5, "learning_rate": 0.0003, "loss": 8.6255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11195 }, { "epoch": 0.8120693406832523, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11196 }, { "epoch": 0.8121418727787045, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11197 }, { "epoch": 0.8122144048741569, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 9.1464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11198 }, { "epoch": 0.8122869369696091, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.0136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11199 }, { "epoch": 0.8123594690650613, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11200 }, { "epoch": 0.8124320011605135, "grad_norm": 7.8125, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11201 }, { "epoch": 0.8125045332559657, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.7571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11202 }, { "epoch": 0.812577065351418, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11203 }, { "epoch": 0.8126495974468703, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.0184, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11204 }, { "epoch": 0.8127221295423225, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.5259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11205 }, { "epoch": 0.8127946616377747, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.8462, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11206 }, { "epoch": 0.8128671937332269, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 9.0064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11207 }, { "epoch": 0.8129397258286792, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11208 }, { "epoch": 0.8130122579241315, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11209 }, { "epoch": 0.8130847900195837, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.2831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11210 }, { "epoch": 0.8131573221150359, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.3313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11211 }, { "epoch": 0.8132298542104881, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11212 }, { "epoch": 0.8133023863059404, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.7235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11213 }, { "epoch": 0.8133749184013926, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.6443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11214 }, { "epoch": 0.8134474504968449, "grad_norm": 1.9296875, "learning_rate": 0.0003, "loss": 8.553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11215 }, { "epoch": 0.8135199825922971, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11216 }, { "epoch": 0.8135925146877493, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11217 }, { "epoch": 0.8136650467832016, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.3904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11218 }, { "epoch": 0.8137375788786538, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.0353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11219 }, { "epoch": 0.813810110974106, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.3024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11220 }, { "epoch": 0.8138826430695583, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.02, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11221 }, { "epoch": 0.8139551751650105, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11222 }, { "epoch": 0.8140277072604628, "grad_norm": 19.75, "learning_rate": 0.0003, "loss": 8.9825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11223 }, { "epoch": 0.814100239355915, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.0681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11224 }, { "epoch": 0.8141727714513672, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.89, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11225 }, { "epoch": 0.8142453035468195, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.7744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11226 }, { "epoch": 0.8143178356422717, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11227 }, { "epoch": 0.814390367737724, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.7023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11228 }, { "epoch": 0.8144628998331762, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11229 }, { "epoch": 0.8145354319286284, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11230 }, { "epoch": 0.8146079640240806, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11231 }, { "epoch": 0.8146804961195329, "grad_norm": 20.75, "learning_rate": 0.0003, "loss": 9.0878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11232 }, { "epoch": 0.8147530282149851, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 9.1147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11233 }, { "epoch": 0.8148255603104374, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.5002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11234 }, { "epoch": 0.8148980924058896, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.83, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11235 }, { "epoch": 0.8149706245013418, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.9434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11236 }, { "epoch": 0.815043156596794, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.2848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11237 }, { "epoch": 0.8151156886922463, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.8617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11238 }, { "epoch": 0.8151882207876986, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.6414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11239 }, { "epoch": 0.8152607528831508, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11240 }, { "epoch": 0.815333284978603, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.6703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11241 }, { "epoch": 0.8154058170740552, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11242 }, { "epoch": 0.8154783491695075, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11243 }, { "epoch": 0.8155508812649598, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.5782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11244 }, { "epoch": 0.815623413360412, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.9688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11245 }, { "epoch": 0.8156959454558642, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.1198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11246 }, { "epoch": 0.8157684775513164, "grad_norm": 1.828125, "learning_rate": 0.0003, "loss": 9.4224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11247 }, { "epoch": 0.8158410096467686, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.2592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11248 }, { "epoch": 0.815913541742221, "grad_norm": 1.6015625, "learning_rate": 0.0003, "loss": 8.4742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11249 }, { "epoch": 0.8159860738376732, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 8.6037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11250 }, { "epoch": 0.8160586059331254, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11251 }, { "epoch": 0.8161311380285776, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.2885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11252 }, { "epoch": 0.8162036701240298, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.2685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11253 }, { "epoch": 0.8162762022194822, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.7261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11254 }, { "epoch": 0.8163487343149344, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.0068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11255 }, { "epoch": 0.8164212664103866, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.5893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11256 }, { "epoch": 0.8164937985058388, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11257 }, { "epoch": 0.816566330601291, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.1062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11258 }, { "epoch": 0.8166388626967434, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11259 }, { "epoch": 0.8167113947921956, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.8222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11260 }, { "epoch": 0.8167839268876478, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.7361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11261 }, { "epoch": 0.8168564589831, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11262 }, { "epoch": 0.8169289910785522, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.8865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11263 }, { "epoch": 0.8170015231740045, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.5045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11264 }, { "epoch": 0.8170740552694568, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.2427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11265 }, { "epoch": 0.817146587364909, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.837, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11266 }, { "epoch": 0.8172191194603612, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11267 }, { "epoch": 0.8172916515558134, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.7633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11268 }, { "epoch": 0.8173641836512657, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.8873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11269 }, { "epoch": 0.817436715746718, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11270 }, { "epoch": 0.8175092478421702, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.9109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11271 }, { "epoch": 0.8175817799376224, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11272 }, { "epoch": 0.8176543120330746, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.1136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11273 }, { "epoch": 0.8177268441285269, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.4018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11274 }, { "epoch": 0.8177993762239791, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.2673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11275 }, { "epoch": 0.8178719083194314, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.9836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11276 }, { "epoch": 0.8179444404148836, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11277 }, { "epoch": 0.8180169725103358, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.4452, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11278 }, { "epoch": 0.8180895046057881, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11279 }, { "epoch": 0.8181620367012403, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.4198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11280 }, { "epoch": 0.8182345687966925, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11281 }, { "epoch": 0.8183071008921448, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.2968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11282 }, { "epoch": 0.818379632987597, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.8342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11283 }, { "epoch": 0.8184521650830493, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.7236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11284 }, { "epoch": 0.8185246971785015, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 8.9781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11285 }, { "epoch": 0.8185972292739537, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.8219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11286 }, { "epoch": 0.818669761369406, "grad_norm": 15.8125, "learning_rate": 0.0003, "loss": 8.9533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11287 }, { "epoch": 0.8187422934648582, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11288 }, { "epoch": 0.8188148255603105, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.2066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11289 }, { "epoch": 0.8188873576557627, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.5671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11290 }, { "epoch": 0.8189598897512149, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6945, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11291 }, { "epoch": 0.8190324218466671, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11292 }, { "epoch": 0.8191049539421194, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.0448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11293 }, { "epoch": 0.8191774860375717, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11294 }, { "epoch": 0.8192500181330239, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.9924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11295 }, { "epoch": 0.8193225502284761, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.7927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11296 }, { "epoch": 0.8193950823239283, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11297 }, { "epoch": 0.8194676144193805, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.2084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11298 }, { "epoch": 0.8195401465148329, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11299 }, { "epoch": 0.8196126786102851, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11300 }, { "epoch": 0.8196852107057373, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11301 }, { "epoch": 0.8197577428011895, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 8.377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11302 }, { "epoch": 0.8198302748966417, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.4571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11303 }, { "epoch": 0.819902806992094, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 8.5864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11304 }, { "epoch": 0.8199753390875463, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11305 }, { "epoch": 0.8200478711829985, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.4825, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11306 }, { "epoch": 0.8201204032784507, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.5021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11307 }, { "epoch": 0.8201929353739029, "grad_norm": 14.0, "learning_rate": 0.0003, "loss": 8.5546, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11308 }, { "epoch": 0.8202654674693551, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11309 }, { "epoch": 0.8203379995648075, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9755, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11310 }, { "epoch": 0.8204105316602597, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.4136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11311 }, { "epoch": 0.8204830637557119, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11312 }, { "epoch": 0.8205555958511641, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.5735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11313 }, { "epoch": 0.8206281279466163, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11314 }, { "epoch": 0.8207006600420687, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.2692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11315 }, { "epoch": 0.8207731921375209, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11316 }, { "epoch": 0.8208457242329731, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11317 }, { "epoch": 0.8209182563284253, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 8.5125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11318 }, { "epoch": 0.8209907884238775, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11319 }, { "epoch": 0.8210633205193298, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.5383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11320 }, { "epoch": 0.8211358526147821, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11321 }, { "epoch": 0.8212083847102343, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.9467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11322 }, { "epoch": 0.8212809168056865, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 8.7203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11323 }, { "epoch": 0.8213534489011387, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.5202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11324 }, { "epoch": 0.821425980996591, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.9092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11325 }, { "epoch": 0.8214985130920432, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11326 }, { "epoch": 0.8215710451874955, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11327 }, { "epoch": 0.8216435772829477, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11328 }, { "epoch": 0.8217161093783999, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.3762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11329 }, { "epoch": 0.8217886414738522, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.8695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11330 }, { "epoch": 0.8218611735693044, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11331 }, { "epoch": 0.8219337056647567, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6024, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11332 }, { "epoch": 0.8220062377602089, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.6187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11333 }, { "epoch": 0.8220787698556611, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.8589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11334 }, { "epoch": 0.8221513019511134, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.9261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11335 }, { "epoch": 0.8222238340465656, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11336 }, { "epoch": 0.8222963661420178, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.3281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11337 }, { "epoch": 0.8223688982374701, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.4152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11338 }, { "epoch": 0.8224414303329223, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11339 }, { "epoch": 0.8225139624283746, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.6193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11340 }, { "epoch": 0.8225864945238268, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.6954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11341 }, { "epoch": 0.822659026619279, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.3714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11342 }, { "epoch": 0.8227315587147312, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11343 }, { "epoch": 0.8228040908101835, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.2599, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11344 }, { "epoch": 0.8228766229056358, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.5213, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11345 }, { "epoch": 0.822949155001088, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11346 }, { "epoch": 0.8230216870965402, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.6122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11347 }, { "epoch": 0.8230942191919924, "grad_norm": 1.578125, "learning_rate": 0.0003, "loss": 9.2474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11348 }, { "epoch": 0.8231667512874447, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.1847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11349 }, { "epoch": 0.823239283382897, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11350 }, { "epoch": 0.8233118154783492, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11351 }, { "epoch": 0.8233843475738014, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.3297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11352 }, { "epoch": 0.8234568796692536, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.6845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11353 }, { "epoch": 0.8235294117647058, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11354 }, { "epoch": 0.8236019438601582, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11355 }, { "epoch": 0.8236744759556104, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11356 }, { "epoch": 0.8237470080510626, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11357 }, { "epoch": 0.8238195401465148, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.8929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11358 }, { "epoch": 0.823892072241967, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11359 }, { "epoch": 0.8239646043374194, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.7081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11360 }, { "epoch": 0.8240371364328716, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.2811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11361 }, { "epoch": 0.8241096685283238, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.5569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11362 }, { "epoch": 0.824182200623776, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.6051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11363 }, { "epoch": 0.8242547327192282, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 9.1161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11364 }, { "epoch": 0.8243272648146805, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.1111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11365 }, { "epoch": 0.8243997969101328, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.7189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11366 }, { "epoch": 0.824472329005585, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.1377, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11367 }, { "epoch": 0.8245448611010372, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.7168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11368 }, { "epoch": 0.8246173931964894, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.6589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11369 }, { "epoch": 0.8246899252919417, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11370 }, { "epoch": 0.824762457387394, "grad_norm": 12.9375, "learning_rate": 0.0003, "loss": 8.5785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11371 }, { "epoch": 0.8248349894828462, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11372 }, { "epoch": 0.8249075215782984, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.513, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11373 }, { "epoch": 0.8249800536737506, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11374 }, { "epoch": 0.8250525857692028, "grad_norm": 15.875, "learning_rate": 0.0003, "loss": 8.5004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11375 }, { "epoch": 0.8251251178646551, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.4968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11376 }, { "epoch": 0.8251976499601074, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.0685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11377 }, { "epoch": 0.8252701820555596, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11378 }, { "epoch": 0.8253427141510118, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11379 }, { "epoch": 0.825415246246464, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11380 }, { "epoch": 0.8254877783419163, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.9568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11381 }, { "epoch": 0.8255603104373685, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.3796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11382 }, { "epoch": 0.8256328425328208, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.7578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11383 }, { "epoch": 0.825705374628273, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.7353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11384 }, { "epoch": 0.8257779067237252, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.8069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11385 }, { "epoch": 0.8258504388191775, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1131, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11386 }, { "epoch": 0.8259229709146297, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11387 }, { "epoch": 0.825995503010082, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.7815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11388 }, { "epoch": 0.8260680351055342, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.9761, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11389 }, { "epoch": 0.8261405672009864, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.8284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11390 }, { "epoch": 0.8262130992964387, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.5865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11391 }, { "epoch": 0.8262856313918909, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 8.6687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11392 }, { "epoch": 0.8263581634873431, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.4283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11393 }, { "epoch": 0.8264306955827954, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11394 }, { "epoch": 0.8265032276782476, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11395 }, { "epoch": 0.8265757597736999, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8535, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11396 }, { "epoch": 0.8266482918691521, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.4615, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11397 }, { "epoch": 0.8267208239646043, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.1992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11398 }, { "epoch": 0.8267933560600566, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.2677, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11399 }, { "epoch": 0.8268658881555088, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11400 }, { "epoch": 0.8269384202509611, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.0383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11401 }, { "epoch": 0.8270109523464133, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11402 }, { "epoch": 0.8270834844418655, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11403 }, { "epoch": 0.8271560165373177, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.4211, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11404 }, { "epoch": 0.82722854863277, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.7012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11405 }, { "epoch": 0.8273010807282223, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.0868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11406 }, { "epoch": 0.8273736128236745, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11407 }, { "epoch": 0.8274461449191267, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.8092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11408 }, { "epoch": 0.8275186770145789, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.8777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11409 }, { "epoch": 0.8275912091100311, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11410 }, { "epoch": 0.8276637412054835, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11411 }, { "epoch": 0.8277362733009357, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.3955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11412 }, { "epoch": 0.8278088053963879, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.2901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11413 }, { "epoch": 0.8278813374918401, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 9.1489, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11414 }, { "epoch": 0.8279538695872923, "grad_norm": 1.765625, "learning_rate": 0.0003, "loss": 8.8225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11415 }, { "epoch": 0.8280264016827447, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 8.8475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11416 }, { "epoch": 0.8280989337781969, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.8839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11417 }, { "epoch": 0.8281714658736491, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 7.9318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11418 }, { "epoch": 0.8282439979691013, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.1194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11419 }, { "epoch": 0.8283165300645535, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.2913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11420 }, { "epoch": 0.8283890621600059, "grad_norm": 12.1875, "learning_rate": 0.0003, "loss": 8.8557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11421 }, { "epoch": 0.8284615942554581, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.9267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11422 }, { "epoch": 0.8285341263509103, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.9712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11423 }, { "epoch": 0.8286066584463625, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.5262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11424 }, { "epoch": 0.8286791905418147, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 8.7597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11425 }, { "epoch": 0.828751722637267, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.2435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11426 }, { "epoch": 0.8288242547327193, "grad_norm": 43.75, "learning_rate": 0.0003, "loss": 8.847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11427 }, { "epoch": 0.8288967868281715, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.9238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11428 }, { "epoch": 0.8289693189236237, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.9389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11429 }, { "epoch": 0.8290418510190759, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.3266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11430 }, { "epoch": 0.8291143831145282, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.8126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11431 }, { "epoch": 0.8291869152099804, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.997, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11432 }, { "epoch": 0.8292594473054327, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.5895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11433 }, { "epoch": 0.8293319794008849, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 9.338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11434 }, { "epoch": 0.8294045114963371, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.7938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11435 }, { "epoch": 0.8294770435917894, "grad_norm": 12.5625, "learning_rate": 0.0003, "loss": 8.7992, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11436 }, { "epoch": 0.8295495756872416, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11437 }, { "epoch": 0.8296221077826939, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.7018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11438 }, { "epoch": 0.8296946398781461, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.7674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11439 }, { "epoch": 0.8297671719735983, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11440 }, { "epoch": 0.8298397040690506, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11441 }, { "epoch": 0.8299122361645028, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.7467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11442 }, { "epoch": 0.829984768259955, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 9.0023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11443 }, { "epoch": 0.8300573003554073, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.8696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11444 }, { "epoch": 0.8301298324508595, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11445 }, { "epoch": 0.8302023645463117, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.9437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11446 }, { "epoch": 0.830274896641764, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.2946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11447 }, { "epoch": 0.8303474287372162, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11448 }, { "epoch": 0.8304199608326684, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.9901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11449 }, { "epoch": 0.8304924929281207, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11450 }, { "epoch": 0.8305650250235729, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.3511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11451 }, { "epoch": 0.8306375571190252, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.5816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11452 }, { "epoch": 0.8307100892144774, "grad_norm": 1.7421875, "learning_rate": 0.0003, "loss": 8.9068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11453 }, { "epoch": 0.8307826213099296, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.4082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11454 }, { "epoch": 0.8308551534053819, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.0889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11455 }, { "epoch": 0.8309276855008341, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11456 }, { "epoch": 0.8310002175962864, "grad_norm": 56.25, "learning_rate": 0.0003, "loss": 9.2039, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11457 }, { "epoch": 0.8310727496917386, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 9.0695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11458 }, { "epoch": 0.8311452817871908, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11459 }, { "epoch": 0.831217813882643, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 8.6297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11460 }, { "epoch": 0.8312903459780953, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 9.3773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11461 }, { "epoch": 0.8313628780735476, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 8.9685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11462 }, { "epoch": 0.8314354101689998, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11463 }, { "epoch": 0.831507942264452, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.5754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11464 }, { "epoch": 0.8315804743599042, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.3364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11465 }, { "epoch": 0.8316530064553564, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.0922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11466 }, { "epoch": 0.8317255385508088, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11467 }, { "epoch": 0.831798070646261, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.1733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11468 }, { "epoch": 0.8318706027417132, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11469 }, { "epoch": 0.8319431348371654, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.9584, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11470 }, { "epoch": 0.8320156669326176, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.879, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11471 }, { "epoch": 0.83208819902807, "grad_norm": 13.8125, "learning_rate": 0.0003, "loss": 8.4005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11472 }, { "epoch": 0.8321607311235222, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.9628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11473 }, { "epoch": 0.8322332632189744, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11474 }, { "epoch": 0.8323057953144266, "grad_norm": 21.375, "learning_rate": 0.0003, "loss": 8.4532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11475 }, { "epoch": 0.8323783274098788, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.8146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11476 }, { "epoch": 0.8324508595053312, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.9106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11477 }, { "epoch": 0.8325233916007834, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.7766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11478 }, { "epoch": 0.8325959236962356, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.7574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11479 }, { "epoch": 0.8326684557916878, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.6305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11480 }, { "epoch": 0.83274098788714, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.5765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11481 }, { "epoch": 0.8328135199825923, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.6634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11482 }, { "epoch": 0.8328860520780446, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.4434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11483 }, { "epoch": 0.8329585841734968, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11484 }, { "epoch": 0.833031116268949, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.8528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11485 }, { "epoch": 0.8331036483644012, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11486 }, { "epoch": 0.8331761804598535, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11487 }, { "epoch": 0.8332487125553057, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11488 }, { "epoch": 0.833321244650758, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.1173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11489 }, { "epoch": 0.8333937767462102, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11490 }, { "epoch": 0.8334663088416624, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11491 }, { "epoch": 0.8335388409371147, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.6366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11492 }, { "epoch": 0.8336113730325669, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11493 }, { "epoch": 0.8336839051280192, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11494 }, { "epoch": 0.8337564372234714, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.4362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11495 }, { "epoch": 0.8338289693189236, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.1552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11496 }, { "epoch": 0.8339015014143759, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.3917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11497 }, { "epoch": 0.8339740335098281, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11498 }, { "epoch": 0.8340465656052803, "grad_norm": 9.625, "learning_rate": 0.0003, "loss": 9.2302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11499 }, { "epoch": 0.8341190977007326, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.0034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11500 }, { "epoch": 0.8341916297961848, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.6225, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11501 }, { "epoch": 0.8342641618916371, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11502 }, { "epoch": 0.8343366939870893, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7578, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11503 }, { "epoch": 0.8344092260825415, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.4125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11504 }, { "epoch": 0.8344817581779937, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.7023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11505 }, { "epoch": 0.834554290273446, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.6282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11506 }, { "epoch": 0.8346268223688983, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.8222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11507 }, { "epoch": 0.8346993544643505, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.7291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11508 }, { "epoch": 0.8347718865598027, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11509 }, { "epoch": 0.8348444186552549, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11510 }, { "epoch": 0.8349169507507072, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11511 }, { "epoch": 0.8349894828461595, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.5069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11512 }, { "epoch": 0.8350620149416117, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11513 }, { "epoch": 0.8351345470370639, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11514 }, { "epoch": 0.8352070791325161, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8911, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11515 }, { "epoch": 0.8352796112279683, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.8653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11516 }, { "epoch": 0.8353521433234206, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.8616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11517 }, { "epoch": 0.8354246754188729, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.9096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11518 }, { "epoch": 0.8354972075143251, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11519 }, { "epoch": 0.8355697396097773, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.83, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11520 }, { "epoch": 0.8356422717052295, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11521 }, { "epoch": 0.8357148038006817, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.8059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11522 }, { "epoch": 0.8357873358961341, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.9147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11523 }, { "epoch": 0.8358598679915863, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.0378, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11524 }, { "epoch": 0.8359324000870385, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11525 }, { "epoch": 0.8360049321824907, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11526 }, { "epoch": 0.8360774642779429, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11527 }, { "epoch": 0.8361499963733953, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.7357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11528 }, { "epoch": 0.8362225284688475, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.1271, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11529 }, { "epoch": 0.8362950605642997, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.7479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11530 }, { "epoch": 0.8363675926597519, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11531 }, { "epoch": 0.8364401247552041, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11532 }, { "epoch": 0.8365126568506565, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.1424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11533 }, { "epoch": 0.8365851889461087, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11534 }, { "epoch": 0.8366577210415609, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.6305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11535 }, { "epoch": 0.8367302531370131, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11536 }, { "epoch": 0.8368027852324653, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11537 }, { "epoch": 0.8368753173279176, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.6816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11538 }, { "epoch": 0.8369478494233699, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.4874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11539 }, { "epoch": 0.8370203815188221, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11540 }, { "epoch": 0.8370929136142743, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.7478, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11541 }, { "epoch": 0.8371654457097265, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.6451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11542 }, { "epoch": 0.8372379778051788, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.0718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11543 }, { "epoch": 0.837310509900631, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.7463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11544 }, { "epoch": 0.8373830419960833, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11545 }, { "epoch": 0.8374555740915355, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.8596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11546 }, { "epoch": 0.8375281061869877, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.1209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11547 }, { "epoch": 0.83760063828244, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11548 }, { "epoch": 0.8376731703778922, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11549 }, { "epoch": 0.8377457024733445, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.0912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11550 }, { "epoch": 0.8378182345687967, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11551 }, { "epoch": 0.8378907666642489, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.4622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11552 }, { "epoch": 0.8379632987597012, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11553 }, { "epoch": 0.8380358308551534, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.6206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11554 }, { "epoch": 0.8381083629506056, "grad_norm": 8.875, "learning_rate": 0.0003, "loss": 8.5781, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11555 }, { "epoch": 0.8381808950460579, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.5932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11556 }, { "epoch": 0.8382534271415101, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11557 }, { "epoch": 0.8383259592369624, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11558 }, { "epoch": 0.8383984913324146, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.9833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11559 }, { "epoch": 0.8384710234278668, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.2814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11560 }, { "epoch": 0.838543555523319, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 9.0579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11561 }, { "epoch": 0.8386160876187713, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.5739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11562 }, { "epoch": 0.8386886197142236, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.8712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11563 }, { "epoch": 0.8387611518096758, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.6379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11564 }, { "epoch": 0.838833683905128, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11565 }, { "epoch": 0.8389062160005802, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.1505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11566 }, { "epoch": 0.8389787480960325, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11567 }, { "epoch": 0.8390512801914848, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.1716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11568 }, { "epoch": 0.839123812286937, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.4889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11569 }, { "epoch": 0.8391963443823892, "grad_norm": 15.125, "learning_rate": 0.0003, "loss": 8.495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11570 }, { "epoch": 0.8392688764778414, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11571 }, { "epoch": 0.8393414085732936, "grad_norm": 28.25, "learning_rate": 0.0003, "loss": 8.4552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11572 }, { "epoch": 0.839413940668746, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.6852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11573 }, { "epoch": 0.8394864727641982, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.8297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11574 }, { "epoch": 0.8395590048596504, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 9.4038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11575 }, { "epoch": 0.8396315369551026, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.9833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11576 }, { "epoch": 0.8397040690505548, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11577 }, { "epoch": 0.8397766011460072, "grad_norm": 13.6875, "learning_rate": 0.0003, "loss": 8.8894, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11578 }, { "epoch": 0.8398491332414594, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.9575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11579 }, { "epoch": 0.8399216653369116, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.0641, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11580 }, { "epoch": 0.8399941974323638, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.1166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11581 }, { "epoch": 0.840066729527816, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11582 }, { "epoch": 0.8401392616232682, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.5241, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11583 }, { "epoch": 0.8402117937187206, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.7227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11584 }, { "epoch": 0.8402843258141728, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.9347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11585 }, { "epoch": 0.840356857909625, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.2805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11586 }, { "epoch": 0.8404293900050772, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.3268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11587 }, { "epoch": 0.8405019221005294, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11588 }, { "epoch": 0.8405744541959818, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 9.0244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11589 }, { "epoch": 0.840646986291434, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.7248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11590 }, { "epoch": 0.8407195183868862, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8183, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11591 }, { "epoch": 0.8407920504823384, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11592 }, { "epoch": 0.8408645825777906, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.0228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11593 }, { "epoch": 0.840937114673243, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.7964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11594 }, { "epoch": 0.8410096467686952, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.5525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11595 }, { "epoch": 0.8410821788641474, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11596 }, { "epoch": 0.8411547109595996, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11597 }, { "epoch": 0.8412272430550518, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11598 }, { "epoch": 0.8412997751505041, "grad_norm": 1.6171875, "learning_rate": 0.0003, "loss": 9.1168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11599 }, { "epoch": 0.8413723072459564, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11600 }, { "epoch": 0.8414448393414086, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.0757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11601 }, { "epoch": 0.8415173714368608, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.2753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11602 }, { "epoch": 0.841589903532313, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.1809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11603 }, { "epoch": 0.8416624356277653, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11604 }, { "epoch": 0.8417349677232175, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.8238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11605 }, { "epoch": 0.8418074998186698, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11606 }, { "epoch": 0.841880031914122, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.1558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11607 }, { "epoch": 0.8419525640095742, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11608 }, { "epoch": 0.8420250961050265, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11609 }, { "epoch": 0.8420976282004787, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.4481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11610 }, { "epoch": 0.842170160295931, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.9822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11611 }, { "epoch": 0.8422426923913832, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 9.075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11612 }, { "epoch": 0.8423152244868354, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11613 }, { "epoch": 0.8423877565822877, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11614 }, { "epoch": 0.8424602886777399, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.5727, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11615 }, { "epoch": 0.8425328207731921, "grad_norm": 16.875, "learning_rate": 0.0003, "loss": 8.7622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11616 }, { "epoch": 0.8426053528686444, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.2238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11617 }, { "epoch": 0.8426778849640966, "grad_norm": 27.125, "learning_rate": 0.0003, "loss": 8.6004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11618 }, { "epoch": 0.8427504170595489, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.0292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11619 }, { "epoch": 0.8428229491550011, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.1854, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11620 }, { "epoch": 0.8428954812504533, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 9.0361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11621 }, { "epoch": 0.8429680133459055, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.9922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11622 }, { "epoch": 0.8430405454413578, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11623 }, { "epoch": 0.8431130775368101, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.3198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11624 }, { "epoch": 0.8431856096322623, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0283, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11625 }, { "epoch": 0.8432581417277145, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11626 }, { "epoch": 0.8433306738231667, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.8059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11627 }, { "epoch": 0.843403205918619, "grad_norm": 10.125, "learning_rate": 0.0003, "loss": 9.119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11628 }, { "epoch": 0.8434757380140713, "grad_norm": 14.125, "learning_rate": 0.0003, "loss": 9.1403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11629 }, { "epoch": 0.8435482701095235, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11630 }, { "epoch": 0.8436208022049757, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.9961, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11631 }, { "epoch": 0.8436933343004279, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11632 }, { "epoch": 0.8437658663958801, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.7411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11633 }, { "epoch": 0.8438383984913325, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.7949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11634 }, { "epoch": 0.8439109305867847, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11635 }, { "epoch": 0.8439834626822369, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 9.0773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11636 }, { "epoch": 0.8440559947776891, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.2057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11637 }, { "epoch": 0.8441285268731413, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11638 }, { "epoch": 0.8442010589685937, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11639 }, { "epoch": 0.8442735910640459, "grad_norm": 11.1875, "learning_rate": 0.0003, "loss": 8.5111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11640 }, { "epoch": 0.8443461231594981, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11641 }, { "epoch": 0.8444186552549503, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11642 }, { "epoch": 0.8444911873504025, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.2589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11643 }, { "epoch": 0.8445637194458548, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.3126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11644 }, { "epoch": 0.8446362515413071, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.8776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11645 }, { "epoch": 0.8447087836367593, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11646 }, { "epoch": 0.8447813157322115, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11647 }, { "epoch": 0.8448538478276637, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.4572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11648 }, { "epoch": 0.844926379923116, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.5902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11649 }, { "epoch": 0.8449989120185682, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.5797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11650 }, { "epoch": 0.8450714441140205, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.5256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11651 }, { "epoch": 0.8451439762094727, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.7114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11652 }, { "epoch": 0.8452165083049249, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11653 }, { "epoch": 0.8452890404003771, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11654 }, { "epoch": 0.8453615724958294, "grad_norm": 1.8671875, "learning_rate": 0.0003, "loss": 9.1937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11655 }, { "epoch": 0.8454341045912817, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.1964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11656 }, { "epoch": 0.8455066366867339, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11657 }, { "epoch": 0.8455791687821861, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11658 }, { "epoch": 0.8456517008776383, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 9.2266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11659 }, { "epoch": 0.8457242329730906, "grad_norm": 11.25, "learning_rate": 0.0003, "loss": 9.1563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11660 }, { "epoch": 0.8457967650685428, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.4602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11661 }, { "epoch": 0.8458692971639951, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 8.6477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11662 }, { "epoch": 0.8459418292594473, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.0743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11663 }, { "epoch": 0.8460143613548995, "grad_norm": 10.25, "learning_rate": 0.0003, "loss": 8.8236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11664 }, { "epoch": 0.8460868934503518, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.3165, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11665 }, { "epoch": 0.846159425545804, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.6725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11666 }, { "epoch": 0.8462319576412562, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.1321, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11667 }, { "epoch": 0.8463044897367085, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 9.0889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11668 }, { "epoch": 0.8463770218321607, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.6873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11669 }, { "epoch": 0.846449553927613, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.1931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11670 }, { "epoch": 0.8465220860230652, "grad_norm": 13.0, "learning_rate": 0.0003, "loss": 8.6286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11671 }, { "epoch": 0.8465946181185174, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.5251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11672 }, { "epoch": 0.8466671502139697, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.8105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11673 }, { "epoch": 0.8467396823094219, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11674 }, { "epoch": 0.8468122144048742, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6832, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11675 }, { "epoch": 0.8468847465003264, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.5684, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11676 }, { "epoch": 0.8469572785957786, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.7974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11677 }, { "epoch": 0.8470298106912308, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.6701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11678 }, { "epoch": 0.8471023427866831, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.3935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11679 }, { "epoch": 0.8471748748821354, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.7354, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11680 }, { "epoch": 0.8472474069775876, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.3909, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11681 }, { "epoch": 0.8473199390730398, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.4413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11682 }, { "epoch": 0.847392471168492, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11683 }, { "epoch": 0.8474650032639442, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.1237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11684 }, { "epoch": 0.8475375353593966, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 8.616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11685 }, { "epoch": 0.8476100674548488, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11686 }, { "epoch": 0.847682599550301, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11687 }, { "epoch": 0.8477551316457532, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 9.1993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11688 }, { "epoch": 0.8478276637412054, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.3207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11689 }, { "epoch": 0.8479001958366578, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.1955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11690 }, { "epoch": 0.84797272793211, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 9.1596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11691 }, { "epoch": 0.8480452600275622, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.48, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11692 }, { "epoch": 0.8481177921230144, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 8.776, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11693 }, { "epoch": 0.8481903242184666, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11694 }, { "epoch": 0.848262856313919, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11695 }, { "epoch": 0.8483353884093712, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.3659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11696 }, { "epoch": 0.8484079205048234, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.1176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11697 }, { "epoch": 0.8484804526002756, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11698 }, { "epoch": 0.8485529846957278, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.1144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11699 }, { "epoch": 0.8486255167911801, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.1763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11700 }, { "epoch": 0.8486980488866324, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.619, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11701 }, { "epoch": 0.8487705809820846, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.4747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11702 }, { "epoch": 0.8488431130775368, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11703 }, { "epoch": 0.848915645172989, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.7317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11704 }, { "epoch": 0.8489881772684413, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11705 }, { "epoch": 0.8490607093638936, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.4836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11706 }, { "epoch": 0.8491332414593458, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.8756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11707 }, { "epoch": 0.849205773554798, "grad_norm": 32.5, "learning_rate": 0.0003, "loss": 9.1449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11708 }, { "epoch": 0.8492783056502502, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.0591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11709 }, { "epoch": 0.8493508377457025, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.9801, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11710 }, { "epoch": 0.8494233698411547, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.9335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11711 }, { "epoch": 0.849495901936607, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.7503, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11712 }, { "epoch": 0.8495684340320592, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.7579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11713 }, { "epoch": 0.8496409661275114, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.9124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11714 }, { "epoch": 0.8497134982229637, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 9.345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11715 }, { "epoch": 0.8497860303184159, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.3099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11716 }, { "epoch": 0.8498585624138681, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.4717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11717 }, { "epoch": 0.8499310945093204, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.5737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11718 }, { "epoch": 0.8500036266047726, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8912, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11719 }, { "epoch": 0.8500761587002249, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.8929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11720 }, { "epoch": 0.8501486907956771, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.6538, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11721 }, { "epoch": 0.8502212228911293, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.4144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11722 }, { "epoch": 0.8502937549865816, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.1667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11723 }, { "epoch": 0.8503662870820338, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.8882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11724 }, { "epoch": 0.850438819177486, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11725 }, { "epoch": 0.8505113512729383, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.3719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11726 }, { "epoch": 0.8505838833683905, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11727 }, { "epoch": 0.8506564154638427, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.8309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11728 }, { "epoch": 0.850728947559295, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.6483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11729 }, { "epoch": 0.8508014796547472, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.5786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11730 }, { "epoch": 0.8508740117501995, "grad_norm": 18.25, "learning_rate": 0.0003, "loss": 8.8408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11731 }, { "epoch": 0.8509465438456517, "grad_norm": 13.4375, "learning_rate": 0.0003, "loss": 9.3333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11732 }, { "epoch": 0.8510190759411039, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6441, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11733 }, { "epoch": 0.8510916080365561, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 9.2289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11734 }, { "epoch": 0.8511641401320084, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.1487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11735 }, { "epoch": 0.8512366722274607, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11736 }, { "epoch": 0.8513092043229129, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11737 }, { "epoch": 0.8513817364183651, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11738 }, { "epoch": 0.8514542685138173, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.5038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11739 }, { "epoch": 0.8515268006092696, "grad_norm": 170.0, "learning_rate": 0.0003, "loss": 8.2881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11740 }, { "epoch": 0.8515993327047219, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11741 }, { "epoch": 0.8516718648001741, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.8744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11742 }, { "epoch": 0.8517443968956263, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 8.8601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11743 }, { "epoch": 0.8518169289910785, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.5869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11744 }, { "epoch": 0.8518894610865307, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.6652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11745 }, { "epoch": 0.8519619931819831, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7357, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11746 }, { "epoch": 0.8520345252774353, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.3815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11747 }, { "epoch": 0.8521070573728875, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.9504, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11748 }, { "epoch": 0.8521795894683397, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11749 }, { "epoch": 0.8522521215637919, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.5515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11750 }, { "epoch": 0.8523246536592443, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.4527, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11751 }, { "epoch": 0.8523971857546965, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.1654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11752 }, { "epoch": 0.8524697178501487, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11753 }, { "epoch": 0.8525422499456009, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11754 }, { "epoch": 0.8526147820410531, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.6512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11755 }, { "epoch": 0.8526873141365054, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.0251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11756 }, { "epoch": 0.8527598462319577, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.6414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11757 }, { "epoch": 0.8528323783274099, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.22, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11758 }, { "epoch": 0.8529049104228621, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.7185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11759 }, { "epoch": 0.8529774425183143, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11760 }, { "epoch": 0.8530499746137666, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11761 }, { "epoch": 0.8531225067092189, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11762 }, { "epoch": 0.8531950388046711, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11763 }, { "epoch": 0.8532675709001233, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.7494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11764 }, { "epoch": 0.8533401029955755, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11765 }, { "epoch": 0.8534126350910278, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.4331, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11766 }, { "epoch": 0.85348516718648, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1209, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11767 }, { "epoch": 0.8535576992819323, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.9985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11768 }, { "epoch": 0.8536302313773845, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11769 }, { "epoch": 0.8537027634728367, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 9.1773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11770 }, { "epoch": 0.853775295568289, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11771 }, { "epoch": 0.8538478276637412, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11772 }, { "epoch": 0.8539203597591934, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.5043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11773 }, { "epoch": 0.8539928918546457, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.6244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11774 }, { "epoch": 0.8540654239500979, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.1914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11775 }, { "epoch": 0.8541379560455502, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11776 }, { "epoch": 0.8542104881410024, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11777 }, { "epoch": 0.8542830202364546, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.9075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11778 }, { "epoch": 0.8543555523319069, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.2451, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11779 }, { "epoch": 0.8544280844273591, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11780 }, { "epoch": 0.8545006165228114, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.0623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11781 }, { "epoch": 0.8545731486182636, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.2848, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11782 }, { "epoch": 0.8546456807137158, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.2558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11783 }, { "epoch": 0.854718212809168, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.7598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11784 }, { "epoch": 0.8547907449046203, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11785 }, { "epoch": 0.8548632770000726, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11786 }, { "epoch": 0.8549358090955248, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 9.2492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11787 }, { "epoch": 0.855008341190977, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 9.0863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11788 }, { "epoch": 0.8550808732864292, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.0996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11789 }, { "epoch": 0.8551534053818814, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.7243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11790 }, { "epoch": 0.8552259374773338, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.9553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11791 }, { "epoch": 0.855298469572786, "grad_norm": 26.75, "learning_rate": 0.0003, "loss": 8.871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11792 }, { "epoch": 0.8553710016682382, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.9712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11793 }, { "epoch": 0.8554435337636904, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 8.7943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11794 }, { "epoch": 0.8555160658591426, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.9579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11795 }, { "epoch": 0.8555885979545949, "grad_norm": 6.65625, "learning_rate": 0.0003, "loss": 8.9041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11796 }, { "epoch": 0.8556611300500472, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.3346, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11797 }, { "epoch": 0.8557336621454994, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11798 }, { "epoch": 0.8558061942409516, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.5272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11799 }, { "epoch": 0.8558787263364038, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11800 }, { "epoch": 0.855951258431856, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11801 }, { "epoch": 0.8560237905273084, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 9.0061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11802 }, { "epoch": 0.8560963226227606, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.8582, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11803 }, { "epoch": 0.8561688547182128, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.7975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11804 }, { "epoch": 0.856241386813665, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.0443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11805 }, { "epoch": 0.8563139189091172, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 8.8361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11806 }, { "epoch": 0.8563864510045696, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11807 }, { "epoch": 0.8564589831000218, "grad_norm": 20.5, "learning_rate": 0.0003, "loss": 8.6055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11808 }, { "epoch": 0.856531515195474, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11809 }, { "epoch": 0.8566040472909262, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11810 }, { "epoch": 0.8566765793863784, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11811 }, { "epoch": 0.8567491114818307, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.1523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11812 }, { "epoch": 0.856821643577283, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.9079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11813 }, { "epoch": 0.8568941756727352, "grad_norm": 28.625, "learning_rate": 0.0003, "loss": 8.9932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11814 }, { "epoch": 0.8569667077681874, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11815 }, { "epoch": 0.8570392398636396, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.3362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11816 }, { "epoch": 0.8571117719590919, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.2384, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11817 }, { "epoch": 0.8571843040545442, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11818 }, { "epoch": 0.8572568361499964, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11819 }, { "epoch": 0.8573293682454486, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11820 }, { "epoch": 0.8574019003409008, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.7432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11821 }, { "epoch": 0.8574744324363531, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.5349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11822 }, { "epoch": 0.8575469645318053, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 9.0601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11823 }, { "epoch": 0.8576194966272576, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11824 }, { "epoch": 0.8576920287227098, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.6286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11825 }, { "epoch": 0.857764560818162, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.8712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11826 }, { "epoch": 0.8578370929136143, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.8406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11827 }, { "epoch": 0.8579096250090665, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11828 }, { "epoch": 0.8579821571045188, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11829 }, { "epoch": 0.858054689199971, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11830 }, { "epoch": 0.8581272212954232, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.0481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11831 }, { "epoch": 0.8581997533908755, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.6449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11832 }, { "epoch": 0.8582722854863277, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11833 }, { "epoch": 0.8583448175817799, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.1729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11834 }, { "epoch": 0.8584173496772322, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 9.1293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11835 }, { "epoch": 0.8584898817726844, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.5188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11836 }, { "epoch": 0.8585624138681367, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.2087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11837 }, { "epoch": 0.8586349459635889, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11838 }, { "epoch": 0.8587074780590411, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.5683, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11839 }, { "epoch": 0.8587800101544933, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.2675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11840 }, { "epoch": 0.8588525422499456, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.6102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11841 }, { "epoch": 0.8589250743453979, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11842 }, { "epoch": 0.8589976064408501, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11843 }, { "epoch": 0.8590701385363023, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.5572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11844 }, { "epoch": 0.8591426706317545, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.4442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11845 }, { "epoch": 0.8592152027272068, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.2366, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11846 }, { "epoch": 0.8592877348226591, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.5373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11847 }, { "epoch": 0.8593602669181113, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.2717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11848 }, { "epoch": 0.8594327990135635, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.2203, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11849 }, { "epoch": 0.8595053311090157, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.6079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11850 }, { "epoch": 0.8595778632044679, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 8.7747, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11851 }, { "epoch": 0.8596503952999203, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.2652, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11852 }, { "epoch": 0.8597229273953725, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.3631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11853 }, { "epoch": 0.8597954594908247, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7006, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11854 }, { "epoch": 0.8598679915862769, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.7067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11855 }, { "epoch": 0.8599405236817291, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11856 }, { "epoch": 0.8600130557771815, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11857 }, { "epoch": 0.8600855878726337, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.9613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11858 }, { "epoch": 0.8601581199680859, "grad_norm": 18.875, "learning_rate": 0.0003, "loss": 8.7841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11859 }, { "epoch": 0.8602306520635381, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.7263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11860 }, { "epoch": 0.8603031841589903, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.0493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11861 }, { "epoch": 0.8603757162544426, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11862 }, { "epoch": 0.8604482483498949, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.0812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11863 }, { "epoch": 0.8605207804453471, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 8.4291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11864 }, { "epoch": 0.8605933125407993, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.6895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11865 }, { "epoch": 0.8606658446362515, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 9.0975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11866 }, { "epoch": 0.8607383767317037, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.746, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11867 }, { "epoch": 0.860810908827156, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.6917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11868 }, { "epoch": 0.8608834409226083, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.5976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11869 }, { "epoch": 0.8609559730180605, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.6725, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11870 }, { "epoch": 0.8610285051135127, "grad_norm": 40.25, "learning_rate": 0.0003, "loss": 8.4824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11871 }, { "epoch": 0.8611010372089649, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11872 }, { "epoch": 0.8611735693044172, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11873 }, { "epoch": 0.8612461013998695, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.0835, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11874 }, { "epoch": 0.8613186334953217, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11875 }, { "epoch": 0.8613911655907739, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.7674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11876 }, { "epoch": 0.8614636976862261, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.9971, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11877 }, { "epoch": 0.8615362297816784, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.2976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11878 }, { "epoch": 0.8616087618771306, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.8756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11879 }, { "epoch": 0.8616812939725829, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2201, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11880 }, { "epoch": 0.8617538260680351, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11881 }, { "epoch": 0.8618263581634873, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.8596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11882 }, { "epoch": 0.8618988902589396, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.4585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11883 }, { "epoch": 0.8619714223543918, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.1514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11884 }, { "epoch": 0.862043954449844, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.8507, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11885 }, { "epoch": 0.8621164865452963, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.0888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11886 }, { "epoch": 0.8621890186407485, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.2739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11887 }, { "epoch": 0.8622615507362008, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.4364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11888 }, { "epoch": 0.862334082831653, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.4434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11889 }, { "epoch": 0.8624066149271052, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.294, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11890 }, { "epoch": 0.8624791470225575, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.1623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11891 }, { "epoch": 0.8625516791180097, "grad_norm": 77.5, "learning_rate": 0.0003, "loss": 8.5464, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11892 }, { "epoch": 0.862624211213462, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.2611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11893 }, { "epoch": 0.8626967433089142, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.1245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11894 }, { "epoch": 0.8627692754043664, "grad_norm": 17.125, "learning_rate": 0.0003, "loss": 8.3671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11895 }, { "epoch": 0.8628418074998186, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.5563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11896 }, { "epoch": 0.8629143395952709, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11897 }, { "epoch": 0.8629868716907232, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.5008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11898 }, { "epoch": 0.8630594037861754, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11899 }, { "epoch": 0.8631319358816276, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.9847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11900 }, { "epoch": 0.8632044679770798, "grad_norm": 12.8125, "learning_rate": 0.0003, "loss": 8.7868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11901 }, { "epoch": 0.863277000072532, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.5521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11902 }, { "epoch": 0.8633495321679844, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11903 }, { "epoch": 0.8634220642634366, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11904 }, { "epoch": 0.8634945963588888, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.8588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11905 }, { "epoch": 0.863567128454341, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.6093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11906 }, { "epoch": 0.8636396605497932, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.3949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11907 }, { "epoch": 0.8637121926452456, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11908 }, { "epoch": 0.8637847247406978, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11909 }, { "epoch": 0.86385725683615, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.3149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11910 }, { "epoch": 0.8639297889316022, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.6502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11911 }, { "epoch": 0.8640023210270544, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.5861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11912 }, { "epoch": 0.8640748531225068, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.5288, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11913 }, { "epoch": 0.864147385217959, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.9887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11914 }, { "epoch": 0.8642199173134112, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11915 }, { "epoch": 0.8642924494088634, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11916 }, { "epoch": 0.8643649815043156, "grad_norm": 56.5, "learning_rate": 0.0003, "loss": 8.1212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11917 }, { "epoch": 0.864437513599768, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.8315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11918 }, { "epoch": 0.8645100456952202, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.3315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11919 }, { "epoch": 0.8645825777906724, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.4774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11920 }, { "epoch": 0.8646551098861246, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 9.3416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11921 }, { "epoch": 0.8647276419815768, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.4057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11922 }, { "epoch": 0.8648001740770291, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 8.7643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11923 }, { "epoch": 0.8648727061724814, "grad_norm": 7.21875, "learning_rate": 0.0003, "loss": 7.9828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11924 }, { "epoch": 0.8649452382679336, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.2574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11925 }, { "epoch": 0.8650177703633858, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.2112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11926 }, { "epoch": 0.865090302458838, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.8049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11927 }, { "epoch": 0.8651628345542903, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.8425, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11928 }, { "epoch": 0.8652353666497425, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.4147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11929 }, { "epoch": 0.8653078987451948, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.3138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11930 }, { "epoch": 0.865380430840647, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11931 }, { "epoch": 0.8654529629360992, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.9558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11932 }, { "epoch": 0.8655254950315514, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.5824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11933 }, { "epoch": 0.8655980271270037, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.2341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11934 }, { "epoch": 0.865670559222456, "grad_norm": 69.5, "learning_rate": 0.0003, "loss": 8.7559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11935 }, { "epoch": 0.8657430913179082, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 9.333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11936 }, { "epoch": 0.8658156234133604, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 9.4864, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11937 }, { "epoch": 0.8658881555088126, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.6757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11938 }, { "epoch": 0.8659606876042649, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.4308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11939 }, { "epoch": 0.8660332196997171, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.7548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11940 }, { "epoch": 0.8661057517951694, "grad_norm": 243.0, "learning_rate": 0.0003, "loss": 9.1988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11941 }, { "epoch": 0.8661782838906216, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.7823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11942 }, { "epoch": 0.8662508159860738, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.6717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11943 }, { "epoch": 0.8663233480815261, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11944 }, { "epoch": 0.8663958801769783, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.5115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11945 }, { "epoch": 0.8664684122724305, "grad_norm": 13.375, "learning_rate": 0.0003, "loss": 9.0188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11946 }, { "epoch": 0.8665409443678828, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5365, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11947 }, { "epoch": 0.866613476463335, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.9319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11948 }, { "epoch": 0.8666860085587873, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.9166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11949 }, { "epoch": 0.8667585406542395, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9897, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11950 }, { "epoch": 0.8668310727496917, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.6257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11951 }, { "epoch": 0.866903604845144, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.2674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11952 }, { "epoch": 0.8669761369405962, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.7511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11953 }, { "epoch": 0.8670486690360485, "grad_norm": 11.9375, "learning_rate": 0.0003, "loss": 8.4614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11954 }, { "epoch": 0.8671212011315007, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.3989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11955 }, { "epoch": 0.8671937332269529, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11956 }, { "epoch": 0.8672662653224051, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.5265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11957 }, { "epoch": 0.8673387974178574, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.7186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11958 }, { "epoch": 0.8674113295133097, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.8516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11959 }, { "epoch": 0.8674838616087619, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.5569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11960 }, { "epoch": 0.8675563937042141, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.5564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11961 }, { "epoch": 0.8676289257996663, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.6449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11962 }, { "epoch": 0.8677014578951185, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.2939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11963 }, { "epoch": 0.8677739899905709, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.5358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11964 }, { "epoch": 0.8678465220860231, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.6787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11965 }, { "epoch": 0.8679190541814753, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.2639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11966 }, { "epoch": 0.8679915862769275, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11967 }, { "epoch": 0.8680641183723797, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.3407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11968 }, { "epoch": 0.8681366504678321, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11969 }, { "epoch": 0.8682091825632843, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11970 }, { "epoch": 0.8682817146587365, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11971 }, { "epoch": 0.8683542467541887, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.6916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11972 }, { "epoch": 0.8684267788496409, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 9.023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11973 }, { "epoch": 0.8684993109450933, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11974 }, { "epoch": 0.8685718430405455, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.4693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11975 }, { "epoch": 0.8686443751359977, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.3682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11976 }, { "epoch": 0.8687169072314499, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.1175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11977 }, { "epoch": 0.8687894393269021, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 9.0098, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11978 }, { "epoch": 0.8688619714223544, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.0313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11979 }, { "epoch": 0.8689345035178067, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.1311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11980 }, { "epoch": 0.8690070356132589, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 8.7774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11981 }, { "epoch": 0.8690795677087111, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.5998, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11982 }, { "epoch": 0.8691520998041633, "grad_norm": 24.75, "learning_rate": 0.0003, "loss": 9.5456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11983 }, { "epoch": 0.8692246318996156, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.7974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11984 }, { "epoch": 0.8692971639950678, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 8.7164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11985 }, { "epoch": 0.8693696960905201, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.7014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11986 }, { "epoch": 0.8694422281859723, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11987 }, { "epoch": 0.8695147602814245, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.9348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11988 }, { "epoch": 0.8695872923768768, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.9713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11989 }, { "epoch": 0.869659824472329, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.7314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11990 }, { "epoch": 0.8697323565677813, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.028, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11991 }, { "epoch": 0.8698048886632335, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.7262, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11992 }, { "epoch": 0.8698774207586857, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11993 }, { "epoch": 0.869949952854138, "grad_norm": 11.8125, "learning_rate": 0.0003, "loss": 9.3697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11994 }, { "epoch": 0.8700224849495902, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.1989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11995 }, { "epoch": 0.8700950170450424, "grad_norm": 11.8125, "learning_rate": 0.0003, "loss": 8.4986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11996 }, { "epoch": 0.8701675491404947, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.5066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11997 }, { "epoch": 0.8702400812359469, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11998 }, { "epoch": 0.8703126133313992, "grad_norm": 12.375, "learning_rate": 0.0003, "loss": 8.7208, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 11999 }, { "epoch": 0.8703851454268514, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.3446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12000 }, { "epoch": 0.8704576775223036, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.75, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12001 }, { "epoch": 0.8705302096177558, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.7905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12002 }, { "epoch": 0.8706027417132081, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.7752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12003 }, { "epoch": 0.8706752738086603, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 9.1454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12004 }, { "epoch": 0.8707478059041126, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.3889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12005 }, { "epoch": 0.8708203379995648, "grad_norm": 9.375, "learning_rate": 0.0003, "loss": 8.8107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12006 }, { "epoch": 0.870892870095017, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.6764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12007 }, { "epoch": 0.8709654021904693, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.8541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12008 }, { "epoch": 0.8710379342859215, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12009 }, { "epoch": 0.8711104663813738, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.4161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12010 }, { "epoch": 0.871182998476826, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12011 }, { "epoch": 0.8712555305722782, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.9161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12012 }, { "epoch": 0.8713280626677304, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12013 }, { "epoch": 0.8714005947631827, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12014 }, { "epoch": 0.871473126858635, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.7611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12015 }, { "epoch": 0.8715456589540872, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12016 }, { "epoch": 0.8716181910495394, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12017 }, { "epoch": 0.8716907231449916, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 8.5987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12018 }, { "epoch": 0.8717632552404438, "grad_norm": 1.2890625, "learning_rate": 0.0003, "loss": 8.7148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12019 }, { "epoch": 0.8718357873358962, "grad_norm": 21.875, "learning_rate": 0.0003, "loss": 8.7492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12020 }, { "epoch": 0.8719083194313484, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.5793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12021 }, { "epoch": 0.8719808515268006, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 9.1867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12022 }, { "epoch": 0.8720533836222528, "grad_norm": 17.75, "learning_rate": 0.0003, "loss": 9.226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12023 }, { "epoch": 0.872125915717705, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.0614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12024 }, { "epoch": 0.8721984478131574, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.1679, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12025 }, { "epoch": 0.8722709799086096, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 8.7782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12026 }, { "epoch": 0.8723435120040618, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.2419, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12027 }, { "epoch": 0.872416044099514, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 8.8827, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12028 }, { "epoch": 0.8724885761949662, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.0391, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12029 }, { "epoch": 0.8725611082904186, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.5036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12030 }, { "epoch": 0.8726336403858708, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 8.4422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12031 }, { "epoch": 0.872706172481323, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12032 }, { "epoch": 0.8727787045767752, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12033 }, { "epoch": 0.8728512366722274, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.5994, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12034 }, { "epoch": 0.8729237687676797, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.5427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12035 }, { "epoch": 0.872996300863132, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.9534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12036 }, { "epoch": 0.8730688329585842, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.0358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12037 }, { "epoch": 0.8731413650540364, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12038 }, { "epoch": 0.8732138971494886, "grad_norm": 9.4375, "learning_rate": 0.0003, "loss": 9.0116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12039 }, { "epoch": 0.8732864292449409, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.5012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12040 }, { "epoch": 0.8733589613403931, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.0576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12041 }, { "epoch": 0.8734314934358454, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.1023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12042 }, { "epoch": 0.8735040255312976, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12043 }, { "epoch": 0.8735765576267498, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.2539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12044 }, { "epoch": 0.8736490897222021, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12045 }, { "epoch": 0.8737216218176543, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.7882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12046 }, { "epoch": 0.8737941539131066, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.9244, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12047 }, { "epoch": 0.8738666860085588, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.1542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12048 }, { "epoch": 0.873939218104011, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12049 }, { "epoch": 0.8740117501994633, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.7653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12050 }, { "epoch": 0.8740842822949155, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.7008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12051 }, { "epoch": 0.8741568143903677, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12052 }, { "epoch": 0.87422934648582, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.4479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12053 }, { "epoch": 0.8743018785812722, "grad_norm": 1.859375, "learning_rate": 0.0003, "loss": 9.081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12054 }, { "epoch": 0.8743744106767245, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.4644, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12055 }, { "epoch": 0.8744469427721767, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.4586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12056 }, { "epoch": 0.8745194748676289, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.2963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12057 }, { "epoch": 0.8745920069630811, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.8281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12058 }, { "epoch": 0.8746645390585334, "grad_norm": 1.546875, "learning_rate": 0.0003, "loss": 8.8449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12059 }, { "epoch": 0.8747370711539857, "grad_norm": 16.375, "learning_rate": 0.0003, "loss": 9.0486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12060 }, { "epoch": 0.8748096032494379, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12061 }, { "epoch": 0.8748821353448901, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12062 }, { "epoch": 0.8749546674403423, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12063 }, { "epoch": 0.8750271995357946, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.5481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12064 }, { "epoch": 0.8750997316312469, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.5206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12065 }, { "epoch": 0.8751722637266991, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8303, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12066 }, { "epoch": 0.8752447958221513, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.5061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12067 }, { "epoch": 0.8753173279176035, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 8.9072, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12068 }, { "epoch": 0.8753898600130557, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.2164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12069 }, { "epoch": 0.8754623921085081, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.1137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12070 }, { "epoch": 0.8755349242039603, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12071 }, { "epoch": 0.8756074562994125, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.2263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12072 }, { "epoch": 0.8756799883948647, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.8823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12073 }, { "epoch": 0.8757525204903169, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.8621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12074 }, { "epoch": 0.8758250525857691, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.4223, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12075 }, { "epoch": 0.8758975846812215, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.3587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12076 }, { "epoch": 0.8759701167766737, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12077 }, { "epoch": 0.8760426488721259, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12078 }, { "epoch": 0.8761151809675781, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12079 }, { "epoch": 0.8761877130630303, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.5498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12080 }, { "epoch": 0.8762602451584827, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.3202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12081 }, { "epoch": 0.8763327772539349, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.5893, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12082 }, { "epoch": 0.8764053093493871, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12083 }, { "epoch": 0.8764778414448393, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.5004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12084 }, { "epoch": 0.8765503735402915, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.1041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12085 }, { "epoch": 0.8766229056357439, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.5509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12086 }, { "epoch": 0.8766954377311961, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.3501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12087 }, { "epoch": 0.8767679698266483, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.8712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12088 }, { "epoch": 0.8768405019221005, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.2353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12089 }, { "epoch": 0.8769130340175527, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.3919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12090 }, { "epoch": 0.876985566113005, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.8048, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12091 }, { "epoch": 0.8770580982084573, "grad_norm": 9.1875, "learning_rate": 0.0003, "loss": 8.704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12092 }, { "epoch": 0.8771306303039095, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12093 }, { "epoch": 0.8772031623993617, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12094 }, { "epoch": 0.8772756944948139, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.8918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12095 }, { "epoch": 0.8773482265902662, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12096 }, { "epoch": 0.8774207586857184, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.0594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12097 }, { "epoch": 0.8774932907811707, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.9362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12098 }, { "epoch": 0.8775658228766229, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.4469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12099 }, { "epoch": 0.8776383549720751, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.2138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12100 }, { "epoch": 0.8777108870675274, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.9686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12101 }, { "epoch": 0.8777834191629796, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.6882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12102 }, { "epoch": 0.8778559512584319, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.654, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12103 }, { "epoch": 0.8779284833538841, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12104 }, { "epoch": 0.8780010154493363, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.276, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12105 }, { "epoch": 0.8780735475447886, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.2716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12106 }, { "epoch": 0.8781460796402408, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.9544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12107 }, { "epoch": 0.878218611735693, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.8718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12108 }, { "epoch": 0.8782911438311453, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.3458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12109 }, { "epoch": 0.8783636759265975, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.6954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12110 }, { "epoch": 0.8784362080220498, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 9.1695, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12111 }, { "epoch": 0.878508740117502, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 8.8525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12112 }, { "epoch": 0.8785812722129542, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.6382, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12113 }, { "epoch": 0.8786538043084064, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12114 }, { "epoch": 0.8787263364038587, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.8179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12115 }, { "epoch": 0.878798868499311, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.7298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12116 }, { "epoch": 0.8788714005947632, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.0694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12117 }, { "epoch": 0.8789439326902154, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12118 }, { "epoch": 0.8790164647856676, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.1169, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12119 }, { "epoch": 0.8790889968811199, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.9637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12120 }, { "epoch": 0.8791615289765722, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.3, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12121 }, { "epoch": 0.8792340610720244, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.5951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12122 }, { "epoch": 0.8793065931674766, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12123 }, { "epoch": 0.8793791252629288, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8638, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12124 }, { "epoch": 0.879451657358381, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.9296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12125 }, { "epoch": 0.8795241894538334, "grad_norm": 5.96875, "learning_rate": 0.0003, "loss": 9.0164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12126 }, { "epoch": 0.8795967215492856, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12127 }, { "epoch": 0.8796692536447378, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1248, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12128 }, { "epoch": 0.87974178574019, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.2626, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12129 }, { "epoch": 0.8798143178356422, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 9.0523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12130 }, { "epoch": 0.8798868499310946, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 8.6974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12131 }, { "epoch": 0.8799593820265468, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 9.0025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12132 }, { "epoch": 0.880031914121999, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.7875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12133 }, { "epoch": 0.8801044462174512, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.4214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12134 }, { "epoch": 0.8801769783129034, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.3532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12135 }, { "epoch": 0.8802495104083558, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.8235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12136 }, { "epoch": 0.880322042503808, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.3876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12137 }, { "epoch": 0.8803945745992602, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 9.1368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12138 }, { "epoch": 0.8804671066947124, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 9.2207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12139 }, { "epoch": 0.8805396387901646, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.0951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12140 }, { "epoch": 0.8806121708856169, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12141 }, { "epoch": 0.8806847029810692, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.8749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12142 }, { "epoch": 0.8807572350765214, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.9217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12143 }, { "epoch": 0.8808297671719736, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12144 }, { "epoch": 0.8809022992674258, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12145 }, { "epoch": 0.880974831362878, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.5376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12146 }, { "epoch": 0.8810473634583303, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 8.9081, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12147 }, { "epoch": 0.8811198955537826, "grad_norm": 12.1875, "learning_rate": 0.0003, "loss": 8.5337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12148 }, { "epoch": 0.8811924276492348, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12149 }, { "epoch": 0.881264959744687, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.0416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12150 }, { "epoch": 0.8813374918401392, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.5257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12151 }, { "epoch": 0.8814100239355915, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9691, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12152 }, { "epoch": 0.8814825560310438, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.5197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12153 }, { "epoch": 0.881555088126496, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12154 }, { "epoch": 0.8816276202219482, "grad_norm": 1.8515625, "learning_rate": 0.0003, "loss": 9.0763, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12155 }, { "epoch": 0.8817001523174004, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 9.0577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12156 }, { "epoch": 0.8817726844128527, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 8.895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12157 }, { "epoch": 0.8818452165083049, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12158 }, { "epoch": 0.8819177486037572, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12159 }, { "epoch": 0.8819902806992094, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.6369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12160 }, { "epoch": 0.8820628127946616, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.6064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12161 }, { "epoch": 0.8821353448901139, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12162 }, { "epoch": 0.8822078769855661, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.8772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12163 }, { "epoch": 0.8822804090810183, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12164 }, { "epoch": 0.8823529411764706, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.6157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12165 }, { "epoch": 0.8824254732719228, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.0067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12166 }, { "epoch": 0.8824980053673751, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.9161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12167 }, { "epoch": 0.8825705374628273, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.0341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12168 }, { "epoch": 0.8826430695582795, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12169 }, { "epoch": 0.8827156016537318, "grad_norm": 10.3125, "learning_rate": 0.0003, "loss": 8.6793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12170 }, { "epoch": 0.882788133749184, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.2868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12171 }, { "epoch": 0.8828606658446363, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.4744, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12172 }, { "epoch": 0.8829331979400885, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.6666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12173 }, { "epoch": 0.8830057300355407, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.5146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12174 }, { "epoch": 0.8830782621309929, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12175 }, { "epoch": 0.8831507942264452, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.4186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12176 }, { "epoch": 0.8832233263218975, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.8138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12177 }, { "epoch": 0.8832958584173497, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.5928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12178 }, { "epoch": 0.8833683905128019, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.8273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12179 }, { "epoch": 0.8834409226082541, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.7681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12180 }, { "epoch": 0.8835134547037063, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.0458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12181 }, { "epoch": 0.8835859867991587, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.5955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12182 }, { "epoch": 0.8836585188946109, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 9.1768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12183 }, { "epoch": 0.8837310509900631, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12184 }, { "epoch": 0.8838035830855153, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.3824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12185 }, { "epoch": 0.8838761151809675, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.0062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12186 }, { "epoch": 0.8839486472764199, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12187 }, { "epoch": 0.8840211793718721, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12188 }, { "epoch": 0.8840937114673243, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12189 }, { "epoch": 0.8841662435627765, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 9.0025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12190 }, { "epoch": 0.8842387756582287, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.5269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12191 }, { "epoch": 0.884311307753681, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.4716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12192 }, { "epoch": 0.8843838398491333, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.7674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12193 }, { "epoch": 0.8844563719445855, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12194 }, { "epoch": 0.8845289040400377, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.9458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12195 }, { "epoch": 0.8846014361354899, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12196 }, { "epoch": 0.8846739682309422, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.9724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12197 }, { "epoch": 0.8847465003263945, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 9.175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12198 }, { "epoch": 0.8848190324218467, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.5191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12199 }, { "epoch": 0.8848915645172989, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.5752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12200 }, { "epoch": 0.8849640966127511, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.6457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12201 }, { "epoch": 0.8850366287082034, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.7493, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12202 }, { "epoch": 0.8851091608036556, "grad_norm": 31.0, "learning_rate": 0.0003, "loss": 9.2026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12203 }, { "epoch": 0.8851816928991079, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.1323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12204 }, { "epoch": 0.8852542249945601, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.2286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12205 }, { "epoch": 0.8853267570900123, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.6557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12206 }, { "epoch": 0.8853992891854646, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.5524, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12207 }, { "epoch": 0.8854718212809168, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0084, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12208 }, { "epoch": 0.885544353376369, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.5406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12209 }, { "epoch": 0.8856168854718213, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.9999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12210 }, { "epoch": 0.8856894175672735, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.8788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12211 }, { "epoch": 0.8857619496627258, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.1313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12212 }, { "epoch": 0.885834481758178, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 9.1176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12213 }, { "epoch": 0.8859070138536302, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.8903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12214 }, { "epoch": 0.8859795459490825, "grad_norm": 1.4453125, "learning_rate": 0.0003, "loss": 8.7551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12215 }, { "epoch": 0.8860520780445347, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.8371, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12216 }, { "epoch": 0.8861246101399869, "grad_norm": 16.25, "learning_rate": 0.0003, "loss": 9.0739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12217 }, { "epoch": 0.8861971422354392, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.1473, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12218 }, { "epoch": 0.8862696743308914, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.9045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12219 }, { "epoch": 0.8863422064263436, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12220 }, { "epoch": 0.8864147385217959, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12221 }, { "epoch": 0.8864872706172481, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12222 }, { "epoch": 0.8865598027127004, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.8132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12223 }, { "epoch": 0.8866323348081526, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.5951, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12224 }, { "epoch": 0.8867048669036048, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.0301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12225 }, { "epoch": 0.886777398999057, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.5138, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12226 }, { "epoch": 0.8868499310945093, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12227 }, { "epoch": 0.8869224631899616, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12228 }, { "epoch": 0.8869949952854138, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12229 }, { "epoch": 0.887067527380866, "grad_norm": 15.5, "learning_rate": 0.0003, "loss": 9.172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12230 }, { "epoch": 0.8871400594763182, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.7623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12231 }, { "epoch": 0.8872125915717705, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.939, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12232 }, { "epoch": 0.8872851236672228, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.7703, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12233 }, { "epoch": 0.887357655762675, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.1129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12234 }, { "epoch": 0.8874301878581272, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8313, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12235 }, { "epoch": 0.8875027199535794, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12236 }, { "epoch": 0.8875752520490316, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12237 }, { "epoch": 0.887647784144484, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12238 }, { "epoch": 0.8877203162399362, "grad_norm": 14.5, "learning_rate": 0.0003, "loss": 8.9586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12239 }, { "epoch": 0.8877928483353884, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.9614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12240 }, { "epoch": 0.8878653804308406, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.59, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12241 }, { "epoch": 0.8879379125262928, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.2731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12242 }, { "epoch": 0.8880104446217452, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12243 }, { "epoch": 0.8880829767171974, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.9043, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12244 }, { "epoch": 0.8881555088126496, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12245 }, { "epoch": 0.8882280409081018, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9278, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12246 }, { "epoch": 0.888300573003554, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.6989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12247 }, { "epoch": 0.8883731050990064, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 9.0091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12248 }, { "epoch": 0.8884456371944586, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.9595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12249 }, { "epoch": 0.8885181692899108, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.5617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12250 }, { "epoch": 0.888590701385363, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.6477, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12251 }, { "epoch": 0.8886632334808152, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.1045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12252 }, { "epoch": 0.8887357655762675, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.2807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12253 }, { "epoch": 0.8888082976717198, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.0421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12254 }, { "epoch": 0.888880829767172, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.8044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12255 }, { "epoch": 0.8889533618626242, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.4813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12256 }, { "epoch": 0.8890258939580764, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.5929, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12257 }, { "epoch": 0.8890984260535287, "grad_norm": 31.5, "learning_rate": 0.0003, "loss": 8.6056, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12258 }, { "epoch": 0.889170958148981, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.8078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12259 }, { "epoch": 0.8892434902444332, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.9272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12260 }, { "epoch": 0.8893160223398854, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.8949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12261 }, { "epoch": 0.8893885544353376, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.3923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12262 }, { "epoch": 0.8894610865307899, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.915, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12263 }, { "epoch": 0.8895336186262421, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.3324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12264 }, { "epoch": 0.8896061507216944, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.3487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12265 }, { "epoch": 0.8896786828171466, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12266 }, { "epoch": 0.8897512149125988, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.3132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12267 }, { "epoch": 0.8898237470080511, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8177, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12268 }, { "epoch": 0.8898962791035033, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.5688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12269 }, { "epoch": 0.8899688111989555, "grad_norm": 27.125, "learning_rate": 0.0003, "loss": 8.9412, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12270 }, { "epoch": 0.8900413432944078, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.0732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12271 }, { "epoch": 0.89011387538986, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.4141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12272 }, { "epoch": 0.8901864074853123, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.9639, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12273 }, { "epoch": 0.8902589395807645, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12274 }, { "epoch": 0.8903314716762167, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12275 }, { "epoch": 0.890404003771669, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.1074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12276 }, { "epoch": 0.8904765358671212, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12277 }, { "epoch": 0.8905490679625735, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 9.0356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12278 }, { "epoch": 0.8906216000580257, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12279 }, { "epoch": 0.8906941321534779, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.4869, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12280 }, { "epoch": 0.8907666642489301, "grad_norm": 17.5, "learning_rate": 0.0003, "loss": 9.212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12281 }, { "epoch": 0.8908391963443824, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12282 }, { "epoch": 0.8909117284398346, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12283 }, { "epoch": 0.8909842605352869, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.4105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12284 }, { "epoch": 0.8910567926307391, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.5128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12285 }, { "epoch": 0.8911293247261913, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12286 }, { "epoch": 0.8912018568216435, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12287 }, { "epoch": 0.8912743889170958, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.4734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12288 }, { "epoch": 0.8913469210125481, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12289 }, { "epoch": 0.8914194531080003, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.8438, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12290 }, { "epoch": 0.8914919852034525, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.9332, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12291 }, { "epoch": 0.8915645172989047, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.1328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12292 }, { "epoch": 0.891637049394357, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 9.7189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12293 }, { "epoch": 0.8917095814898093, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 9.1486, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12294 }, { "epoch": 0.8917821135852615, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0564, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12295 }, { "epoch": 0.8918546456807137, "grad_norm": 29.125, "learning_rate": 0.0003, "loss": 8.7097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12296 }, { "epoch": 0.8919271777761659, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12297 }, { "epoch": 0.8919997098716181, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 8.7112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12298 }, { "epoch": 0.8920722419670705, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12299 }, { "epoch": 0.8921447740625227, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.8977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12300 }, { "epoch": 0.8922173061579749, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.148, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12301 }, { "epoch": 0.8922898382534271, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.5902, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12302 }, { "epoch": 0.8923623703488793, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.5242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12303 }, { "epoch": 0.8924349024443317, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.9588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12304 }, { "epoch": 0.8925074345397839, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.6115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12305 }, { "epoch": 0.8925799666352361, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.9145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12306 }, { "epoch": 0.8926524987306883, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12307 }, { "epoch": 0.8927250308261405, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12308 }, { "epoch": 0.8927975629215928, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.4762, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12309 }, { "epoch": 0.8928700950170451, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12310 }, { "epoch": 0.8929426271124973, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12311 }, { "epoch": 0.8930151592079495, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 9.0982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12312 }, { "epoch": 0.8930876913034017, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.6435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12313 }, { "epoch": 0.893160223398854, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.5423, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12314 }, { "epoch": 0.8932327554943063, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.7756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12315 }, { "epoch": 0.8933052875897585, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.8238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12316 }, { "epoch": 0.8933778196852107, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.5855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12317 }, { "epoch": 0.8934503517806629, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.9999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12318 }, { "epoch": 0.8935228838761152, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12319 }, { "epoch": 0.8935954159715674, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12320 }, { "epoch": 0.8936679480670197, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.5218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12321 }, { "epoch": 0.8937404801624719, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12322 }, { "epoch": 0.8938130122579241, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12323 }, { "epoch": 0.8938855443533764, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.8311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12324 }, { "epoch": 0.8939580764488286, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 8.5633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12325 }, { "epoch": 0.8940306085442808, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12326 }, { "epoch": 0.8941031406397331, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.8435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12327 }, { "epoch": 0.8941756727351853, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12328 }, { "epoch": 0.8942482048306376, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.6862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12329 }, { "epoch": 0.8943207369260898, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 9.2936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12330 }, { "epoch": 0.894393269021542, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.2446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12331 }, { "epoch": 0.8944658011169943, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.4383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12332 }, { "epoch": 0.8945383332124465, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 9.0381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12333 }, { "epoch": 0.8946108653078988, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.9188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12334 }, { "epoch": 0.894683397403351, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12335 }, { "epoch": 0.8947559294988032, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.4999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12336 }, { "epoch": 0.8948284615942554, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12337 }, { "epoch": 0.8949009936897077, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.5636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12338 }, { "epoch": 0.89497352578516, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.5302, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12339 }, { "epoch": 0.8950460578806122, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12340 }, { "epoch": 0.8951185899760644, "grad_norm": 12.125, "learning_rate": 0.0003, "loss": 8.8436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12341 }, { "epoch": 0.8951911220715166, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.4675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12342 }, { "epoch": 0.8952636541669688, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.9756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12343 }, { "epoch": 0.8953361862624212, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.9247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12344 }, { "epoch": 0.8954087183578734, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12345 }, { "epoch": 0.8954812504533256, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12346 }, { "epoch": 0.8955537825487778, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7774, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12347 }, { "epoch": 0.89562631464423, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.4936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12348 }, { "epoch": 0.8956988467396824, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.2804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12349 }, { "epoch": 0.8957713788351346, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12350 }, { "epoch": 0.8958439109305868, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.6753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12351 }, { "epoch": 0.895916443026039, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12352 }, { "epoch": 0.8959889751214912, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12353 }, { "epoch": 0.8960615072169434, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.1358, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12354 }, { "epoch": 0.8961340393123958, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12355 }, { "epoch": 0.896206571407848, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12356 }, { "epoch": 0.8962791035033002, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12357 }, { "epoch": 0.8963516355987524, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.3925, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12358 }, { "epoch": 0.8964241676942046, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12359 }, { "epoch": 0.896496699789657, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12360 }, { "epoch": 0.8965692318851092, "grad_norm": 12.6875, "learning_rate": 0.0003, "loss": 9.0594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12361 }, { "epoch": 0.8966417639805614, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 9.1852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12362 }, { "epoch": 0.8967142960760136, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.1906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12363 }, { "epoch": 0.8967868281714658, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.8583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12364 }, { "epoch": 0.8968593602669181, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 8.9413, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12365 }, { "epoch": 0.8969318923623704, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.2476, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12366 }, { "epoch": 0.8970044244578226, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.3323, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12367 }, { "epoch": 0.8970769565532748, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.7022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12368 }, { "epoch": 0.897149488648727, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.3068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12369 }, { "epoch": 0.8972220207441793, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.6562, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12370 }, { "epoch": 0.8972945528396316, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.9753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12371 }, { "epoch": 0.8973670849350838, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.9692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12372 }, { "epoch": 0.897439617030536, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.6539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12373 }, { "epoch": 0.8975121491259882, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.8705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12374 }, { "epoch": 0.8975846812214405, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.4693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12375 }, { "epoch": 0.8976572133168927, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.1463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12376 }, { "epoch": 0.897729745412345, "grad_norm": 13.0625, "learning_rate": 0.0003, "loss": 8.9481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12377 }, { "epoch": 0.8978022775077972, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.8166, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12378 }, { "epoch": 0.8978748096032494, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 9.0812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12379 }, { "epoch": 0.8979473416987017, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.6735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12380 }, { "epoch": 0.8980198737941539, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12381 }, { "epoch": 0.8980924058896061, "grad_norm": 17.375, "learning_rate": 0.0003, "loss": 8.8839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12382 }, { "epoch": 0.8981649379850584, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.5523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12383 }, { "epoch": 0.8982374700805106, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.5146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12384 }, { "epoch": 0.8983100021759629, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 9.1442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12385 }, { "epoch": 0.8983825342714151, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.8855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12386 }, { "epoch": 0.8984550663668673, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.2665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12387 }, { "epoch": 0.8985275984623196, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.0753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12388 }, { "epoch": 0.8986001305577718, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12389 }, { "epoch": 0.8986726626532241, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.6959, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12390 }, { "epoch": 0.8987451947486763, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.8881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12391 }, { "epoch": 0.8988177268441285, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.2264, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12392 }, { "epoch": 0.8988902589395807, "grad_norm": 7.15625, "learning_rate": 0.0003, "loss": 8.4453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12393 }, { "epoch": 0.898962791035033, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.7598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12394 }, { "epoch": 0.8990353231304853, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.5418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12395 }, { "epoch": 0.8991078552259375, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.2314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12396 }, { "epoch": 0.8991803873213897, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.5738, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12397 }, { "epoch": 0.8992529194168419, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.6708, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12398 }, { "epoch": 0.8993254515122941, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.6013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12399 }, { "epoch": 0.8993979836077465, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.2182, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12400 }, { "epoch": 0.8994705157031987, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.636, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12401 }, { "epoch": 0.8995430477986509, "grad_norm": 14.3125, "learning_rate": 0.0003, "loss": 9.1646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12402 }, { "epoch": 0.8996155798941031, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.6748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12403 }, { "epoch": 0.8996881119895553, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 9.1108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12404 }, { "epoch": 0.8997606440850077, "grad_norm": 1.8984375, "learning_rate": 0.0003, "loss": 8.8737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12405 }, { "epoch": 0.8998331761804599, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12406 }, { "epoch": 0.8999057082759121, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12407 }, { "epoch": 0.8999782403713643, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.6067, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12408 }, { "epoch": 0.9000507724668165, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.1828, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12409 }, { "epoch": 0.9001233045622689, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12410 }, { "epoch": 0.9001958366577211, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.3799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12411 }, { "epoch": 0.9002683687531733, "grad_norm": 12.875, "learning_rate": 0.0003, "loss": 8.4409, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12412 }, { "epoch": 0.9003409008486255, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.7943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12413 }, { "epoch": 0.9004134329440777, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.0813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12414 }, { "epoch": 0.90048596503953, "grad_norm": 8.4375, "learning_rate": 0.0003, "loss": 8.7957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12415 }, { "epoch": 0.9005584971349823, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.5127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12416 }, { "epoch": 0.9006310292304345, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.4866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12417 }, { "epoch": 0.9007035613258867, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1799, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12418 }, { "epoch": 0.9007760934213389, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 8.3591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12419 }, { "epoch": 0.9008486255167912, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.7436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12420 }, { "epoch": 0.9009211576122435, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.3953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12421 }, { "epoch": 0.9009936897076957, "grad_norm": 34.0, "learning_rate": 0.0003, "loss": 9.0934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12422 }, { "epoch": 0.9010662218031479, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.001, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12423 }, { "epoch": 0.9011387538986001, "grad_norm": 23.25, "learning_rate": 0.0003, "loss": 8.9461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12424 }, { "epoch": 0.9012112859940523, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.1099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12425 }, { "epoch": 0.9012838180895046, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.7629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12426 }, { "epoch": 0.9013563501849569, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.9963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12427 }, { "epoch": 0.9014288822804091, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.6853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12428 }, { "epoch": 0.9015014143758613, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.9885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12429 }, { "epoch": 0.9015739464713135, "grad_norm": 20.0, "learning_rate": 0.0003, "loss": 8.6856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12430 }, { "epoch": 0.9016464785667658, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8579, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12431 }, { "epoch": 0.901719010662218, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.2398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12432 }, { "epoch": 0.9017915427576703, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.4797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12433 }, { "epoch": 0.9018640748531225, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.5196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12434 }, { "epoch": 0.9019366069485747, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.1405, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12435 }, { "epoch": 0.902009139044027, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.5736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12436 }, { "epoch": 0.9020816711394792, "grad_norm": 29.875, "learning_rate": 0.0003, "loss": 9.0324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12437 }, { "epoch": 0.9021542032349315, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12438 }, { "epoch": 0.9022267353303837, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.3063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12439 }, { "epoch": 0.9022992674258359, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.7541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12440 }, { "epoch": 0.9023717995212882, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 9.0853, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12441 }, { "epoch": 0.9024443316167404, "grad_norm": 13.125, "learning_rate": 0.0003, "loss": 9.3027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12442 }, { "epoch": 0.9025168637121926, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.9741, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12443 }, { "epoch": 0.9025893958076449, "grad_norm": 10.6875, "learning_rate": 0.0003, "loss": 8.9704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12444 }, { "epoch": 0.9026619279030971, "grad_norm": 17.25, "learning_rate": 0.0003, "loss": 9.2468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12445 }, { "epoch": 0.9027344599985494, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.0111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12446 }, { "epoch": 0.9028069920940016, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.0064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12447 }, { "epoch": 0.9028795241894538, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 8.6807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12448 }, { "epoch": 0.902952056284906, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12449 }, { "epoch": 0.9030245883803583, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.9556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12450 }, { "epoch": 0.9030971204758106, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.082, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12451 }, { "epoch": 0.9031696525712628, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12452 }, { "epoch": 0.903242184666715, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12453 }, { "epoch": 0.9033147167621672, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.3188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12454 }, { "epoch": 0.9033872488576195, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 9.3014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12455 }, { "epoch": 0.9034597809530718, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.4876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12456 }, { "epoch": 0.903532313048524, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.7682, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12457 }, { "epoch": 0.9036048451439762, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 8.7955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12458 }, { "epoch": 0.9036773772394284, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 9.0757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12459 }, { "epoch": 0.9037499093348806, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.8079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12460 }, { "epoch": 0.903822441430333, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.104, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12461 }, { "epoch": 0.9038949735257852, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.0613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12462 }, { "epoch": 0.9039675056212374, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0286, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12463 }, { "epoch": 0.9040400377166896, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12464 }, { "epoch": 0.9041125698121418, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8541, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12465 }, { "epoch": 0.9041851019075942, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 8.3899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12466 }, { "epoch": 0.9042576340030464, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 8.8379, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12467 }, { "epoch": 0.9043301660984986, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12468 }, { "epoch": 0.9044026981939508, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.7062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12469 }, { "epoch": 0.904475230289403, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.9263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12470 }, { "epoch": 0.9045477623848553, "grad_norm": 11.0, "learning_rate": 0.0003, "loss": 8.7773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12471 }, { "epoch": 0.9046202944803076, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.6016, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12472 }, { "epoch": 0.9046928265757598, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 9.1034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12473 }, { "epoch": 0.904765358671212, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12474 }, { "epoch": 0.9048378907666642, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12475 }, { "epoch": 0.9049104228621165, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.2934, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12476 }, { "epoch": 0.9049829549575688, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12477 }, { "epoch": 0.905055487053021, "grad_norm": 7.6875, "learning_rate": 0.0003, "loss": 9.3964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12478 }, { "epoch": 0.9051280191484732, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.6543, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12479 }, { "epoch": 0.9052005512439254, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12480 }, { "epoch": 0.9052730833393777, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.8093, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12481 }, { "epoch": 0.9053456154348299, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.6202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12482 }, { "epoch": 0.9054181475302822, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.7191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12483 }, { "epoch": 0.9054906796257344, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.9029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12484 }, { "epoch": 0.9055632117211866, "grad_norm": 12.0625, "learning_rate": 0.0003, "loss": 7.996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12485 }, { "epoch": 0.9056357438166389, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 9.0693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12486 }, { "epoch": 0.9057082759120911, "grad_norm": 2.21875, "learning_rate": 0.0003, "loss": 9.0868, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12487 }, { "epoch": 0.9057808080075433, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.9569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12488 }, { "epoch": 0.9058533401029956, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12489 }, { "epoch": 0.9059258721984478, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 9.061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12490 }, { "epoch": 0.9059984042939001, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.9289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12491 }, { "epoch": 0.9060709363893523, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.0456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12492 }, { "epoch": 0.9061434684848045, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12493 }, { "epoch": 0.9062160005802568, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.5075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12494 }, { "epoch": 0.906288532675709, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.3434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12495 }, { "epoch": 0.9063610647711612, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.6444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12496 }, { "epoch": 0.9064335968666135, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.9921, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12497 }, { "epoch": 0.9065061289620657, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.7583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12498 }, { "epoch": 0.9065786610575179, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.7618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12499 }, { "epoch": 0.9066511931529702, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.7466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12500 }, { "epoch": 0.9067237252484224, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.6698, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12501 }, { "epoch": 0.9067962573438747, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12502 }, { "epoch": 0.9068687894393269, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.6397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12503 }, { "epoch": 0.9069413215347791, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12504 }, { "epoch": 0.9070138536302313, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12505 }, { "epoch": 0.9070863857256836, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 8.7445, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12506 }, { "epoch": 0.9071589178211359, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.1586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12507 }, { "epoch": 0.9072314499165881, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12508 }, { "epoch": 0.9073039820120403, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.5614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12509 }, { "epoch": 0.9073765141074925, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.4808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12510 }, { "epoch": 0.9074490462029448, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.8574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12511 }, { "epoch": 0.9075215782983971, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.7063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12512 }, { "epoch": 0.9075941103938493, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.8312, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12513 }, { "epoch": 0.9076666424893015, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.6205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12514 }, { "epoch": 0.9077391745847537, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.8035, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12515 }, { "epoch": 0.9078117066802059, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 9.1178, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12516 }, { "epoch": 0.9078842387756583, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.5421, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12517 }, { "epoch": 0.9079567708711105, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12518 }, { "epoch": 0.9080293029665627, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.0219, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12519 }, { "epoch": 0.9081018350620149, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.9255, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12520 }, { "epoch": 0.9081743671574671, "grad_norm": 10.375, "learning_rate": 0.0003, "loss": 8.7696, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12521 }, { "epoch": 0.9082468992529195, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 9.2293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12522 }, { "epoch": 0.9083194313483717, "grad_norm": 52.5, "learning_rate": 0.0003, "loss": 8.5118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12523 }, { "epoch": 0.9083919634438239, "grad_norm": 9.6875, "learning_rate": 0.0003, "loss": 9.4057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12524 }, { "epoch": 0.9084644955392761, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 9.1841, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12525 }, { "epoch": 0.9085370276347283, "grad_norm": 22.5, "learning_rate": 0.0003, "loss": 8.7642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12526 }, { "epoch": 0.9086095597301806, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.2908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12527 }, { "epoch": 0.9086820918256329, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 9.4199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12528 }, { "epoch": 0.9087546239210851, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.6027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12529 }, { "epoch": 0.9088271560165373, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.7525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12530 }, { "epoch": 0.9088996881119895, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.8942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12531 }, { "epoch": 0.9089722202074418, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.6206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12532 }, { "epoch": 0.909044752302894, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.4592, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12533 }, { "epoch": 0.9091172843983463, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 9.1089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12534 }, { "epoch": 0.9091898164937985, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.079, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12535 }, { "epoch": 0.9092623485892507, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12536 }, { "epoch": 0.909334880684703, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.5885, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12537 }, { "epoch": 0.9094074127801552, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 8.6692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12538 }, { "epoch": 0.9094799448756075, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.3065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12539 }, { "epoch": 0.9095524769710597, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 9.1932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12540 }, { "epoch": 0.9096250090665119, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.6635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12541 }, { "epoch": 0.9096975411619642, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.5116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12542 }, { "epoch": 0.9097700732574164, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.4119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12543 }, { "epoch": 0.9098426053528686, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.9207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12544 }, { "epoch": 0.9099151374483209, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12545 }, { "epoch": 0.9099876695437731, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.2002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12546 }, { "epoch": 0.9100602016392254, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.9575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12547 }, { "epoch": 0.9101327337346776, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8071, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12548 }, { "epoch": 0.9102052658301298, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 9.0171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12549 }, { "epoch": 0.910277797925582, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 8.7711, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12550 }, { "epoch": 0.9103503300210343, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12551 }, { "epoch": 0.9104228621164866, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.5041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12552 }, { "epoch": 0.9104953942119388, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12553 }, { "epoch": 0.910567926307391, "grad_norm": 25.875, "learning_rate": 0.0003, "loss": 8.9567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12554 }, { "epoch": 0.9106404584028432, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.4941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12555 }, { "epoch": 0.9107129904982955, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 9.1808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12556 }, { "epoch": 0.9107855225937478, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.9689, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12557 }, { "epoch": 0.9108580546892, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 9.0436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12558 }, { "epoch": 0.9109305867846522, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.6318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12559 }, { "epoch": 0.9110031188801044, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.8625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12560 }, { "epoch": 0.9110756509755567, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12561 }, { "epoch": 0.911148183071009, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12562 }, { "epoch": 0.9112207151664612, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.5616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12563 }, { "epoch": 0.9112932472619134, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.4115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12564 }, { "epoch": 0.9113657793573656, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.7509, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12565 }, { "epoch": 0.9114383114528178, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 9.0995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12566 }, { "epoch": 0.9115108435482701, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12567 }, { "epoch": 0.9115833756437224, "grad_norm": 1.78125, "learning_rate": 0.0003, "loss": 8.8643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12568 }, { "epoch": 0.9116559077391746, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.6023, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12569 }, { "epoch": 0.9117284398346268, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 9.5168, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12570 }, { "epoch": 0.911800971930079, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.7021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12571 }, { "epoch": 0.9118735040255312, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12572 }, { "epoch": 0.9119460361209836, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.8284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12573 }, { "epoch": 0.9120185682164358, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.9095, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12574 }, { "epoch": 0.912091100311888, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.5026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12575 }, { "epoch": 0.9121636324073402, "grad_norm": 23.625, "learning_rate": 0.0003, "loss": 9.1437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12576 }, { "epoch": 0.9122361645027924, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.5485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12577 }, { "epoch": 0.9123086965982448, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12578 }, { "epoch": 0.912381228693697, "grad_norm": 6.3125, "learning_rate": 0.0003, "loss": 8.7298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12579 }, { "epoch": 0.9124537607891492, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12580 }, { "epoch": 0.9125262928846014, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.106, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12581 }, { "epoch": 0.9125988249800536, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.7386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12582 }, { "epoch": 0.912671357075506, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.9226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12583 }, { "epoch": 0.9127438891709582, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 8.4139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12584 }, { "epoch": 0.9128164212664104, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.7142, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12585 }, { "epoch": 0.9128889533618626, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.7856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12586 }, { "epoch": 0.9129614854573148, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.2146, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12587 }, { "epoch": 0.9130340175527671, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.6857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12588 }, { "epoch": 0.9131065496482194, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.2963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12589 }, { "epoch": 0.9131790817436716, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.0386, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12590 }, { "epoch": 0.9132516138391238, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 9.3433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12591 }, { "epoch": 0.913324145934576, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.7369, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12592 }, { "epoch": 0.9133966780300283, "grad_norm": 12.4375, "learning_rate": 0.0003, "loss": 9.2063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12593 }, { "epoch": 0.9134692101254805, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.7282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12594 }, { "epoch": 0.9135417422209328, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.7408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12595 }, { "epoch": 0.913614274316385, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.0617, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12596 }, { "epoch": 0.9136868064118372, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.2251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12597 }, { "epoch": 0.9137593385072895, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12598 }, { "epoch": 0.9138318706027417, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.2414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12599 }, { "epoch": 0.913904402698194, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.1767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12600 }, { "epoch": 0.9139769347936462, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.8368, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12601 }, { "epoch": 0.9140494668890984, "grad_norm": 8.8125, "learning_rate": 0.0003, "loss": 8.9206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12602 }, { "epoch": 0.9141219989845507, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.2884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12603 }, { "epoch": 0.9141945310800029, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 9.2245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12604 }, { "epoch": 0.9142670631754551, "grad_norm": 5.46875, "learning_rate": 0.0003, "loss": 9.0576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12605 }, { "epoch": 0.9143395952709074, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.5873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12606 }, { "epoch": 0.9144121273663596, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12607 }, { "epoch": 0.9144846594618119, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.3355, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12608 }, { "epoch": 0.9145571915572641, "grad_norm": 16.5, "learning_rate": 0.0003, "loss": 8.5021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12609 }, { "epoch": 0.9146297236527163, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12610 }, { "epoch": 0.9147022557481685, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.5401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12611 }, { "epoch": 0.9147747878436208, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.4436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12612 }, { "epoch": 0.9148473199390731, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.6608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12613 }, { "epoch": 0.9149198520345253, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.9855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12614 }, { "epoch": 0.9149923841299775, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.9091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12615 }, { "epoch": 0.9150649162254297, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.6038, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12616 }, { "epoch": 0.915137448320882, "grad_norm": 18.5, "learning_rate": 0.0003, "loss": 9.717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12617 }, { "epoch": 0.9152099804163343, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.8457, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12618 }, { "epoch": 0.9152825125117865, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.8334, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12619 }, { "epoch": 0.9153550446072387, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.7141, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12620 }, { "epoch": 0.9154275767026909, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12621 }, { "epoch": 0.9155001087981431, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8036, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12622 }, { "epoch": 0.9155726408935955, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.5027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12623 }, { "epoch": 0.9156451729890477, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6765, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12624 }, { "epoch": 0.9157177050844999, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.9873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12625 }, { "epoch": 0.9157902371799521, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.01, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12626 }, { "epoch": 0.9158627692754043, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12627 }, { "epoch": 0.9159353013708567, "grad_norm": 13.75, "learning_rate": 0.0003, "loss": 9.4492, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12628 }, { "epoch": 0.9160078334663089, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 9.0824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12629 }, { "epoch": 0.9160803655617611, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12630 }, { "epoch": 0.9161528976572133, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 9.0362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12631 }, { "epoch": 0.9162254297526655, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.4899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12632 }, { "epoch": 0.9162979618481177, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7824, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12633 }, { "epoch": 0.9163704939435701, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.9059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12634 }, { "epoch": 0.9164430260390223, "grad_norm": 9.0625, "learning_rate": 0.0003, "loss": 9.0387, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12635 }, { "epoch": 0.9165155581344745, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8612, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12636 }, { "epoch": 0.9165880902299267, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.9325, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12637 }, { "epoch": 0.9166606223253789, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12638 }, { "epoch": 0.9167331544208313, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.7968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12639 }, { "epoch": 0.9168056865162835, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.3829, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12640 }, { "epoch": 0.9168782186117357, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.3416, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12641 }, { "epoch": 0.9169507507071879, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.3586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12642 }, { "epoch": 0.9170232828026401, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.1717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12643 }, { "epoch": 0.9170958148980924, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12644 }, { "epoch": 0.9171683469935447, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.9297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12645 }, { "epoch": 0.9172408790889969, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 9.2297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12646 }, { "epoch": 0.9173134111844491, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.8439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12647 }, { "epoch": 0.9173859432799013, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12648 }, { "epoch": 0.9174584753753536, "grad_norm": 18.75, "learning_rate": 0.0003, "loss": 8.7996, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12649 }, { "epoch": 0.9175310074708058, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.9345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12650 }, { "epoch": 0.9176035395662581, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.9435, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12651 }, { "epoch": 0.9176760716617103, "grad_norm": 13.1875, "learning_rate": 0.0003, "loss": 8.7399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12652 }, { "epoch": 0.9177486037571625, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.6005, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12653 }, { "epoch": 0.9178211358526148, "grad_norm": 7.65625, "learning_rate": 0.0003, "loss": 8.4663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12654 }, { "epoch": 0.917893667948067, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12655 }, { "epoch": 0.9179662000435193, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 9.1949, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12656 }, { "epoch": 0.9180387321389715, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.71, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12657 }, { "epoch": 0.9181112642344237, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.0609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12658 }, { "epoch": 0.918183796329876, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.7544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12659 }, { "epoch": 0.9182563284253282, "grad_norm": 7.59375, "learning_rate": 0.0003, "loss": 8.9014, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12660 }, { "epoch": 0.9183288605207804, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9916, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12661 }, { "epoch": 0.9184013926162327, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.7045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12662 }, { "epoch": 0.9184739247116849, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.9903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12663 }, { "epoch": 0.9185464568071372, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 8.6333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12664 }, { "epoch": 0.9186189889025894, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12665 }, { "epoch": 0.9186915209980416, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.5231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12666 }, { "epoch": 0.9187640530934938, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12667 }, { "epoch": 0.9188365851889461, "grad_norm": 1.9453125, "learning_rate": 0.0003, "loss": 9.2784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12668 }, { "epoch": 0.9189091172843984, "grad_norm": 9.0, "learning_rate": 0.0003, "loss": 9.2526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12669 }, { "epoch": 0.9189816493798506, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.7049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12670 }, { "epoch": 0.9190541814753028, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.8351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12671 }, { "epoch": 0.919126713570755, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.6033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12672 }, { "epoch": 0.9191992456662073, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.0246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12673 }, { "epoch": 0.9192717777616596, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.6567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12674 }, { "epoch": 0.9193443098571118, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.5186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12675 }, { "epoch": 0.919416841952564, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12676 }, { "epoch": 0.9194893740480162, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.3572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12677 }, { "epoch": 0.9195619061434684, "grad_norm": 20.625, "learning_rate": 0.0003, "loss": 8.5174, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12678 }, { "epoch": 0.9196344382389208, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.4602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12679 }, { "epoch": 0.919706970334373, "grad_norm": 30.5, "learning_rate": 0.0003, "loss": 9.1295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12680 }, { "epoch": 0.9197795024298252, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.8789, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12681 }, { "epoch": 0.9198520345252774, "grad_norm": 18.25, "learning_rate": 0.0003, "loss": 8.8055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12682 }, { "epoch": 0.9199245666207296, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.5716, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12683 }, { "epoch": 0.919997098716182, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 7.905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12684 }, { "epoch": 0.9200696308116342, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.9324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12685 }, { "epoch": 0.9201421629070864, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.7192, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12686 }, { "epoch": 0.9202146950025386, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.6728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12687 }, { "epoch": 0.9202872270979908, "grad_norm": 7.40625, "learning_rate": 0.0003, "loss": 8.8786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12688 }, { "epoch": 0.9203597591934431, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.5258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12689 }, { "epoch": 0.9204322912888954, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.3534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12690 }, { "epoch": 0.9205048233843476, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.8773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12691 }, { "epoch": 0.9205773554797998, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12692 }, { "epoch": 0.920649887575252, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12693 }, { "epoch": 0.9207224196707043, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.8259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12694 }, { "epoch": 0.9207949517661566, "grad_norm": 8.625, "learning_rate": 0.0003, "loss": 8.8242, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12695 }, { "epoch": 0.9208674838616088, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.0205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12696 }, { "epoch": 0.920940015957061, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.8811, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12697 }, { "epoch": 0.9210125480525132, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9767, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12698 }, { "epoch": 0.9210850801479655, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.7651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12699 }, { "epoch": 0.9211576122434177, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12700 }, { "epoch": 0.92123014433887, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 9.2433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12701 }, { "epoch": 0.9213026764343222, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12702 }, { "epoch": 0.9213752085297744, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12703 }, { "epoch": 0.9214477406252266, "grad_norm": 7.53125, "learning_rate": 0.0003, "loss": 8.7265, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12704 }, { "epoch": 0.9215202727206789, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.3126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12705 }, { "epoch": 0.9215928048161312, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12706 }, { "epoch": 0.9216653369115834, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0826, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12707 }, { "epoch": 0.9217378690070356, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12708 }, { "epoch": 0.9218104011024878, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.7054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12709 }, { "epoch": 0.9218829331979401, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.7757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12710 }, { "epoch": 0.9219554652933923, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 9.1488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12711 }, { "epoch": 0.9220279973888446, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.9778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12712 }, { "epoch": 0.9221005294842968, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12713 }, { "epoch": 0.922173061579749, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.2983, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12714 }, { "epoch": 0.9222455936752013, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.6375, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12715 }, { "epoch": 0.9223181257706535, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.4544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12716 }, { "epoch": 0.9223906578661057, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.3985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12717 }, { "epoch": 0.922463189961558, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.2474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12718 }, { "epoch": 0.9225357220570102, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12719 }, { "epoch": 0.9226082541524625, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.78, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12720 }, { "epoch": 0.9226807862479147, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.7437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12721 }, { "epoch": 0.9227533183433669, "grad_norm": 6.53125, "learning_rate": 0.0003, "loss": 8.6083, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12722 }, { "epoch": 0.9228258504388192, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12723 }, { "epoch": 0.9228983825342714, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.6629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12724 }, { "epoch": 0.9229709146297237, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.8918, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12725 }, { "epoch": 0.9230434467251759, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9179, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12726 }, { "epoch": 0.9231159788206281, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.9256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12727 }, { "epoch": 0.9231885109160803, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.1468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12728 }, { "epoch": 0.9232610430115326, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12729 }, { "epoch": 0.9233335751069849, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.4502, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12730 }, { "epoch": 0.9234061072024371, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.8809, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12731 }, { "epoch": 0.9234786392978893, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 9.431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12732 }, { "epoch": 0.9235511713933415, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 9.046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12733 }, { "epoch": 0.9236237034887937, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.3614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12734 }, { "epoch": 0.9236962355842461, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.0719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12735 }, { "epoch": 0.9237687676796983, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7745, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12736 }, { "epoch": 0.9238412997751505, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 9.0115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12737 }, { "epoch": 0.9239138318706027, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 9.0447, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12738 }, { "epoch": 0.9239863639660549, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9348, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12739 }, { "epoch": 0.9240588960615073, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.2678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12740 }, { "epoch": 0.9241314281569595, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.5802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12741 }, { "epoch": 0.9242039602524117, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12742 }, { "epoch": 0.9242764923478639, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.004, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12743 }, { "epoch": 0.9243490244433161, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.2147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12744 }, { "epoch": 0.9244215565387685, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.6163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12745 }, { "epoch": 0.9244940886342207, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.4751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12746 }, { "epoch": 0.9245666207296729, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12747 }, { "epoch": 0.9246391528251251, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.8907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12748 }, { "epoch": 0.9247116849205773, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.5501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12749 }, { "epoch": 0.9247842170160296, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.1985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12750 }, { "epoch": 0.9248567491114819, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.9198, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12751 }, { "epoch": 0.9249292812069341, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.6787, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12752 }, { "epoch": 0.9250018133023863, "grad_norm": 22.125, "learning_rate": 0.0003, "loss": 9.0935, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12753 }, { "epoch": 0.9250743453978385, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.4515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12754 }, { "epoch": 0.9251468774932908, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.8664, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12755 }, { "epoch": 0.925219409588743, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.8972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12756 }, { "epoch": 0.9252919416841953, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 8.6608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12757 }, { "epoch": 0.9253644737796475, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 8.9495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12758 }, { "epoch": 0.9254370058750997, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.6606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12759 }, { "epoch": 0.925509537970552, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.1728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12760 }, { "epoch": 0.9255820700660042, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.1105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12761 }, { "epoch": 0.9256546021614565, "grad_norm": 61.25, "learning_rate": 0.0003, "loss": 8.5333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12762 }, { "epoch": 0.9257271342569087, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 9.2506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12763 }, { "epoch": 0.9257996663523609, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.5724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12764 }, { "epoch": 0.9258721984478132, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 9.4978, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12765 }, { "epoch": 0.9259447305432654, "grad_norm": 3.765625, "learning_rate": 0.0003, "loss": 8.9291, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12766 }, { "epoch": 0.9260172626387176, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12767 }, { "epoch": 0.9260897947341699, "grad_norm": 16.375, "learning_rate": 0.0003, "loss": 9.1815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12768 }, { "epoch": 0.9261623268296221, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.0327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12769 }, { "epoch": 0.9262348589250744, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.1224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12770 }, { "epoch": 0.9263073910205266, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 9.1172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12771 }, { "epoch": 0.9263799231159788, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.565, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12772 }, { "epoch": 0.926452455211431, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.4889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12773 }, { "epoch": 0.9265249873068833, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9988, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12774 }, { "epoch": 0.9265975194023355, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.1505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12775 }, { "epoch": 0.9266700514977878, "grad_norm": 6.71875, "learning_rate": 0.0003, "loss": 8.9838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12776 }, { "epoch": 0.92674258359324, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12777 }, { "epoch": 0.9268151156886922, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.8669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12778 }, { "epoch": 0.9268876477841445, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.5136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12779 }, { "epoch": 0.9269601798795967, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.5301, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12780 }, { "epoch": 0.927032711975049, "grad_norm": 18.875, "learning_rate": 0.0003, "loss": 9.6985, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12781 }, { "epoch": 0.9271052440705012, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.8883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12782 }, { "epoch": 0.9271777761659534, "grad_norm": 1.875, "learning_rate": 0.0003, "loss": 8.8293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12783 }, { "epoch": 0.9272503082614056, "grad_norm": 7.625, "learning_rate": 0.0003, "loss": 8.1448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12784 }, { "epoch": 0.9273228403568579, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.8326, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12785 }, { "epoch": 0.9273953724523102, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12786 }, { "epoch": 0.9274679045477624, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12787 }, { "epoch": 0.9275404366432146, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.5874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12788 }, { "epoch": 0.9276129687386668, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.8204, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12789 }, { "epoch": 0.927685500834119, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.8253, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12790 }, { "epoch": 0.9277580329295714, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12791 }, { "epoch": 0.9278305650250236, "grad_norm": 10.8125, "learning_rate": 0.0003, "loss": 8.7305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12792 }, { "epoch": 0.9279030971204758, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.247, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12793 }, { "epoch": 0.927975629215928, "grad_norm": 9.5625, "learning_rate": 0.0003, "loss": 8.7904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12794 }, { "epoch": 0.9280481613113802, "grad_norm": 6.78125, "learning_rate": 0.0003, "loss": 9.0742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12795 }, { "epoch": 0.9281206934068326, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12796 }, { "epoch": 0.9281932255022848, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.59, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12797 }, { "epoch": 0.928265757597737, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.1345, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12798 }, { "epoch": 0.9283382896931892, "grad_norm": 1.9375, "learning_rate": 0.0003, "loss": 8.8633, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12799 }, { "epoch": 0.9284108217886414, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.7773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12800 }, { "epoch": 0.9284833538840938, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6624, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12801 }, { "epoch": 0.928555885979546, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2874, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12802 }, { "epoch": 0.9286284180749982, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.2126, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12803 }, { "epoch": 0.9287009501704504, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 9.2134, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12804 }, { "epoch": 0.9287734822659026, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 8.3575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12805 }, { "epoch": 0.9288460143613549, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8782, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12806 }, { "epoch": 0.9289185464568072, "grad_norm": 2.734375, "learning_rate": 0.0003, "loss": 8.7496, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12807 }, { "epoch": 0.9289910785522594, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.9635, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12808 }, { "epoch": 0.9290636106477116, "grad_norm": 76.0, "learning_rate": 0.0003, "loss": 8.8236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12809 }, { "epoch": 0.9291361427431638, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.7956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12810 }, { "epoch": 0.9292086748386161, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.4456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12811 }, { "epoch": 0.9292812069340683, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.8908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12812 }, { "epoch": 0.9293537390295206, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 9.176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12813 }, { "epoch": 0.9294262711249728, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 8.5956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12814 }, { "epoch": 0.929498803220425, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 9.0665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12815 }, { "epoch": 0.9295713353158773, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12816 }, { "epoch": 0.9296438674113295, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.5139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12817 }, { "epoch": 0.9297163995067818, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12818 }, { "epoch": 0.929788931602234, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.0842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12819 }, { "epoch": 0.9298614636976862, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.0338, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12820 }, { "epoch": 0.9299339957931385, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12821 }, { "epoch": 0.9300065278885907, "grad_norm": 10.1875, "learning_rate": 0.0003, "loss": 9.2458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12822 }, { "epoch": 0.9300790599840429, "grad_norm": 21.125, "learning_rate": 0.0003, "loss": 8.6392, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12823 }, { "epoch": 0.9301515920794952, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12824 }, { "epoch": 0.9302241241749474, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.8401, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12825 }, { "epoch": 0.9302966562703997, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12826 }, { "epoch": 0.9303691883658519, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12827 }, { "epoch": 0.9304417204613041, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 8.3019, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12828 }, { "epoch": 0.9305142525567563, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.9937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12829 }, { "epoch": 0.9305867846522086, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 9.0194, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12830 }, { "epoch": 0.9306593167476609, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.3873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12831 }, { "epoch": 0.9307318488431131, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12832 }, { "epoch": 0.9308043809385653, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 9.3528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12833 }, { "epoch": 0.9308769130340175, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.6516, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12834 }, { "epoch": 0.9309494451294698, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.4792, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12835 }, { "epoch": 0.9310219772249221, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.1707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12836 }, { "epoch": 0.9310945093203743, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.8877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12837 }, { "epoch": 0.9311670414158265, "grad_norm": 8.6875, "learning_rate": 0.0003, "loss": 8.7797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12838 }, { "epoch": 0.9312395735112787, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.7647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12839 }, { "epoch": 0.9313121056067309, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.4523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12840 }, { "epoch": 0.9313846377021833, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.8982, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12841 }, { "epoch": 0.9314571697976355, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.9611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12842 }, { "epoch": 0.9315297018930877, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 9.1112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12843 }, { "epoch": 0.9316022339885399, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.7851, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12844 }, { "epoch": 0.9316747660839921, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.6397, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12845 }, { "epoch": 0.9317472981794443, "grad_norm": 10.625, "learning_rate": 0.0003, "loss": 8.7389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12846 }, { "epoch": 0.9318198302748967, "grad_norm": 7.0, "learning_rate": 0.0003, "loss": 9.0329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12847 }, { "epoch": 0.9318923623703489, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.4913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12848 }, { "epoch": 0.9319648944658011, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.9923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12849 }, { "epoch": 0.9320374265612533, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.6361, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12850 }, { "epoch": 0.9321099586567055, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12851 }, { "epoch": 0.9321824907521579, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.2381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12852 }, { "epoch": 0.9322550228476101, "grad_norm": 6.90625, "learning_rate": 0.0003, "loss": 8.6905, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12853 }, { "epoch": 0.9323275549430623, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12854 }, { "epoch": 0.9324000870385145, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 9.2434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12855 }, { "epoch": 0.9324726191339667, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 9.2269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12856 }, { "epoch": 0.9325451512294191, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.0173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12857 }, { "epoch": 0.9326176833248713, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 8.9757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12858 }, { "epoch": 0.9326902154203235, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12859 }, { "epoch": 0.9327627475157757, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.1569, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12860 }, { "epoch": 0.9328352796112279, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12861 }, { "epoch": 0.9329078117066802, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.9559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12862 }, { "epoch": 0.9329803438021325, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 9.0972, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12863 }, { "epoch": 0.9330528758975847, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 9.1984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12864 }, { "epoch": 0.9331254079930369, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.4865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12865 }, { "epoch": 0.9331979400884891, "grad_norm": 22.0, "learning_rate": 0.0003, "loss": 7.9542, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12866 }, { "epoch": 0.9332704721839414, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.7551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12867 }, { "epoch": 0.9333430042793937, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12868 }, { "epoch": 0.9334155363748459, "grad_norm": 1.9765625, "learning_rate": 0.0003, "loss": 8.8415, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12869 }, { "epoch": 0.9334880684702981, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.7349, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12870 }, { "epoch": 0.9335606005657503, "grad_norm": 6.8125, "learning_rate": 0.0003, "loss": 8.797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12871 }, { "epoch": 0.9336331326612026, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.8506, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12872 }, { "epoch": 0.9337056647566548, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 9.124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12873 }, { "epoch": 0.9337781968521071, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.2274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12874 }, { "epoch": 0.9338507289475593, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.6212, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12875 }, { "epoch": 0.9339232610430115, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.1249, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12876 }, { "epoch": 0.9339957931384638, "grad_norm": 8.0625, "learning_rate": 0.0003, "loss": 9.1549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12877 }, { "epoch": 0.934068325233916, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.903, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12878 }, { "epoch": 0.9341408573293682, "grad_norm": 3.859375, "learning_rate": 0.0003, "loss": 8.8108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12879 }, { "epoch": 0.9342133894248205, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.2111, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12880 }, { "epoch": 0.9342859215202727, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.7847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12881 }, { "epoch": 0.934358453615725, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.8548, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12882 }, { "epoch": 0.9344309857111772, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.6444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12883 }, { "epoch": 0.9345035178066294, "grad_norm": 314.0, "learning_rate": 0.0003, "loss": 9.0505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12884 }, { "epoch": 0.9345760499020817, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12885 }, { "epoch": 0.9346485819975339, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7735, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12886 }, { "epoch": 0.9347211140929862, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12887 }, { "epoch": 0.9347936461884384, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.3517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12888 }, { "epoch": 0.9348661782838906, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12889 }, { "epoch": 0.9349387103793428, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12890 }, { "epoch": 0.9350112424747951, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12891 }, { "epoch": 0.9350837745702474, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.9385, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12892 }, { "epoch": 0.9351563066656996, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.7118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12893 }, { "epoch": 0.9352288387611518, "grad_norm": 10.5625, "learning_rate": 0.0003, "loss": 8.8494, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12894 }, { "epoch": 0.935301370856604, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.6487, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12895 }, { "epoch": 0.9353739029520562, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.3882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12896 }, { "epoch": 0.9354464350475086, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7402, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12897 }, { "epoch": 0.9355189671429608, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.0759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12898 }, { "epoch": 0.935591499238413, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.3532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12899 }, { "epoch": 0.9356640313338652, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.5429, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12900 }, { "epoch": 0.9357365634293174, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.749, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12901 }, { "epoch": 0.9358090955247698, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 8.2109, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12902 }, { "epoch": 0.935881627620222, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 8.9439, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12903 }, { "epoch": 0.9359541597156742, "grad_norm": 10.0625, "learning_rate": 0.0003, "loss": 8.4693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12904 }, { "epoch": 0.9360266918111264, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0207, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12905 }, { "epoch": 0.9360992239065786, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.0472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12906 }, { "epoch": 0.936171756002031, "grad_norm": 10.9375, "learning_rate": 0.0003, "loss": 8.604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12907 }, { "epoch": 0.9362442880974832, "grad_norm": 2.78125, "learning_rate": 0.0003, "loss": 9.0245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12908 }, { "epoch": 0.9363168201929354, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 9.2605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12909 }, { "epoch": 0.9363893522883876, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.1428, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12910 }, { "epoch": 0.9364618843838398, "grad_norm": 6.5, "learning_rate": 0.0003, "loss": 8.9232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12911 }, { "epoch": 0.9365344164792921, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.2277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12912 }, { "epoch": 0.9366069485747444, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12913 }, { "epoch": 0.9366794806701966, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12914 }, { "epoch": 0.9367520127656488, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12915 }, { "epoch": 0.936824544861101, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1427, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12916 }, { "epoch": 0.9368970769565532, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.6344, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12917 }, { "epoch": 0.9369696090520055, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.3604, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12918 }, { "epoch": 0.9370421411474578, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 9.2173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12919 }, { "epoch": 0.93711467324291, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.0132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12920 }, { "epoch": 0.9371872053383622, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.6995, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12921 }, { "epoch": 0.9372597374338144, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.9103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12922 }, { "epoch": 0.9373322695292667, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.7863, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12923 }, { "epoch": 0.937404801624719, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.4969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12924 }, { "epoch": 0.9374773337201712, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7649, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12925 }, { "epoch": 0.9375498658156234, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12926 }, { "epoch": 0.9376223979110756, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 9.1352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12927 }, { "epoch": 0.9376949300065279, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.9871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12928 }, { "epoch": 0.9377674621019801, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.9889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12929 }, { "epoch": 0.9378399941974324, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.6713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12930 }, { "epoch": 0.9379125262928846, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 9.4063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12931 }, { "epoch": 0.9379850583883368, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.9164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12932 }, { "epoch": 0.9380575904837891, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.3245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12933 }, { "epoch": 0.9381301225792413, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.4009, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12934 }, { "epoch": 0.9382026546746935, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.9877, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12935 }, { "epoch": 0.9382751867701458, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12936 }, { "epoch": 0.938347718865598, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.4515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12937 }, { "epoch": 0.9384202509610503, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.4616, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12938 }, { "epoch": 0.9384927830565025, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12939 }, { "epoch": 0.9385653151519547, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12940 }, { "epoch": 0.938637847247407, "grad_norm": 30.375, "learning_rate": 0.0003, "loss": 9.2074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12941 }, { "epoch": 0.9387103793428592, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9753, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12942 }, { "epoch": 0.9387829114383115, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 9.2463, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12943 }, { "epoch": 0.9388554435337637, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12944 }, { "epoch": 0.9389279756292159, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.6408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12945 }, { "epoch": 0.9390005077246681, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8862, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12946 }, { "epoch": 0.9390730398201204, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.6112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12947 }, { "epoch": 0.9391455719155727, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6785, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12948 }, { "epoch": 0.9392181040110249, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 9.1671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12949 }, { "epoch": 0.9392906361064771, "grad_norm": 3.03125, "learning_rate": 0.0003, "loss": 9.3137, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12950 }, { "epoch": 0.9393631682019293, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.8984, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12951 }, { "epoch": 0.9394357002973815, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 8.8027, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12952 }, { "epoch": 0.9395082323928339, "grad_norm": 9.8125, "learning_rate": 0.0003, "loss": 8.6122, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12953 }, { "epoch": 0.9395807644882861, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 8.8888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12954 }, { "epoch": 0.9396532965837383, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 9.1426, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12955 }, { "epoch": 0.9397258286791905, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 9.0145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12956 }, { "epoch": 0.9397983607746427, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12957 }, { "epoch": 0.9398708928700951, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.6256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12958 }, { "epoch": 0.9399434249655473, "grad_norm": 1.7578125, "learning_rate": 0.0003, "loss": 8.9552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12959 }, { "epoch": 0.9400159570609995, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 9.0063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12960 }, { "epoch": 0.9400884891564517, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 8.6842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12961 }, { "epoch": 0.9401610212519039, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.6895, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12962 }, { "epoch": 0.9402335533473563, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 8.8295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12963 }, { "epoch": 0.9403060854428085, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.6077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12964 }, { "epoch": 0.9403786175382607, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.2246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12965 }, { "epoch": 0.9404511496337129, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12966 }, { "epoch": 0.9405236817291651, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 9.0523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12967 }, { "epoch": 0.9405962138246174, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12968 }, { "epoch": 0.9406687459200697, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.8647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12969 }, { "epoch": 0.9407412780155219, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.4726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12970 }, { "epoch": 0.9408138101109741, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.8164, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12971 }, { "epoch": 0.9408863422064263, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.7804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12972 }, { "epoch": 0.9409588743018786, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.0155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12973 }, { "epoch": 0.9410314063973308, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.5279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12974 }, { "epoch": 0.9411039384927831, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.8059, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12975 }, { "epoch": 0.9411764705882353, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.4731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12976 }, { "epoch": 0.9412490026836875, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.5976, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12977 }, { "epoch": 0.9413215347791398, "grad_norm": 7.125, "learning_rate": 0.0003, "loss": 9.0685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12978 }, { "epoch": 0.941394066874592, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.1251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12979 }, { "epoch": 0.9414665989700443, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.4712, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12980 }, { "epoch": 0.9415391310654965, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 8.7263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12981 }, { "epoch": 0.9416116631609487, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 9.1906, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12982 }, { "epoch": 0.9416841952564009, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.5133, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12983 }, { "epoch": 0.9417567273518532, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.9205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12984 }, { "epoch": 0.9418292594473054, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 8.8705, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12985 }, { "epoch": 0.9419017915427577, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.8015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12986 }, { "epoch": 0.9419743236382099, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.7743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12987 }, { "epoch": 0.9420468557336621, "grad_norm": 12.75, "learning_rate": 0.0003, "loss": 8.6833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12988 }, { "epoch": 0.9421193878291144, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.987, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12989 }, { "epoch": 0.9421919199245666, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.9145, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12990 }, { "epoch": 0.9422644520200189, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.8706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12991 }, { "epoch": 0.9423369841154711, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12992 }, { "epoch": 0.9424095162109233, "grad_norm": 1.796875, "learning_rate": 0.0003, "loss": 8.044, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12993 }, { "epoch": 0.9424820483063756, "grad_norm": 23.0, "learning_rate": 0.0003, "loss": 9.0919, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12994 }, { "epoch": 0.9425545804018278, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.6015, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12995 }, { "epoch": 0.94262711249728, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.3113, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12996 }, { "epoch": 0.9426996445927323, "grad_norm": 20.75, "learning_rate": 0.0003, "loss": 8.823, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12997 }, { "epoch": 0.9427721766881845, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.5693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12998 }, { "epoch": 0.9428447087836368, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.6267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 12999 }, { "epoch": 0.942917240879089, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 8.9732, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13000 }, { "epoch": 0.9429897729745412, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.2866, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13001 }, { "epoch": 0.9430623050699934, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13002 }, { "epoch": 0.9431348371654457, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.1488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13003 }, { "epoch": 0.943207369260898, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 9.2798, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13004 }, { "epoch": 0.9432799013563502, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.1197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13005 }, { "epoch": 0.9433524334518024, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.0199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13006 }, { "epoch": 0.9434249655472546, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.5329, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13007 }, { "epoch": 0.9434974976427069, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13008 }, { "epoch": 0.9435700297381592, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 8.7606, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13009 }, { "epoch": 0.9436425618336114, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.9252, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13010 }, { "epoch": 0.9437150939290636, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.5658, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13011 }, { "epoch": 0.9437876260245158, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.6261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13012 }, { "epoch": 0.943860158119968, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.7193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13013 }, { "epoch": 0.9439326902154204, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.1634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13014 }, { "epoch": 0.9440052223108726, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13015 }, { "epoch": 0.9440777544063248, "grad_norm": 3.46875, "learning_rate": 0.0003, "loss": 8.6256, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13016 }, { "epoch": 0.944150286501777, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 8.928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13017 }, { "epoch": 0.9442228185972292, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 9.2328, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13018 }, { "epoch": 0.9442953506926816, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.84, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13019 }, { "epoch": 0.9443678827881338, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.2555, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13020 }, { "epoch": 0.944440414883586, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13021 }, { "epoch": 0.9445129469790382, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.2719, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13022 }, { "epoch": 0.9445854790744904, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13023 }, { "epoch": 0.9446580111699427, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.9867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13024 }, { "epoch": 0.944730543265395, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 8.4871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13025 }, { "epoch": 0.9448030753608472, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 9.2699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13026 }, { "epoch": 0.9448756074562994, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 9.3289, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13027 }, { "epoch": 0.9449481395517516, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.3333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13028 }, { "epoch": 0.9450206716472039, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.6317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13029 }, { "epoch": 0.9450932037426562, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 7.9663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13030 }, { "epoch": 0.9451657358381084, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.8333, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13031 }, { "epoch": 0.9452382679335606, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 9.0804, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13032 }, { "epoch": 0.9453108000290128, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13033 }, { "epoch": 0.9453833321244651, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 9.2017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13034 }, { "epoch": 0.9454558642199173, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.3467, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13035 }, { "epoch": 0.9455283963153696, "grad_norm": 13.25, "learning_rate": 0.0003, "loss": 8.5159, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13036 }, { "epoch": 0.9456009284108218, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 9.0087, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13037 }, { "epoch": 0.945673460506274, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 8.807, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13038 }, { "epoch": 0.9457459926017263, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 9.077, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13039 }, { "epoch": 0.9458185246971785, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.6123, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13040 }, { "epoch": 0.9458910567926307, "grad_norm": 17.625, "learning_rate": 0.0003, "loss": 8.8483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13041 }, { "epoch": 0.945963588888083, "grad_norm": 1.7890625, "learning_rate": 0.0003, "loss": 8.8941, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13042 }, { "epoch": 0.9460361209835352, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.4667, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13043 }, { "epoch": 0.9461086530789875, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.0155, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13044 }, { "epoch": 0.9461811851744397, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.4554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13045 }, { "epoch": 0.9462537172698919, "grad_norm": 9.9375, "learning_rate": 0.0003, "loss": 8.8533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13046 }, { "epoch": 0.9463262493653442, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 9.1597, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13047 }, { "epoch": 0.9463987814607964, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 8.1803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13048 }, { "epoch": 0.9464713135562487, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.8324, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13049 }, { "epoch": 0.9465438456517009, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 9.2718, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13050 }, { "epoch": 0.9466163777471531, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13051 }, { "epoch": 0.9466889098426053, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.9625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13052 }, { "epoch": 0.9467614419380576, "grad_norm": 5.53125, "learning_rate": 0.0003, "loss": 8.5539, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13053 }, { "epoch": 0.9468339740335098, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.5575, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13054 }, { "epoch": 0.9469065061289621, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13055 }, { "epoch": 0.9469790382244143, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.0622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13056 }, { "epoch": 0.9470515703198665, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.2557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13057 }, { "epoch": 0.9471241024153187, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.5881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13058 }, { "epoch": 0.947196634510771, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.6495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13059 }, { "epoch": 0.9472691666062233, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.4901, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13060 }, { "epoch": 0.9473416987016755, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.4114, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13061 }, { "epoch": 0.9474142307971277, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 9.0808, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13062 }, { "epoch": 0.9474867628925799, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.9666, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13063 }, { "epoch": 0.9475592949880322, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.8969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13064 }, { "epoch": 0.9476318270834845, "grad_norm": 12.5625, "learning_rate": 0.0003, "loss": 8.8406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13065 }, { "epoch": 0.9477043591789367, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 9.0559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13066 }, { "epoch": 0.9477768912743889, "grad_norm": 1.75, "learning_rate": 0.0003, "loss": 8.9818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13067 }, { "epoch": 0.9478494233698411, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9479, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13068 }, { "epoch": 0.9479219554652933, "grad_norm": 23.125, "learning_rate": 0.0003, "loss": 8.7341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13069 }, { "epoch": 0.9479944875607457, "grad_norm": 5.71875, "learning_rate": 0.0003, "loss": 9.0803, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13070 }, { "epoch": 0.9480670196561979, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.7431, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13071 }, { "epoch": 0.9481395517516501, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.7012, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13072 }, { "epoch": 0.9482120838471023, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.6073, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13073 }, { "epoch": 0.9482846159425545, "grad_norm": 3.921875, "learning_rate": 0.0003, "loss": 9.0595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13074 }, { "epoch": 0.9483571480380069, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.6609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13075 }, { "epoch": 0.9484296801334591, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 8.2002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13076 }, { "epoch": 0.9485022122289113, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13077 }, { "epoch": 0.9485747443243635, "grad_norm": 17.75, "learning_rate": 0.0003, "loss": 8.8637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13078 }, { "epoch": 0.9486472764198157, "grad_norm": 7.0625, "learning_rate": 0.0003, "loss": 9.2783, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13079 }, { "epoch": 0.948719808515268, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.5251, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13080 }, { "epoch": 0.9487923406107203, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.6973, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13081 }, { "epoch": 0.9488648727061725, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 9.1434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13082 }, { "epoch": 0.9489374048016247, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.3341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13083 }, { "epoch": 0.9490099368970769, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.6881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13084 }, { "epoch": 0.9490824689925292, "grad_norm": 1.7265625, "learning_rate": 0.0003, "loss": 8.9088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13085 }, { "epoch": 0.9491550010879815, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.8118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13086 }, { "epoch": 0.9492275331834337, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.9065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13087 }, { "epoch": 0.9493000652788859, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 9.0534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13088 }, { "epoch": 0.9493725973743381, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13089 }, { "epoch": 0.9494451294697904, "grad_norm": 13.125, "learning_rate": 0.0003, "loss": 8.7734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13090 }, { "epoch": 0.9495176615652426, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.8571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13091 }, { "epoch": 0.9495901936606949, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 9.0311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13092 }, { "epoch": 0.9496627257561471, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 9.4404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13093 }, { "epoch": 0.9497352578515993, "grad_norm": 2.9375, "learning_rate": 0.0003, "loss": 9.411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13094 }, { "epoch": 0.9498077899470516, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13095 }, { "epoch": 0.9498803220425038, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.1128, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13096 }, { "epoch": 0.949952854137956, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13097 }, { "epoch": 0.9500253862334083, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.1692, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13098 }, { "epoch": 0.9500979183288605, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 9.2274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13099 }, { "epoch": 0.9501704504243128, "grad_norm": 21.0, "learning_rate": 0.0003, "loss": 8.9587, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13100 }, { "epoch": 0.950242982519765, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.2795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13101 }, { "epoch": 0.9503155146152172, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.8672, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13102 }, { "epoch": 0.9503880467106695, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.8629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13103 }, { "epoch": 0.9504605788061217, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.9686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13104 }, { "epoch": 0.950533110901574, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.2861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13105 }, { "epoch": 0.9506056429970262, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.5103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13106 }, { "epoch": 0.9506781750924784, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.8717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13107 }, { "epoch": 0.9507507071879306, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.8659, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13108 }, { "epoch": 0.9508232392833829, "grad_norm": 1.9609375, "learning_rate": 0.0003, "loss": 8.4577, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13109 }, { "epoch": 0.9508957713788352, "grad_norm": 2.984375, "learning_rate": 0.0003, "loss": 9.214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13110 }, { "epoch": 0.9509683034742874, "grad_norm": 19.25, "learning_rate": 0.0003, "loss": 9.0459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13111 }, { "epoch": 0.9510408355697396, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.7707, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13112 }, { "epoch": 0.9511133676651918, "grad_norm": 2.1875, "learning_rate": 0.0003, "loss": 8.6964, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13113 }, { "epoch": 0.951185899760644, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.1784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13114 }, { "epoch": 0.9512584318560964, "grad_norm": 25.875, "learning_rate": 0.0003, "loss": 8.9501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13115 }, { "epoch": 0.9513309639515486, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.2057, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13116 }, { "epoch": 0.9514034960470008, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6588, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13117 }, { "epoch": 0.951476028142453, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.7055, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13118 }, { "epoch": 0.9515485602379052, "grad_norm": 7.5625, "learning_rate": 0.0003, "loss": 8.7777, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13119 }, { "epoch": 0.9516210923333576, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.3948, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13120 }, { "epoch": 0.9516936244288098, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13121 }, { "epoch": 0.951766156524262, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.3293, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13122 }, { "epoch": 0.9518386886197142, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.5812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13123 }, { "epoch": 0.9519112207151664, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.8598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13124 }, { "epoch": 0.9519837528106186, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 9.161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13125 }, { "epoch": 0.952056284906071, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.7975, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13126 }, { "epoch": 0.9521288170015232, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 8.7404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13127 }, { "epoch": 0.9522013490969754, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.7554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13128 }, { "epoch": 0.9522738811924276, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.2884, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13129 }, { "epoch": 0.9523464132878798, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 8.8107, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13130 }, { "epoch": 0.9524189453833322, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.7872, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13131 }, { "epoch": 0.9524914774787844, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.1806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13132 }, { "epoch": 0.9525640095742366, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13133 }, { "epoch": 0.9526365416696888, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 9.2196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13134 }, { "epoch": 0.952709073765141, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.3838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13135 }, { "epoch": 0.9527816058605934, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13136 }, { "epoch": 0.9528541379560456, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 9.4917, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13137 }, { "epoch": 0.9529266700514978, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.9928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13138 }, { "epoch": 0.95299920214695, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13139 }, { "epoch": 0.9530717342424022, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.8282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13140 }, { "epoch": 0.9531442663378545, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 9.0508, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13141 }, { "epoch": 0.9532167984333068, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.5882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13142 }, { "epoch": 0.953289330528759, "grad_norm": 93.5, "learning_rate": 0.0003, "loss": 8.8381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13143 }, { "epoch": 0.9533618626242112, "grad_norm": 6.0, "learning_rate": 0.0003, "loss": 8.8175, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13144 }, { "epoch": 0.9534343947196634, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.9022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13145 }, { "epoch": 0.9535069268151157, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.6528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13146 }, { "epoch": 0.953579458910568, "grad_norm": 1.703125, "learning_rate": 0.0003, "loss": 8.9064, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13147 }, { "epoch": 0.9536519910060202, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13148 }, { "epoch": 0.9537245231014724, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.1771, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13149 }, { "epoch": 0.9537970551969246, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13150 }, { "epoch": 0.9538695872923769, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13151 }, { "epoch": 0.9539421193878291, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.8311, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13152 }, { "epoch": 0.9540146514832814, "grad_norm": 14.75, "learning_rate": 0.0003, "loss": 9.0634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13153 }, { "epoch": 0.9540871835787336, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.9525, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13154 }, { "epoch": 0.9541597156741858, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13155 }, { "epoch": 0.9542322477696381, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 9.0873, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13156 }, { "epoch": 0.9543047798650903, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8622, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13157 }, { "epoch": 0.9543773119605425, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7557, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13158 }, { "epoch": 0.9544498440559948, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.3756, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13159 }, { "epoch": 0.954522376151447, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.5466, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13160 }, { "epoch": 0.9545949082468993, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.9613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13161 }, { "epoch": 0.9546674403423515, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 9.075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13162 }, { "epoch": 0.9547399724378037, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.8852, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13163 }, { "epoch": 0.954812504533256, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.7956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13164 }, { "epoch": 0.9548850366287082, "grad_norm": 6.375, "learning_rate": 0.0003, "loss": 8.4817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13165 }, { "epoch": 0.9549575687241605, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.5722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13166 }, { "epoch": 0.9550301008196127, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.6284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13167 }, { "epoch": 0.9551026329150649, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9922, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13168 }, { "epoch": 0.9551751650105171, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7388, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13169 }, { "epoch": 0.9552476971059694, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.9229, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13170 }, { "epoch": 0.9553202292014217, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.8734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13171 }, { "epoch": 0.9553927612968739, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13172 }, { "epoch": 0.9554652933923261, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.9847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13173 }, { "epoch": 0.9555378254877783, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.7601, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13174 }, { "epoch": 0.9556103575832305, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13175 }, { "epoch": 0.9556828896786829, "grad_norm": 8.1875, "learning_rate": 0.0003, "loss": 8.9532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13176 }, { "epoch": 0.9557554217741351, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.8474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13177 }, { "epoch": 0.9558279538695873, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.6238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13178 }, { "epoch": 0.9559004859650395, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.838, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13179 }, { "epoch": 0.9559730180604917, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.3335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13180 }, { "epoch": 0.9560455501559441, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.7733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13181 }, { "epoch": 0.9561180822513963, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.5068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13182 }, { "epoch": 0.9561906143468485, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.938, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13183 }, { "epoch": 0.9562631464423007, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.8475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13184 }, { "epoch": 0.9563356785377529, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.4657, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13185 }, { "epoch": 0.9564082106332052, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.7519, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13186 }, { "epoch": 0.9564807427286575, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 9.4443, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13187 }, { "epoch": 0.9565532748241097, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8594, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13188 }, { "epoch": 0.9566258069195619, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 8.5634, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13189 }, { "epoch": 0.9566983390150141, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 9.3272, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13190 }, { "epoch": 0.9567708711104664, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.5855, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13191 }, { "epoch": 0.9568434032059187, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.9275, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13192 }, { "epoch": 0.9569159353013709, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 9.0367, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13193 }, { "epoch": 0.9569884673968231, "grad_norm": 7.75, "learning_rate": 0.0003, "loss": 8.6051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13194 }, { "epoch": 0.9570609994922753, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 8.8237, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13195 }, { "epoch": 0.9571335315877275, "grad_norm": 6.59375, "learning_rate": 0.0003, "loss": 8.8523, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13196 }, { "epoch": 0.9572060636831798, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.8163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13197 }, { "epoch": 0.9572785957786321, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.7096, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13198 }, { "epoch": 0.9573511278740843, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 8.8673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13199 }, { "epoch": 0.9574236599695365, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.3963, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13200 }, { "epoch": 0.9574961920649887, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.2737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13201 }, { "epoch": 0.957568724160441, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 8.4458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13202 }, { "epoch": 0.9576412562558932, "grad_norm": 3.6875, "learning_rate": 0.0003, "loss": 8.9154, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13203 }, { "epoch": 0.9577137883513455, "grad_norm": 3.421875, "learning_rate": 0.0003, "loss": 8.7411, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13204 }, { "epoch": 0.9577863204467977, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.0561, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13205 }, { "epoch": 0.9578588525422499, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.7678, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13206 }, { "epoch": 0.9579313846377022, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.7121, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13207 }, { "epoch": 0.9580039167331544, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.149, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13208 }, { "epoch": 0.9580764488286067, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 9.0898, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13209 }, { "epoch": 0.9581489809240589, "grad_norm": 10.4375, "learning_rate": 0.0003, "loss": 8.9282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13210 }, { "epoch": 0.9582215130195111, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.0161, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13211 }, { "epoch": 0.9582940451149634, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.8, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13212 }, { "epoch": 0.9583665772104156, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.7022, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13213 }, { "epoch": 0.9584391093058678, "grad_norm": 37.0, "learning_rate": 0.0003, "loss": 9.0472, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13214 }, { "epoch": 0.9585116414013201, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.943, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13215 }, { "epoch": 0.9585841734967723, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.7817, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13216 }, { "epoch": 0.9586567055922246, "grad_norm": 3.15625, "learning_rate": 0.0003, "loss": 8.6603, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13217 }, { "epoch": 0.9587292376876768, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.1759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13218 }, { "epoch": 0.958801769783129, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.1115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13219 }, { "epoch": 0.9588743018785812, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9069, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13220 }, { "epoch": 0.9589468339740335, "grad_norm": 1.8359375, "learning_rate": 0.0003, "loss": 9.5687, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13221 }, { "epoch": 0.9590193660694858, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.5399, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13222 }, { "epoch": 0.959091898164938, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0549, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13223 }, { "epoch": 0.9591644302603902, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.3152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13224 }, { "epoch": 0.9592369623558424, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 8.7186, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13225 }, { "epoch": 0.9593094944512947, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.9485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13226 }, { "epoch": 0.959382026546747, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.9337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13227 }, { "epoch": 0.9594545586421992, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.7595, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13228 }, { "epoch": 0.9595270907376514, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.5757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13229 }, { "epoch": 0.9595996228331036, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8596, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13230 }, { "epoch": 0.9596721549285558, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.5773, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13231 }, { "epoch": 0.9597446870240082, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 9.2359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13232 }, { "epoch": 0.9598172191194604, "grad_norm": 8.125, "learning_rate": 0.0003, "loss": 8.5736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13233 }, { "epoch": 0.9598897512149126, "grad_norm": 1.984375, "learning_rate": 0.0003, "loss": 8.7526, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13234 }, { "epoch": 0.9599622833103648, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.5054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13235 }, { "epoch": 0.960034815405817, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 8.8567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13236 }, { "epoch": 0.9601073475012694, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13237 }, { "epoch": 0.9601798795967216, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 8.7191, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13238 }, { "epoch": 0.9602524116921738, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.3758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13239 }, { "epoch": 0.960324943787626, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.3816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13240 }, { "epoch": 0.9603974758830782, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.9051, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13241 }, { "epoch": 0.9604700079785305, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.5876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13242 }, { "epoch": 0.9605425400739828, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13243 }, { "epoch": 0.960615072169435, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.6197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13244 }, { "epoch": 0.9606876042648872, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.8277, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13245 }, { "epoch": 0.9607601363603394, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13246 }, { "epoch": 0.9608326684557917, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13247 }, { "epoch": 0.960905200551244, "grad_norm": 2.453125, "learning_rate": 0.0003, "loss": 8.2105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13248 }, { "epoch": 0.9609777326466962, "grad_norm": 41.5, "learning_rate": 0.0003, "loss": 9.0474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13249 }, { "epoch": 0.9610502647421484, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.8315, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13250 }, { "epoch": 0.9611227968376006, "grad_norm": 48.0, "learning_rate": 0.0003, "loss": 8.7999, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13251 }, { "epoch": 0.9611953289330529, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.9322, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13252 }, { "epoch": 0.9612678610285051, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.9157, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13253 }, { "epoch": 0.9613403931239574, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.6021, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13254 }, { "epoch": 0.9614129252194096, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 9.3232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13255 }, { "epoch": 0.9614854573148618, "grad_norm": 13.3125, "learning_rate": 0.0003, "loss": 9.3572, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13256 }, { "epoch": 0.9615579894103141, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.9665, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13257 }, { "epoch": 0.9616305215057663, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13258 }, { "epoch": 0.9617030536012185, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 9.3025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13259 }, { "epoch": 0.9617755856966708, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.8881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13260 }, { "epoch": 0.961848117792123, "grad_norm": 2.703125, "learning_rate": 0.0003, "loss": 8.6309, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13261 }, { "epoch": 0.9619206498875753, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.0536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13262 }, { "epoch": 0.9619931819830275, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.8623, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13263 }, { "epoch": 0.9620657140784797, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13264 }, { "epoch": 0.962138246173932, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.0327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13265 }, { "epoch": 0.9622107782693842, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.4914, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13266 }, { "epoch": 0.9622833103648364, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.8822, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13267 }, { "epoch": 0.9623558424602887, "grad_norm": 8.5, "learning_rate": 0.0003, "loss": 9.0339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13268 }, { "epoch": 0.9624283745557409, "grad_norm": 7.09375, "learning_rate": 0.0003, "loss": 9.0437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13269 }, { "epoch": 0.9625009066511931, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.9032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13270 }, { "epoch": 0.9625734387466454, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.6446, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13271 }, { "epoch": 0.9626459708420976, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.2899, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13272 }, { "epoch": 0.9627185029375499, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.6806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13273 }, { "epoch": 0.9627910350330021, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.4102, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13274 }, { "epoch": 0.9628635671284543, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.656, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13275 }, { "epoch": 0.9629360992239065, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.7297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13276 }, { "epoch": 0.9630086313193588, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.9197, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13277 }, { "epoch": 0.9630811634148111, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 9.1374, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13278 }, { "epoch": 0.9631536955102633, "grad_norm": 19.25, "learning_rate": 0.0003, "loss": 9.0269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13279 }, { "epoch": 0.9632262276057155, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.702, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13280 }, { "epoch": 0.9632987597011677, "grad_norm": 3.078125, "learning_rate": 0.0003, "loss": 8.7751, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13281 }, { "epoch": 0.96337129179662, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.6297, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13282 }, { "epoch": 0.9634438238920723, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.8202, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13283 }, { "epoch": 0.9635163559875245, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7317, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13284 }, { "epoch": 0.9635888880829767, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.8455, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13285 }, { "epoch": 0.9636614201784289, "grad_norm": 2.59375, "learning_rate": 0.0003, "loss": 9.2734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13286 }, { "epoch": 0.9637339522738811, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 9.2258, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13287 }, { "epoch": 0.9638064843693335, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8261, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13288 }, { "epoch": 0.9638790164647857, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.7231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13289 }, { "epoch": 0.9639515485602379, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 8.8054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13290 }, { "epoch": 0.9640240806556901, "grad_norm": 28.875, "learning_rate": 0.0003, "loss": 9.1883, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13291 }, { "epoch": 0.9640966127511423, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.7422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13292 }, { "epoch": 0.9641691448465947, "grad_norm": 13.625, "learning_rate": 0.0003, "loss": 8.648, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13293 }, { "epoch": 0.9642416769420469, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 8.8675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13294 }, { "epoch": 0.9643142090374991, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13295 }, { "epoch": 0.9643867411329513, "grad_norm": 318.0, "learning_rate": 0.0003, "loss": 9.2529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13296 }, { "epoch": 0.9644592732284035, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.9831, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13297 }, { "epoch": 0.9645318053238559, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.9552, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13298 }, { "epoch": 0.9646043374193081, "grad_norm": 10.5, "learning_rate": 0.0003, "loss": 9.0217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13299 }, { "epoch": 0.9646768695147603, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.9986, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13300 }, { "epoch": 0.9647494016102125, "grad_norm": 5.78125, "learning_rate": 0.0003, "loss": 8.7609, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13301 }, { "epoch": 0.9648219337056647, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 9.2362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13302 }, { "epoch": 0.964894465801117, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.6834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13303 }, { "epoch": 0.9649669978965693, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 8.7737, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13304 }, { "epoch": 0.9650395299920215, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 9.0444, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13305 }, { "epoch": 0.9651120620874737, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.7181, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13306 }, { "epoch": 0.9651845941829259, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 9.3215, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13307 }, { "epoch": 0.9652571262783782, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.5013, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13308 }, { "epoch": 0.9653296583738304, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.3704, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13309 }, { "epoch": 0.9654021904692827, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 8.453, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13310 }, { "epoch": 0.9654747225647349, "grad_norm": 7.9375, "learning_rate": 0.0003, "loss": 9.1075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13311 }, { "epoch": 0.9655472546601871, "grad_norm": 5.09375, "learning_rate": 0.0003, "loss": 8.6887, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13312 }, { "epoch": 0.9656197867556394, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.8205, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13313 }, { "epoch": 0.9656923188510916, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 9.0647, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13314 }, { "epoch": 0.9657648509465439, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 9.0966, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13315 }, { "epoch": 0.9658373830419961, "grad_norm": 21.375, "learning_rate": 0.0003, "loss": 8.9511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13316 }, { "epoch": 0.9659099151374483, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 9.2793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13317 }, { "epoch": 0.9659824472329006, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.4224, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13318 }, { "epoch": 0.9660549793283528, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.4339, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13319 }, { "epoch": 0.966127511423805, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.9115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13320 }, { "epoch": 0.9662000435192573, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.8273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13321 }, { "epoch": 0.9662725756147095, "grad_norm": 39.75, "learning_rate": 0.0003, "loss": 9.1681, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13322 }, { "epoch": 0.9663451077101618, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.6529, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13323 }, { "epoch": 0.966417639805614, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 9.4456, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13324 }, { "epoch": 0.9664901719010662, "grad_norm": 6.21875, "learning_rate": 0.0003, "loss": 8.4671, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13325 }, { "epoch": 0.9665627039965184, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.4928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13326 }, { "epoch": 0.9666352360919707, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.9482, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13327 }, { "epoch": 0.966707768187423, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13328 }, { "epoch": 0.9667803002828752, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 9.1643, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13329 }, { "epoch": 0.9668528323783274, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9091, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13330 }, { "epoch": 0.9669253644737796, "grad_norm": 29.625, "learning_rate": 0.0003, "loss": 8.9752, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13331 }, { "epoch": 0.9669978965692319, "grad_norm": 2.8125, "learning_rate": 0.0003, "loss": 8.9305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13332 }, { "epoch": 0.9670704286646841, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.3033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13333 }, { "epoch": 0.9671429607601364, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.9226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13334 }, { "epoch": 0.9672154928555886, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 9.0611, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13335 }, { "epoch": 0.9672880249510408, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.5216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13336 }, { "epoch": 0.967360557046493, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.952, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13337 }, { "epoch": 0.9674330891419453, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.7797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13338 }, { "epoch": 0.9675056212373976, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.6867, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13339 }, { "epoch": 0.9675781533328498, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.3875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13340 }, { "epoch": 0.967650685428302, "grad_norm": 33.0, "learning_rate": 0.0003, "loss": 8.6092, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13341 }, { "epoch": 0.9677232175237542, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 9.0528, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13342 }, { "epoch": 0.9677957496192064, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.7754, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13343 }, { "epoch": 0.9678682817146588, "grad_norm": 10.0, "learning_rate": 0.0003, "loss": 9.0574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13344 }, { "epoch": 0.967940813810111, "grad_norm": 1.8203125, "learning_rate": 0.0003, "loss": 8.8858, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13345 }, { "epoch": 0.9680133459055632, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.3653, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13346 }, { "epoch": 0.9680858780010154, "grad_norm": 1.734375, "learning_rate": 0.0003, "loss": 8.8088, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13347 }, { "epoch": 0.9681584100964676, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.904, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13348 }, { "epoch": 0.96823094219192, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 9.0395, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13349 }, { "epoch": 0.9683034742873722, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.4932, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13350 }, { "epoch": 0.9683760063828244, "grad_norm": 3.5, "learning_rate": 0.0003, "loss": 8.7424, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13351 }, { "epoch": 0.9684485384782766, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 9.1554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13352 }, { "epoch": 0.9685210705737288, "grad_norm": 97.5, "learning_rate": 0.0003, "loss": 8.875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13353 }, { "epoch": 0.9685936026691812, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.7847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13354 }, { "epoch": 0.9686661347646334, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.618, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13355 }, { "epoch": 0.9687386668600856, "grad_norm": 87.0, "learning_rate": 0.0003, "loss": 8.3585, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13356 }, { "epoch": 0.9688111989555378, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.8847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13357 }, { "epoch": 0.96888373105099, "grad_norm": 3.1875, "learning_rate": 0.0003, "loss": 9.0459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13358 }, { "epoch": 0.9689562631464423, "grad_norm": 7.84375, "learning_rate": 0.0003, "loss": 8.4017, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13359 }, { "epoch": 0.9690287952418946, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.8454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13360 }, { "epoch": 0.9691013273373468, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.6285, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13361 }, { "epoch": 0.969173859432799, "grad_norm": 14.625, "learning_rate": 0.0003, "loss": 8.6475, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13362 }, { "epoch": 0.9692463915282512, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7404, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13363 }, { "epoch": 0.9693189236237035, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.6819, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13364 }, { "epoch": 0.9693914557191557, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1459, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13365 }, { "epoch": 0.969463987814608, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 8.7515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13366 }, { "epoch": 0.9695365199100602, "grad_norm": 3.828125, "learning_rate": 0.0003, "loss": 8.8685, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13367 }, { "epoch": 0.9696090520055124, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.5836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13368 }, { "epoch": 0.9696815841009647, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.6813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13369 }, { "epoch": 0.9697541161964169, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.1661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13370 }, { "epoch": 0.9698266482918692, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7673, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13371 }, { "epoch": 0.9698991803873214, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.4263, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13372 }, { "epoch": 0.9699717124827736, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.5054, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13373 }, { "epoch": 0.9700442445782259, "grad_norm": 5.8125, "learning_rate": 0.0003, "loss": 9.0008, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13374 }, { "epoch": 0.9701167766736781, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7715, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13375 }, { "epoch": 0.9701893087691303, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13376 }, { "epoch": 0.9702618408645826, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.0748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13377 }, { "epoch": 0.9703343729600348, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.6942, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13378 }, { "epoch": 0.9704069050554871, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.4554, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13379 }, { "epoch": 0.9704794371509393, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7778, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13380 }, { "epoch": 0.9705519692463915, "grad_norm": 3.515625, "learning_rate": 0.0003, "loss": 8.9631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13381 }, { "epoch": 0.9706245013418437, "grad_norm": 26.75, "learning_rate": 0.0003, "loss": 8.4287, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13382 }, { "epoch": 0.970697033437296, "grad_norm": 2.0, "learning_rate": 0.0003, "loss": 9.0965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13383 }, { "epoch": 0.9707695655327483, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13384 }, { "epoch": 0.9708420976282005, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.1821, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13385 }, { "epoch": 0.9709146297236527, "grad_norm": 4.59375, "learning_rate": 0.0003, "loss": 8.846, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13386 }, { "epoch": 0.9709871618191049, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.2511, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13387 }, { "epoch": 0.9710596939145572, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.9969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13388 }, { "epoch": 0.9711322260100095, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 9.1437, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13389 }, { "epoch": 0.9712047581054617, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.8076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13390 }, { "epoch": 0.9712772902009139, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.1063, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13391 }, { "epoch": 0.9713498222963661, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.9965, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13392 }, { "epoch": 0.9714223543918183, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.5536, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13393 }, { "epoch": 0.9714948864872707, "grad_norm": 6.125, "learning_rate": 0.0003, "loss": 8.7347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13394 }, { "epoch": 0.9715674185827229, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0257, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13395 }, { "epoch": 0.9716399506781751, "grad_norm": 3.734375, "learning_rate": 0.0003, "loss": 8.5602, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13396 }, { "epoch": 0.9717124827736273, "grad_norm": 3.109375, "learning_rate": 0.0003, "loss": 8.5591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13397 }, { "epoch": 0.9717850148690795, "grad_norm": 1.9140625, "learning_rate": 0.0003, "loss": 9.0407, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13398 }, { "epoch": 0.9718575469645319, "grad_norm": 8.25, "learning_rate": 0.0003, "loss": 8.8651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13399 }, { "epoch": 0.9719300790599841, "grad_norm": 3.4375, "learning_rate": 0.0003, "loss": 8.8537, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13400 }, { "epoch": 0.9720026111554363, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.1571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13401 }, { "epoch": 0.9720751432508885, "grad_norm": 2.515625, "learning_rate": 0.0003, "loss": 8.4661, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13402 }, { "epoch": 0.9721476753463407, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.5226, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13403 }, { "epoch": 0.9722202074417929, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.8655, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13404 }, { "epoch": 0.9722927395372453, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.9403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13405 }, { "epoch": 0.9723652716326975, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.1631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13406 }, { "epoch": 0.9724378037281497, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.2118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13407 }, { "epoch": 0.9725103358236019, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 8.2881, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13408 }, { "epoch": 0.9725828679190541, "grad_norm": 5.75, "learning_rate": 0.0003, "loss": 8.533, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13409 }, { "epoch": 0.9726554000145065, "grad_norm": 9.25, "learning_rate": 0.0003, "loss": 8.6733, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13410 }, { "epoch": 0.9727279321099587, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.9033, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13411 }, { "epoch": 0.9728004642054109, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.6117, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13412 }, { "epoch": 0.9728729963008631, "grad_norm": 11.0625, "learning_rate": 0.0003, "loss": 8.9532, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13413 }, { "epoch": 0.9729455283963153, "grad_norm": 3.265625, "learning_rate": 0.0003, "loss": 9.4731, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13414 }, { "epoch": 0.9730180604917676, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 8.9861, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13415 }, { "epoch": 0.9730905925872199, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.7268, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13416 }, { "epoch": 0.9731631246826721, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.9766, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13417 }, { "epoch": 0.9732356567781243, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.1193, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13418 }, { "epoch": 0.9733081888735765, "grad_norm": 1.953125, "learning_rate": 0.0003, "loss": 9.041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13419 }, { "epoch": 0.9733807209690288, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.8045, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13420 }, { "epoch": 0.973453253064481, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.2099, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13421 }, { "epoch": 0.9735257851599333, "grad_norm": 11.75, "learning_rate": 0.0003, "loss": 8.5794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13422 }, { "epoch": 0.9735983172553855, "grad_norm": 1.890625, "learning_rate": 0.0003, "loss": 8.6144, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13423 }, { "epoch": 0.9736708493508377, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.6598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13424 }, { "epoch": 0.97374338144629, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.7627, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13425 }, { "epoch": 0.9738159135417422, "grad_norm": 5.15625, "learning_rate": 0.0003, "loss": 9.267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13426 }, { "epoch": 0.9738884456371945, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.9793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13427 }, { "epoch": 0.9739609777326467, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.5074, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13428 }, { "epoch": 0.9740335098280989, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13429 }, { "epoch": 0.9741060419235512, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 8.556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13430 }, { "epoch": 0.9741785740190034, "grad_norm": 5.875, "learning_rate": 0.0003, "loss": 9.2143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13431 }, { "epoch": 0.9742511061144556, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.4085, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13432 }, { "epoch": 0.9743236382099079, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 8.6347, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13433 }, { "epoch": 0.9743961703053601, "grad_norm": 7.96875, "learning_rate": 0.0003, "loss": 8.8498, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13434 }, { "epoch": 0.9744687024008124, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.5849, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13435 }, { "epoch": 0.9745412344962646, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 8.6216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13436 }, { "epoch": 0.9746137665917168, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 9.0574, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13437 }, { "epoch": 0.974686298687169, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 9.3936, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13438 }, { "epoch": 0.9747588307826213, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.7139, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13439 }, { "epoch": 0.9748313628780736, "grad_norm": 2.03125, "learning_rate": 0.0003, "loss": 8.1857, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13440 }, { "epoch": 0.9749038949735258, "grad_norm": 1.84375, "learning_rate": 0.0003, "loss": 8.8875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13441 }, { "epoch": 0.974976427068978, "grad_norm": 7.875, "learning_rate": 0.0003, "loss": 8.5305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13442 }, { "epoch": 0.9750489591644302, "grad_norm": 2.578125, "learning_rate": 0.0003, "loss": 9.0273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13443 }, { "epoch": 0.9751214912598825, "grad_norm": 3.484375, "learning_rate": 0.0003, "loss": 8.5282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13444 }, { "epoch": 0.9751940233553348, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.7517, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13445 }, { "epoch": 0.975266555450787, "grad_norm": 6.96875, "learning_rate": 0.0003, "loss": 8.7957, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13446 }, { "epoch": 0.9753390875462392, "grad_norm": 20.875, "learning_rate": 0.0003, "loss": 9.0118, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13447 }, { "epoch": 0.9754116196416914, "grad_norm": 5.625, "learning_rate": 0.0003, "loss": 8.6764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13448 }, { "epoch": 0.9754841517371436, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.6296, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13449 }, { "epoch": 0.975556683832596, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 8.7218, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13450 }, { "epoch": 0.9756292159280482, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 9.1026, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13451 }, { "epoch": 0.9757017480235004, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.4362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13452 }, { "epoch": 0.9757742801189526, "grad_norm": 21.375, "learning_rate": 0.0003, "loss": 9.1422, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13453 }, { "epoch": 0.9758468122144048, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 9.2319, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13454 }, { "epoch": 0.9759193443098572, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 8.6047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13455 }, { "epoch": 0.9759918764053094, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 8.2359, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13456 }, { "epoch": 0.9760644085007616, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.5281, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13457 }, { "epoch": 0.9761369405962138, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7094, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13458 }, { "epoch": 0.976209472691666, "grad_norm": 3.875, "learning_rate": 0.0003, "loss": 8.8977, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13459 }, { "epoch": 0.9762820047871184, "grad_norm": 16.875, "learning_rate": 0.0003, "loss": 8.499, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13460 }, { "epoch": 0.9763545368825706, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 9.1458, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13461 }, { "epoch": 0.9764270689780228, "grad_norm": 15.6875, "learning_rate": 0.0003, "loss": 9.1105, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13462 }, { "epoch": 0.976499601073475, "grad_norm": 2.09375, "learning_rate": 0.0003, "loss": 8.8163, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13463 }, { "epoch": 0.9765721331689272, "grad_norm": 7.03125, "learning_rate": 0.0003, "loss": 8.7876, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13464 }, { "epoch": 0.9766446652643795, "grad_norm": 2.5625, "learning_rate": 0.0003, "loss": 8.9764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13465 }, { "epoch": 0.9767171973598318, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 9.1875, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13466 }, { "epoch": 0.976789729455284, "grad_norm": 2.140625, "learning_rate": 0.0003, "loss": 8.7206, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13467 }, { "epoch": 0.9768622615507362, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.9772, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13468 }, { "epoch": 0.9769347936461884, "grad_norm": 6.625, "learning_rate": 0.0003, "loss": 8.7125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13469 }, { "epoch": 0.9770073257416407, "grad_norm": 6.40625, "learning_rate": 0.0003, "loss": 8.8103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13470 }, { "epoch": 0.977079857837093, "grad_norm": 3.84375, "learning_rate": 0.0003, "loss": 9.2512, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13471 }, { "epoch": 0.9771523899325452, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 9.0794, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13472 }, { "epoch": 0.9772249220279974, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 9.0032, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13473 }, { "epoch": 0.9772974541234496, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.0556, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13474 }, { "epoch": 0.9773699862189018, "grad_norm": 7.71875, "learning_rate": 0.0003, "loss": 8.9135, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13475 }, { "epoch": 0.9774425183143541, "grad_norm": 6.0625, "learning_rate": 0.0003, "loss": 9.1534, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13476 }, { "epoch": 0.9775150504098064, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.4433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13477 }, { "epoch": 0.9775875825052586, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.0813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13478 }, { "epoch": 0.9776601146007108, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.9188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13479 }, { "epoch": 0.977732646696163, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.7847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13480 }, { "epoch": 0.9778051787916153, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.9089, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13481 }, { "epoch": 0.9778777108870675, "grad_norm": 7.1875, "learning_rate": 0.0003, "loss": 9.0308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13482 }, { "epoch": 0.9779502429825198, "grad_norm": 2.25, "learning_rate": 0.0003, "loss": 8.7222, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13483 }, { "epoch": 0.978022775077972, "grad_norm": 3.75, "learning_rate": 0.0003, "loss": 8.6097, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13484 }, { "epoch": 0.9780953071734242, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.5236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13485 }, { "epoch": 0.9781678392688765, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.3246, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13486 }, { "epoch": 0.9782403713643287, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.7521, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13487 }, { "epoch": 0.978312903459781, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.7351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13488 }, { "epoch": 0.9783854355552332, "grad_norm": 6.15625, "learning_rate": 0.0003, "loss": 8.605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13489 }, { "epoch": 0.9784579676506854, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 9.2448, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13490 }, { "epoch": 0.9785304997461377, "grad_norm": 45.75, "learning_rate": 0.0003, "loss": 9.356, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13491 }, { "epoch": 0.9786030318415899, "grad_norm": 5.0, "learning_rate": 0.0003, "loss": 8.8701, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13492 }, { "epoch": 0.9786755639370421, "grad_norm": 3.5625, "learning_rate": 0.0003, "loss": 9.1162, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13493 }, { "epoch": 0.9787480960324944, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.0553, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13494 }, { "epoch": 0.9788206281279466, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.7153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13495 }, { "epoch": 0.9788931602233989, "grad_norm": 6.6875, "learning_rate": 0.0003, "loss": 8.8515, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13496 }, { "epoch": 0.9789656923188511, "grad_norm": 6.5625, "learning_rate": 0.0003, "loss": 8.7273, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13497 }, { "epoch": 0.9790382244143033, "grad_norm": 42.0, "learning_rate": 0.0003, "loss": 8.9956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13498 }, { "epoch": 0.9791107565097555, "grad_norm": 17.0, "learning_rate": 0.0003, "loss": 8.8802, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13499 }, { "epoch": 0.9791832886052078, "grad_norm": 3.671875, "learning_rate": 0.0003, "loss": 8.713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13500 }, { "epoch": 0.9792558207006601, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 8.967, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13501 }, { "epoch": 0.9793283527961123, "grad_norm": 7.375, "learning_rate": 0.0003, "loss": 9.1514, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13502 }, { "epoch": 0.9794008848915645, "grad_norm": 2.546875, "learning_rate": 0.0003, "loss": 8.5488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13503 }, { "epoch": 0.9794734169870167, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.5981, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13504 }, { "epoch": 0.979545949082469, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8786, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13505 }, { "epoch": 0.9796184811779213, "grad_norm": 3.25, "learning_rate": 0.0003, "loss": 8.9559, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13506 }, { "epoch": 0.9796910132733735, "grad_norm": 18.375, "learning_rate": 0.0003, "loss": 8.9199, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13507 }, { "epoch": 0.9797635453688257, "grad_norm": 2.859375, "learning_rate": 0.0003, "loss": 9.0406, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13508 }, { "epoch": 0.9798360774642779, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.5282, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13509 }, { "epoch": 0.9799086095597301, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.8953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13510 }, { "epoch": 0.9799811416551825, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.7583, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13511 }, { "epoch": 0.9800536737506347, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.3136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13512 }, { "epoch": 0.9801262058460869, "grad_norm": 2.15625, "learning_rate": 0.0003, "loss": 8.5784, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13513 }, { "epoch": 0.9801987379415391, "grad_norm": 3.640625, "learning_rate": 0.0003, "loss": 8.2844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13514 }, { "epoch": 0.9802712700369913, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 9.1589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13515 }, { "epoch": 0.9803438021324437, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.4337, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13516 }, { "epoch": 0.9804163342278959, "grad_norm": 2.046875, "learning_rate": 0.0003, "loss": 8.6815, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13517 }, { "epoch": 0.9804888663233481, "grad_norm": 2.828125, "learning_rate": 0.0003, "loss": 9.4454, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13518 }, { "epoch": 0.9805613984188003, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.95, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13519 }, { "epoch": 0.9806339305142525, "grad_norm": 2.6875, "learning_rate": 0.0003, "loss": 8.8327, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13520 }, { "epoch": 0.9807064626097048, "grad_norm": 3.578125, "learning_rate": 0.0003, "loss": 9.1308, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13521 }, { "epoch": 0.9807789947051571, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.9049, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13522 }, { "epoch": 0.9808515268006093, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.9292, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13523 }, { "epoch": 0.9809240588960615, "grad_norm": 5.21875, "learning_rate": 0.0003, "loss": 8.5796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13524 }, { "epoch": 0.9809965909915137, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 9.418, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13525 }, { "epoch": 0.981069123086966, "grad_norm": 9.3125, "learning_rate": 0.0003, "loss": 8.7364, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13526 }, { "epoch": 0.9811416551824182, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.8474, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13527 }, { "epoch": 0.9812141872778705, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13528 }, { "epoch": 0.9812867193733227, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.6907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13529 }, { "epoch": 0.9813592514687749, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.726, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13530 }, { "epoch": 0.9814317835642272, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.7637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13531 }, { "epoch": 0.9815043156596794, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 9.2393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13532 }, { "epoch": 0.9815768477551317, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.8944, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13533 }, { "epoch": 0.9816493798505839, "grad_norm": 3.8125, "learning_rate": 0.0003, "loss": 8.7298, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13534 }, { "epoch": 0.9817219119460361, "grad_norm": 5.0625, "learning_rate": 0.0003, "loss": 8.3608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13535 }, { "epoch": 0.9817944440414884, "grad_norm": 29.5, "learning_rate": 0.0003, "loss": 8.9018, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13536 }, { "epoch": 0.9818669761369406, "grad_norm": 8.375, "learning_rate": 0.0003, "loss": 8.7613, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13537 }, { "epoch": 0.9819395082323928, "grad_norm": 6.75, "learning_rate": 0.0003, "loss": 8.5816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13538 }, { "epoch": 0.9820120403278451, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.8908, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13539 }, { "epoch": 0.9820845724232973, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.7187, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13540 }, { "epoch": 0.9821571045187496, "grad_norm": 13.8125, "learning_rate": 0.0003, "loss": 8.3046, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13541 }, { "epoch": 0.9822296366142018, "grad_norm": 2.890625, "learning_rate": 0.0003, "loss": 8.7625, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13542 }, { "epoch": 0.982302168709654, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 8.5062, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13543 }, { "epoch": 0.9823747008051062, "grad_norm": 2.0625, "learning_rate": 0.0003, "loss": 8.742, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13544 }, { "epoch": 0.9824472329005585, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 9.01, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13545 }, { "epoch": 0.9825197649960107, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 8.7888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13546 }, { "epoch": 0.982592297091463, "grad_norm": 6.34375, "learning_rate": 0.0003, "loss": 8.6928, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13547 }, { "epoch": 0.9826648291869152, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.1791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13548 }, { "epoch": 0.9827373612823674, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7394, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13549 }, { "epoch": 0.9828098933778197, "grad_norm": 16.25, "learning_rate": 0.0003, "loss": 9.4353, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13550 }, { "epoch": 0.9828824254732719, "grad_norm": 1.4375, "learning_rate": 0.0003, "loss": 8.9674, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13551 }, { "epoch": 0.9829549575687242, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.6485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13552 }, { "epoch": 0.9830274896641764, "grad_norm": 5.40625, "learning_rate": 0.0003, "loss": 8.5598, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13553 }, { "epoch": 0.9831000217596286, "grad_norm": 4.3125, "learning_rate": 0.0003, "loss": 8.52, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13554 }, { "epoch": 0.9831725538550808, "grad_norm": 4.6875, "learning_rate": 0.0003, "loss": 9.0491, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13555 }, { "epoch": 0.9832450859505331, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.4436, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13556 }, { "epoch": 0.9833176180459854, "grad_norm": 3.546875, "learning_rate": 0.0003, "loss": 7.9531, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13557 }, { "epoch": 0.9833901501414376, "grad_norm": 8.0, "learning_rate": 0.0003, "loss": 9.1795, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13558 }, { "epoch": 0.9834626822368898, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 9.4969, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13559 }, { "epoch": 0.983535214332342, "grad_norm": 1.8828125, "learning_rate": 0.0003, "loss": 9.6119, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13560 }, { "epoch": 0.9836077464277942, "grad_norm": 55.0, "learning_rate": 0.0003, "loss": 9.2153, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13561 }, { "epoch": 0.9836802785232466, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.6196, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13562 }, { "epoch": 0.9837528106186988, "grad_norm": 11.3125, "learning_rate": 0.0003, "loss": 9.2449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13563 }, { "epoch": 0.983825342714151, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 8.9461, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13564 }, { "epoch": 0.9838978748096032, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 9.3605, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13565 }, { "epoch": 0.9839704069050554, "grad_norm": 3.375, "learning_rate": 0.0003, "loss": 8.6171, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13566 }, { "epoch": 0.9840429390005078, "grad_norm": 2.953125, "learning_rate": 0.0003, "loss": 8.7236, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13567 }, { "epoch": 0.98411547109596, "grad_norm": 2.65625, "learning_rate": 0.0003, "loss": 8.9743, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13568 }, { "epoch": 0.9841880031914122, "grad_norm": 3.296875, "learning_rate": 0.0003, "loss": 8.9481, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13569 }, { "epoch": 0.9842605352868644, "grad_norm": 5.6875, "learning_rate": 0.0003, "loss": 8.9189, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13570 }, { "epoch": 0.9843330673823166, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.7362, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13571 }, { "epoch": 0.984405599477769, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.231, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13572 }, { "epoch": 0.9844781315732212, "grad_norm": 6.25, "learning_rate": 0.0003, "loss": 8.8842, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13573 }, { "epoch": 0.9845506636686734, "grad_norm": 3.203125, "learning_rate": 0.0003, "loss": 9.5341, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13574 }, { "epoch": 0.9846231957641256, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.768, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13575 }, { "epoch": 0.9846957278595778, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.7103, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13576 }, { "epoch": 0.9847682599550301, "grad_norm": 8.75, "learning_rate": 0.0003, "loss": 8.7759, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13577 }, { "epoch": 0.9848407920504824, "grad_norm": 11.5625, "learning_rate": 0.0003, "loss": 9.5143, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13578 }, { "epoch": 0.9849133241459346, "grad_norm": 3.171875, "learning_rate": 0.0003, "loss": 9.0002, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13579 }, { "epoch": 0.9849858562413868, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.3739, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13580 }, { "epoch": 0.985058388336839, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6589, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13581 }, { "epoch": 0.9851309204322913, "grad_norm": 23.0, "learning_rate": 0.0003, "loss": 9.0729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13582 }, { "epoch": 0.9852034525277436, "grad_norm": 4.65625, "learning_rate": 0.0003, "loss": 8.3931, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13583 }, { "epoch": 0.9852759846231958, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.9343, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13584 }, { "epoch": 0.985348516718648, "grad_norm": 6.28125, "learning_rate": 0.0003, "loss": 8.8882, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13585 }, { "epoch": 0.9854210488141002, "grad_norm": 4.875, "learning_rate": 0.0003, "loss": 9.4352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13586 }, { "epoch": 0.9854935809095525, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 8.7449, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13587 }, { "epoch": 0.9855661130050047, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 8.9284, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13588 }, { "epoch": 0.985638645100457, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 8.9274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13589 }, { "epoch": 0.9857111771959092, "grad_norm": 5.59375, "learning_rate": 0.0003, "loss": 8.544, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13590 }, { "epoch": 0.9857837092913614, "grad_norm": 4.96875, "learning_rate": 0.0003, "loss": 9.0432, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13591 }, { "epoch": 0.9858562413868137, "grad_norm": 5.1875, "learning_rate": 0.0003, "loss": 8.6699, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13592 }, { "epoch": 0.9859287734822659, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 9.3907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13593 }, { "epoch": 0.9860013055777181, "grad_norm": 2.90625, "learning_rate": 0.0003, "loss": 8.4398, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13594 }, { "epoch": 0.9860738376731704, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.9888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13595 }, { "epoch": 0.9861463697686226, "grad_norm": 2.5, "learning_rate": 0.0003, "loss": 8.7558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13596 }, { "epoch": 0.9862189018640749, "grad_norm": 3.984375, "learning_rate": 0.0003, "loss": 9.0968, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13597 }, { "epoch": 0.9862914339595271, "grad_norm": 4.25, "learning_rate": 0.0003, "loss": 8.6621, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13598 }, { "epoch": 0.9863639660549793, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.8227, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13599 }, { "epoch": 0.9864364981504316, "grad_norm": 9.875, "learning_rate": 0.0003, "loss": 8.434, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13600 }, { "epoch": 0.9865090302458838, "grad_norm": 4.40625, "learning_rate": 0.0003, "loss": 8.9729, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13601 }, { "epoch": 0.9865815623413361, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 8.6469, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13602 }, { "epoch": 0.9866540944367883, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.8228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13603 }, { "epoch": 0.9867266265322405, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 8.7484, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13604 }, { "epoch": 0.9867991586276927, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 8.6176, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13605 }, { "epoch": 0.986871690723145, "grad_norm": 7.78125, "learning_rate": 0.0003, "loss": 9.0818, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13606 }, { "epoch": 0.9869442228185973, "grad_norm": 4.15625, "learning_rate": 0.0003, "loss": 8.7568, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13607 }, { "epoch": 0.9870167549140495, "grad_norm": 2.765625, "learning_rate": 0.0003, "loss": 8.8642, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13608 }, { "epoch": 0.9870892870095017, "grad_norm": 7.4375, "learning_rate": 0.0003, "loss": 9.029, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13609 }, { "epoch": 0.9871618191049539, "grad_norm": 2.875, "learning_rate": 0.0003, "loss": 8.8442, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13610 }, { "epoch": 0.9872343512004061, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.0403, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13611 }, { "epoch": 0.9873068832958585, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.845, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13612 }, { "epoch": 0.9873794153913107, "grad_norm": 4.84375, "learning_rate": 0.0003, "loss": 8.3389, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13613 }, { "epoch": 0.9874519474867629, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7688, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13614 }, { "epoch": 0.9875244795822151, "grad_norm": 8.3125, "learning_rate": 0.0003, "loss": 8.9232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13615 }, { "epoch": 0.9875970116776673, "grad_norm": 3.796875, "learning_rate": 0.0003, "loss": 9.1974, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13616 }, { "epoch": 0.9876695437731196, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.7668, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13617 }, { "epoch": 0.9877420758685719, "grad_norm": 7.28125, "learning_rate": 0.0003, "loss": 8.9351, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13618 }, { "epoch": 0.9878146079640241, "grad_norm": 5.84375, "learning_rate": 0.0003, "loss": 9.6216, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13619 }, { "epoch": 0.9878871400594763, "grad_norm": 13.9375, "learning_rate": 0.0003, "loss": 8.8295, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13620 }, { "epoch": 0.9879596721549285, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 8.7243, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13621 }, { "epoch": 0.9880322042503807, "grad_norm": 3.390625, "learning_rate": 0.0003, "loss": 9.3075, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13622 }, { "epoch": 0.9881047363458331, "grad_norm": 4.8125, "learning_rate": 0.0003, "loss": 8.4788, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13623 }, { "epoch": 0.9881772684412853, "grad_norm": 6.03125, "learning_rate": 0.0003, "loss": 8.9566, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13624 }, { "epoch": 0.9882498005367375, "grad_norm": 4.90625, "learning_rate": 0.0003, "loss": 8.7722, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13625 }, { "epoch": 0.9883223326321897, "grad_norm": 3.09375, "learning_rate": 0.0003, "loss": 8.7279, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13626 }, { "epoch": 0.9883948647276419, "grad_norm": 3.40625, "learning_rate": 0.0003, "loss": 8.5628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13627 }, { "epoch": 0.9884673968230943, "grad_norm": 2.109375, "learning_rate": 0.0003, "loss": 8.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13628 }, { "epoch": 0.9885399289185465, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.7924, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13629 }, { "epoch": 0.9886124610139987, "grad_norm": 1.5625, "learning_rate": 0.0003, "loss": 8.6669, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13630 }, { "epoch": 0.9886849931094509, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13631 }, { "epoch": 0.9887575252049031, "grad_norm": 7.5, "learning_rate": 0.0003, "loss": 8.7947, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13632 }, { "epoch": 0.9888300573003554, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 8.9796, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13633 }, { "epoch": 0.9889025893958077, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 8.8214, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13634 }, { "epoch": 0.9889751214912599, "grad_norm": 3.71875, "learning_rate": 0.0003, "loss": 9.0927, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13635 }, { "epoch": 0.9890476535867121, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 8.9631, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13636 }, { "epoch": 0.9891201856821643, "grad_norm": 4.03125, "learning_rate": 0.0003, "loss": 8.6483, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13637 }, { "epoch": 0.9891927177776166, "grad_norm": 9.75, "learning_rate": 0.0003, "loss": 8.8034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13638 }, { "epoch": 0.9892652498730689, "grad_norm": 3.953125, "learning_rate": 0.0003, "loss": 8.4816, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13639 }, { "epoch": 0.9893377819685211, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 8.6805, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13640 }, { "epoch": 0.9894103140639733, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 8.9571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13641 }, { "epoch": 0.9894828461594255, "grad_norm": 2.359375, "learning_rate": 0.0003, "loss": 8.697, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13642 }, { "epoch": 0.9895553782548778, "grad_norm": 3.90625, "learning_rate": 0.0003, "loss": 9.0694, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13643 }, { "epoch": 0.98962791035033, "grad_norm": 7.34375, "learning_rate": 0.0003, "loss": 8.8989, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13644 }, { "epoch": 0.9897004424457823, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.6797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13645 }, { "epoch": 0.9897729745412345, "grad_norm": 3.78125, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13646 }, { "epoch": 0.9898455066366867, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 9.2025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13647 }, { "epoch": 0.989918038732139, "grad_norm": 2.71875, "learning_rate": 0.0003, "loss": 8.6305, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13648 }, { "epoch": 0.9899905708275912, "grad_norm": 2.84375, "learning_rate": 0.0003, "loss": 9.2937, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13649 }, { "epoch": 0.9900631029230434, "grad_norm": 6.875, "learning_rate": 0.0003, "loss": 9.1031, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13650 }, { "epoch": 0.9901356350184957, "grad_norm": 1.59375, "learning_rate": 0.0003, "loss": 9.2663, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13651 }, { "epoch": 0.9902081671139479, "grad_norm": 2.28125, "learning_rate": 0.0003, "loss": 8.5675, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13652 }, { "epoch": 0.9902806992094002, "grad_norm": 5.5625, "learning_rate": 0.0003, "loss": 8.5886, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13653 }, { "epoch": 0.9903532313048524, "grad_norm": 2.328125, "learning_rate": 0.0003, "loss": 8.7185, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13654 }, { "epoch": 0.9904257634003046, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 8.5266, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13655 }, { "epoch": 0.9904982954957569, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 8.2686, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13656 }, { "epoch": 0.9905708275912091, "grad_norm": 10.875, "learning_rate": 0.0003, "loss": 9.066, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13657 }, { "epoch": 0.9906433596866614, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 9.1221, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13658 }, { "epoch": 0.9907158917821136, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 9.3086, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13659 }, { "epoch": 0.9907884238775658, "grad_norm": 4.53125, "learning_rate": 0.0003, "loss": 9.1259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13660 }, { "epoch": 0.990860955973018, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.4563, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13661 }, { "epoch": 0.9909334880684703, "grad_norm": 4.625, "learning_rate": 0.0003, "loss": 8.6723, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13662 }, { "epoch": 0.9910060201639226, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.913, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13663 }, { "epoch": 0.9910785522593748, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.9757, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13664 }, { "epoch": 0.991151084354827, "grad_norm": 3.34375, "learning_rate": 0.0003, "loss": 8.9173, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13665 }, { "epoch": 0.9912236164502792, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 9.8433, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13666 }, { "epoch": 0.9912961485457314, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 9.4505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13667 }, { "epoch": 0.9913686806411838, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8376, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13668 }, { "epoch": 0.991441212736636, "grad_norm": 2.96875, "learning_rate": 0.0003, "loss": 8.6814, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13669 }, { "epoch": 0.9915137448320882, "grad_norm": 5.34375, "learning_rate": 0.0003, "loss": 8.836, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13670 }, { "epoch": 0.9915862769275404, "grad_norm": 1.9921875, "learning_rate": 0.0003, "loss": 8.501, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13671 }, { "epoch": 0.9916588090229926, "grad_norm": 8.9375, "learning_rate": 0.0003, "loss": 8.9693, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13672 }, { "epoch": 0.991731341118445, "grad_norm": 3.65625, "learning_rate": 0.0003, "loss": 8.7037, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13673 }, { "epoch": 0.9918038732138972, "grad_norm": 3.625, "learning_rate": 0.0003, "loss": 9.1068, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13674 }, { "epoch": 0.9918764053093494, "grad_norm": 4.71875, "learning_rate": 0.0003, "loss": 8.8267, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13675 }, { "epoch": 0.9919489374048016, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 8.4724, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13676 }, { "epoch": 0.9920214695002538, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.3791, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13677 }, { "epoch": 0.9920940015957062, "grad_norm": 4.09375, "learning_rate": 0.0003, "loss": 8.5078, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13678 }, { "epoch": 0.9921665336911584, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.7383, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13679 }, { "epoch": 0.9922390657866106, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 8.6651, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13680 }, { "epoch": 0.9923115978820628, "grad_norm": 3.890625, "learning_rate": 0.0003, "loss": 9.314, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13681 }, { "epoch": 0.992384129977515, "grad_norm": 3.125, "learning_rate": 0.0003, "loss": 8.9124, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13682 }, { "epoch": 0.9924566620729672, "grad_norm": 18.625, "learning_rate": 0.0003, "loss": 8.953, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13683 }, { "epoch": 0.9925291941684196, "grad_norm": 7.90625, "learning_rate": 0.0003, "loss": 8.8646, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13684 }, { "epoch": 0.9926017262638718, "grad_norm": 4.4375, "learning_rate": 0.0003, "loss": 8.9495, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13685 }, { "epoch": 0.992674258359324, "grad_norm": 12.5, "learning_rate": 0.0003, "loss": 8.7352, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13686 }, { "epoch": 0.9927467904547762, "grad_norm": 4.28125, "learning_rate": 0.0003, "loss": 8.6217, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13687 }, { "epoch": 0.9928193225502284, "grad_norm": 3.0, "learning_rate": 0.0003, "loss": 9.0238, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13688 }, { "epoch": 0.9928918546456807, "grad_norm": 1.921875, "learning_rate": 0.0003, "loss": 8.7758, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13689 }, { "epoch": 0.992964386741133, "grad_norm": 17.375, "learning_rate": 0.0003, "loss": 8.834, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13690 }, { "epoch": 0.9930369188365852, "grad_norm": 2.421875, "learning_rate": 0.0003, "loss": 8.839, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13691 }, { "epoch": 0.9931094509320374, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 9.0115, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13692 }, { "epoch": 0.9931819830274896, "grad_norm": 2.53125, "learning_rate": 0.0003, "loss": 8.393, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13693 }, { "epoch": 0.9932545151229419, "grad_norm": 8.5625, "learning_rate": 0.0003, "loss": 9.0188, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13694 }, { "epoch": 0.9933270472183942, "grad_norm": 5.3125, "learning_rate": 0.0003, "loss": 8.6518, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13695 }, { "epoch": 0.9933995793138464, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 9.4269, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13696 }, { "epoch": 0.9934721114092986, "grad_norm": 5.4375, "learning_rate": 0.0003, "loss": 8.7993, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13697 }, { "epoch": 0.9935446435047508, "grad_norm": 6.9375, "learning_rate": 0.0003, "loss": 8.7907, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13698 }, { "epoch": 0.9936171756002031, "grad_norm": 9.5, "learning_rate": 0.0003, "loss": 8.3736, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13699 }, { "epoch": 0.9936897076956553, "grad_norm": 4.5625, "learning_rate": 0.0003, "loss": 9.1408, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13700 }, { "epoch": 0.9937622397911076, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 8.4717, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13701 }, { "epoch": 0.9938347718865598, "grad_norm": 5.375, "learning_rate": 0.0003, "loss": 9.1637, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13702 }, { "epoch": 0.993907303982012, "grad_norm": 7.3125, "learning_rate": 0.0003, "loss": 8.7132, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13703 }, { "epoch": 0.9939798360774643, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.8025, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13704 }, { "epoch": 0.9940523681729165, "grad_norm": 5.125, "learning_rate": 0.0003, "loss": 8.7748, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13705 }, { "epoch": 0.9941249002683687, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.7108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13706 }, { "epoch": 0.994197432363821, "grad_norm": 6.46875, "learning_rate": 0.0003, "loss": 8.8764, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13707 }, { "epoch": 0.9942699644592732, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.9152, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13708 }, { "epoch": 0.9943424965547255, "grad_norm": 3.9375, "learning_rate": 0.0003, "loss": 8.127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13709 }, { "epoch": 0.9944150286501777, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 9.0713, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13710 }, { "epoch": 0.9944875607456299, "grad_norm": 2.484375, "learning_rate": 0.0003, "loss": 9.232, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13711 }, { "epoch": 0.9945600928410822, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.6172, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13712 }, { "epoch": 0.9946326249365344, "grad_norm": 4.34375, "learning_rate": 0.0003, "loss": 8.9259, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13713 }, { "epoch": 0.9947051570319867, "grad_norm": 2.015625, "learning_rate": 0.0003, "loss": 8.3775, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13714 }, { "epoch": 0.9947776891274389, "grad_norm": 5.28125, "learning_rate": 0.0003, "loss": 8.6847, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13715 }, { "epoch": 0.9948502212228911, "grad_norm": 1.7734375, "learning_rate": 0.0003, "loss": 9.0127, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13716 }, { "epoch": 0.9949227533183433, "grad_norm": 5.25, "learning_rate": 0.0003, "loss": 8.7228, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13717 }, { "epoch": 0.9949952854137956, "grad_norm": 3.328125, "learning_rate": 0.0003, "loss": 8.728, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13718 }, { "epoch": 0.9950678175092479, "grad_norm": 2.796875, "learning_rate": 0.0003, "loss": 8.8485, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13719 }, { "epoch": 0.9951403496047001, "grad_norm": 2.4375, "learning_rate": 0.0003, "loss": 8.8946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13720 }, { "epoch": 0.9952128817001523, "grad_norm": 3.046875, "learning_rate": 0.0003, "loss": 8.576, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13721 }, { "epoch": 0.9952854137956045, "grad_norm": 3.21875, "learning_rate": 0.0003, "loss": 8.8373, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13722 }, { "epoch": 0.9953579458910568, "grad_norm": 3.359375, "learning_rate": 0.0003, "loss": 9.318, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13723 }, { "epoch": 0.9954304779865091, "grad_norm": 3.453125, "learning_rate": 0.0003, "loss": 8.8041, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13724 }, { "epoch": 0.9955030100819613, "grad_norm": 12.25, "learning_rate": 0.0003, "loss": 9.3662, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13725 }, { "epoch": 0.9955755421774135, "grad_norm": 3.609375, "learning_rate": 0.0003, "loss": 9.4304, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13726 }, { "epoch": 0.9956480742728657, "grad_norm": 2.171875, "learning_rate": 0.0003, "loss": 8.8235, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13727 }, { "epoch": 0.9957206063683179, "grad_norm": 3.28125, "learning_rate": 0.0003, "loss": 9.06, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13728 }, { "epoch": 0.9957931384637703, "grad_norm": 2.40625, "learning_rate": 0.0003, "loss": 9.0571, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13729 }, { "epoch": 0.9958656705592225, "grad_norm": 6.4375, "learning_rate": 0.0003, "loss": 8.2892, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13730 }, { "epoch": 0.9959382026546747, "grad_norm": 2.3125, "learning_rate": 0.0003, "loss": 8.9813, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13731 }, { "epoch": 0.9960107347501269, "grad_norm": 4.75, "learning_rate": 0.0003, "loss": 8.9065, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13732 }, { "epoch": 0.9960832668455791, "grad_norm": 14.5625, "learning_rate": 0.0003, "loss": 8.8954, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13733 }, { "epoch": 0.9961557989410315, "grad_norm": 6.84375, "learning_rate": 0.0003, "loss": 9.0551, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13734 }, { "epoch": 0.9962283310364837, "grad_norm": 4.0, "learning_rate": 0.0003, "loss": 8.614, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13735 }, { "epoch": 0.9963008631319359, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 9.2567, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13736 }, { "epoch": 0.9963733952273881, "grad_norm": 2.625, "learning_rate": 0.0003, "loss": 8.4125, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13737 }, { "epoch": 0.9964459273228403, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 8.1946, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13738 }, { "epoch": 0.9965184594182926, "grad_norm": 4.78125, "learning_rate": 0.0003, "loss": 8.9714, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13739 }, { "epoch": 0.9965909915137449, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.7335, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13740 }, { "epoch": 0.9966635236091971, "grad_norm": 3.140625, "learning_rate": 0.0003, "loss": 8.8053, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13741 }, { "epoch": 0.9967360557046493, "grad_norm": 3.96875, "learning_rate": 0.0003, "loss": 8.9136, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13742 }, { "epoch": 0.9968085878001015, "grad_norm": 2.234375, "learning_rate": 0.0003, "loss": 8.7342, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13743 }, { "epoch": 0.9968811198955538, "grad_norm": 2.375, "learning_rate": 0.0003, "loss": 8.2812, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13744 }, { "epoch": 0.996953651991006, "grad_norm": 22.125, "learning_rate": 0.0003, "loss": 8.7629, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13745 }, { "epoch": 0.9970261840864583, "grad_norm": 4.1875, "learning_rate": 0.0003, "loss": 8.7076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13746 }, { "epoch": 0.9970987161819105, "grad_norm": 2.75, "learning_rate": 0.0003, "loss": 9.2488, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13747 }, { "epoch": 0.9971712482773627, "grad_norm": 4.375, "learning_rate": 0.0003, "loss": 9.1116, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13748 }, { "epoch": 0.997243780372815, "grad_norm": 2.34375, "learning_rate": 0.0003, "loss": 8.7591, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13749 }, { "epoch": 0.9973163124682672, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.6923, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13750 }, { "epoch": 0.9973888445637195, "grad_norm": 3.015625, "learning_rate": 0.0003, "loss": 9.0734, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13751 }, { "epoch": 0.9974613766591717, "grad_norm": 4.21875, "learning_rate": 0.0003, "loss": 8.5888, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13752 }, { "epoch": 0.9975339087546239, "grad_norm": 5.90625, "learning_rate": 0.0003, "loss": 8.628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13753 }, { "epoch": 0.9976064408500761, "grad_norm": 3.703125, "learning_rate": 0.0003, "loss": 9.274, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13754 }, { "epoch": 0.9976789729455284, "grad_norm": 3.3125, "learning_rate": 0.0003, "loss": 8.6381, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13755 }, { "epoch": 0.9977515050409806, "grad_norm": 9.125, "learning_rate": 0.0003, "loss": 8.806, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13756 }, { "epoch": 0.9978240371364329, "grad_norm": 2.640625, "learning_rate": 0.0003, "loss": 9.3955, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13757 }, { "epoch": 0.9978965692318851, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.9865, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13758 }, { "epoch": 0.9979691013273373, "grad_norm": 2.921875, "learning_rate": 0.0003, "loss": 8.6797, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13759 }, { "epoch": 0.9980416334227896, "grad_norm": 5.5, "learning_rate": 0.0003, "loss": 8.7558, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13760 }, { "epoch": 0.9981141655182418, "grad_norm": 2.390625, "learning_rate": 0.0003, "loss": 8.8871, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13761 }, { "epoch": 0.998186697613694, "grad_norm": 4.125, "learning_rate": 0.0003, "loss": 8.8468, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13762 }, { "epoch": 0.9982592297091463, "grad_norm": 2.46875, "learning_rate": 0.0003, "loss": 9.0833, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13763 }, { "epoch": 0.9983317618045985, "grad_norm": 1.90625, "learning_rate": 0.0003, "loss": 8.7628, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13764 }, { "epoch": 0.9984042939000508, "grad_norm": 2.265625, "learning_rate": 0.0003, "loss": 9.1076, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13765 }, { "epoch": 0.998476825995503, "grad_norm": 1.96875, "learning_rate": 0.0003, "loss": 8.9706, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13766 }, { "epoch": 0.9985493580909552, "grad_norm": 3.59375, "learning_rate": 0.0003, "loss": 8.793, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13767 }, { "epoch": 0.9986218901864075, "grad_norm": 6.09375, "learning_rate": 0.0003, "loss": 8.9414, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13768 }, { "epoch": 0.9986944222818597, "grad_norm": 4.9375, "learning_rate": 0.0003, "loss": 8.2129, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13769 }, { "epoch": 0.998766954377312, "grad_norm": 2.125, "learning_rate": 0.0003, "loss": 8.8958, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13770 }, { "epoch": 0.9988394864727642, "grad_norm": 2.078125, "learning_rate": 0.0003, "loss": 9.3112, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13771 }, { "epoch": 0.9989120185682164, "grad_norm": 2.609375, "learning_rate": 0.0003, "loss": 8.8844, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13772 }, { "epoch": 0.9989845506636686, "grad_norm": 4.5, "learning_rate": 0.0003, "loss": 8.608, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13773 }, { "epoch": 0.9990570827591209, "grad_norm": 5.65625, "learning_rate": 0.0003, "loss": 9.2108, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13774 }, { "epoch": 0.9991296148545732, "grad_norm": 2.671875, "learning_rate": 0.0003, "loss": 9.0061, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13775 }, { "epoch": 0.9992021469500254, "grad_norm": 7.46875, "learning_rate": 0.0003, "loss": 8.8856, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13776 }, { "epoch": 0.9992746790454776, "grad_norm": 5.03125, "learning_rate": 0.0003, "loss": 8.5047, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13777 }, { "epoch": 0.9993472111409298, "grad_norm": 3.0625, "learning_rate": 0.0003, "loss": 8.8889, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13778 }, { "epoch": 0.999419743236382, "grad_norm": 5.9375, "learning_rate": 0.0003, "loss": 8.9956, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13779 }, { "epoch": 0.9994922753318344, "grad_norm": 3.53125, "learning_rate": 0.0003, "loss": 8.586, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13780 }, { "epoch": 0.9995648074272866, "grad_norm": 6.1875, "learning_rate": 0.0003, "loss": 8.9034, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13781 }, { "epoch": 0.9996373395227388, "grad_norm": 3.234375, "learning_rate": 0.0003, "loss": 8.8505, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13782 }, { "epoch": 0.999709871618191, "grad_norm": 4.0625, "learning_rate": 0.0003, "loss": 8.9245, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13783 }, { "epoch": 0.9997824037136432, "grad_norm": 2.203125, "learning_rate": 0.0003, "loss": 8.522, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13784 }, { "epoch": 0.9998549358090956, "grad_norm": 4.46875, "learning_rate": 0.0003, "loss": 8.6147, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13785 }, { "epoch": 0.9999274679045478, "grad_norm": 2.296875, "learning_rate": 0.0003, "loss": 8.9878, "memory/device_memory_reserved": 3.10546875, "memory/max_memory_active": 3.0872249603271484, "memory/max_memory_allocated": 3.0872249603271484, "step": 13786 } ], "logging_steps": 1, "max_steps": 13786, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.524655164235448e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }